Skip to content

Commit 2595e7a

Browse files
committed
format
1 parent 88d3f9d commit 2595e7a

2 files changed

Lines changed: 60 additions & 16 deletions

File tree

tests/serve/test_dynamo_serve.py

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
from tests.utils.deployment_graph import (
2525
DeploymentGraph,
2626
Payload,
27-
completions_response_handler,
2827
chat_completions_response_handler,
28+
completions_response_handler,
2929
)
3030
from tests.utils.managed_process import ManagedProcess
3131

@@ -88,7 +88,10 @@
8888
config="configs/agg.yaml",
8989
directory="/workspace/examples/llm",
9090
endpoints=["v1/chat/completions", "v1/completions"],
91-
response_handlers=[chat_completions_response_handler, completions_response_handler],
91+
response_handlers=[
92+
chat_completions_response_handler,
93+
completions_response_handler,
94+
],
9295
marks=[pytest.mark.gpu_1, pytest.mark.vllm],
9396
),
9497
text_payload,
@@ -99,7 +102,10 @@
99102
config="configs/agg.yaml",
100103
directory="/workspace/examples/sglang",
101104
endpoints=["v1/chat/completions", "v1/completions"],
102-
response_handlers=[chat_completions_response_handler, completions_response_handler],
105+
response_handlers=[
106+
chat_completions_response_handler,
107+
completions_response_handler,
108+
],
103109
marks=[pytest.mark.gpu_1, pytest.mark.sglang],
104110
),
105111
text_payload,
@@ -110,7 +116,10 @@
110116
config="configs/disagg.yaml",
111117
directory="/workspace/examples/llm",
112118
endpoints=["v1/chat/completions", "v1/completions"],
113-
response_handlers=[chat_completions_response_handler, completions_response_handler],
119+
response_handlers=[
120+
chat_completions_response_handler,
121+
completions_response_handler,
122+
],
114123
marks=[pytest.mark.gpu_2, pytest.mark.vllm],
115124
),
116125
text_payload,
@@ -121,7 +130,10 @@
121130
config="configs/agg_router.yaml",
122131
directory="/workspace/examples/llm",
123132
endpoints=["v1/chat/completions", "v1/completions"],
124-
response_handlers=[chat_completions_response_handler, completions_response_handler],
133+
response_handlers=[
134+
chat_completions_response_handler,
135+
completions_response_handler,
136+
],
125137
marks=[pytest.mark.gpu_1, pytest.mark.vllm],
126138
),
127139
text_payload,
@@ -132,7 +144,10 @@
132144
config="configs/disagg_router.yaml",
133145
directory="/workspace/examples/llm",
134146
endpoints=["v1/chat/completions", "v1/completions"],
135-
response_handlers=[chat_completions_response_handler, completions_response_handler],
147+
response_handlers=[
148+
chat_completions_response_handler,
149+
completions_response_handler,
150+
],
136151
marks=[pytest.mark.gpu_2, pytest.mark.vllm],
137152
),
138153
text_payload,
@@ -143,7 +158,10 @@
143158
config="configs/agg.yaml",
144159
directory="/workspace/examples/multimodal",
145160
endpoints=["v1/chat/completions", "v1/completions"],
146-
response_handlers=[chat_completions_response_handler, completions_response_handler],
161+
response_handlers=[
162+
chat_completions_response_handler,
163+
completions_response_handler,
164+
],
147165
marks=[pytest.mark.gpu_2, pytest.mark.vllm],
148166
),
149167
multimodal_payload,
@@ -154,7 +172,10 @@
154172
config="configs/agg.yaml",
155173
directory="/workspace/examples/vllm_v1",
156174
endpoints=["v1/chat/completions", "v1/completions"],
157-
response_handlers=[chat_completions_response_handler, completions_response_handler],
175+
response_handlers=[
176+
chat_completions_response_handler,
177+
completions_response_handler,
178+
],
158179
marks=[pytest.mark.gpu_1, pytest.mark.vllm],
159180
),
160181
text_payload,
@@ -165,7 +186,10 @@
165186
config="configs/agg.yaml",
166187
directory="/workspace/examples/tensorrt_llm",
167188
endpoints=["v1/chat/completions", "v1/completions"],
168-
response_handlers=[chat_completions_response_handler, completions_response_handler],
189+
response_handlers=[
190+
chat_completions_response_handler,
191+
completions_response_handler,
192+
],
169193
marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
170194
),
171195
text_payload,
@@ -176,7 +200,10 @@
176200
config="configs/agg_router.yaml",
177201
directory="/workspace/examples/tensorrt_llm",
178202
endpoints=["v1/chat/completions", "v1/completions"],
179-
response_handlers=[chat_completions_response_handler, completions_response_handler],
203+
response_handlers=[
204+
chat_completions_response_handler,
205+
completions_response_handler,
206+
],
180207
marks=[pytest.mark.gpu_1, pytest.mark.tensorrtllm],
181208
# FIXME: This is a hack to allow deployments to start before sending any requests.
182209
# When using KV-router, if all the endpoints are not registered, the service
@@ -191,7 +218,10 @@
191218
config="configs/disagg.yaml",
192219
directory="/workspace/examples/tensorrt_llm",
193220
endpoints=["v1/chat/completions", "v1/completions"],
194-
response_handlers=[chat_completions_response_handler, completions_response_handler],
221+
response_handlers=[
222+
chat_completions_response_handler,
223+
completions_response_handler,
224+
],
195225
marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
196226
),
197227
text_payload,
@@ -202,7 +232,10 @@
202232
config="configs/disagg_router.yaml",
203233
directory="/workspace/examples/tensorrt_llm",
204234
endpoints=["v1/chat/completions", "v1/completions"],
205-
response_handlers=[chat_completions_response_handler, completions_response_handler],
235+
response_handlers=[
236+
chat_completions_response_handler,
237+
completions_response_handler,
238+
],
206239
marks=[pytest.mark.gpu_2, pytest.mark.tensorrtllm],
207240
# FIXME: This is a hack to allow deployments to start before sending any requests.
208241
# When using KV-router, if all the endpoints are not registered, the service
@@ -301,17 +334,27 @@ def check_response(response, response_handler):
301334
assert content, "Empty response content"
302335
for expected in payload.expected_response:
303336
assert expected in content, "Expected '%s' not found in response" % expected
337+
304338
with DynamoServeProcess(deployment_graph, request) as server_process:
305339
first_success_pending = True
306-
for endpoint, response_handler in zip(deployment_graph.endpoints, deployment_graph.response_handlers):
340+
for endpoint, response_handler in zip(
341+
deployment_graph.endpoints, deployment_graph.response_handlers
342+
):
307343
url = f"http://localhost:{server_process.port}/{endpoint}"
308344
start_time = time.time()
309345
retry_delay = 5
310346
elapsed = 0.0
311-
request_body = payload.payload_chat if endpoint == "v1/chat/completions" else payload.payload_completions
347+
request_body = (
348+
payload.payload_chat
349+
if endpoint == "v1/chat/completions"
350+
else payload.payload_completions
351+
)
312352

313-
# We can skip this
314-
while time.time() - start_time < deployment_graph.timeout and first_success_pending:
353+
# We can skip this
354+
while (
355+
time.time() - start_time < deployment_graph.timeout
356+
and first_success_pending
357+
):
315358
elapsed = time.time() - start_time
316359
try:
317360
response = requests.post(

tests/utils/deployment_graph.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def chat_completions_response_handler(response):
5959
assert "content" in result["choices"][0]["message"], "Missing 'content' in message"
6060
return result["choices"][0]["message"]["content"]
6161

62+
6263
def completions_response_handler(response):
6364
"""
6465
Process completions API responses.

0 commit comments

Comments
 (0)