@@ -192,7 +192,7 @@ def wait_for_ready(self, payload, logger=logging.getLogger()):
192192 completions_response_handler ,
193193 ],
194194 model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" ,
195- delayed_start = 45 ,
195+ delayed_start = 60 ,
196196 ),
197197 "disaggregated" : TRTLLMConfig (
198198 name = "disaggregated" ,
@@ -205,7 +205,36 @@ def wait_for_ready(self, payload, logger=logging.getLogger()):
205205 completions_response_handler ,
206206 ],
207207 model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" ,
208- delayed_start = 45 ,
208+ delayed_start = 60 ,
209+ ),
210+ # TODO: These are sanity tests that the KV router examples launch
211+ # and run inference without error; they do not yet do detailed
212+ # checks on the behavior of KV routing.
213+ "aggregated_router" : TRTLLMConfig (
214+ name = "aggregated_router" ,
215+ directory = "/workspace/components/backends/trtllm" ,
216+ script_name = "agg_router.sh" ,
217+ marks = [pytest .mark .gpu_1 , pytest .mark .tensorrtllm ],
218+ endpoints = ["v1/chat/completions" , "v1/completions" ],
219+ response_handlers = [
220+ chat_completions_response_handler ,
221+ completions_response_handler ,
222+ ],
223+ model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" ,
224+ delayed_start = 60 ,
225+ ),
226+ "disaggregated_router" : TRTLLMConfig (
227+ name = "disaggregated_router" ,
228+ directory = "/workspace/components/backends/trtllm" ,
229+ script_name = "disagg_router.sh" ,
230+ marks = [pytest .mark .gpu_2 , pytest .mark .tensorrtllm ],
231+ endpoints = ["v1/chat/completions" , "v1/completions" ],
232+ response_handlers = [
233+ chat_completions_response_handler ,
234+ completions_response_handler ,
235+ ],
236+ model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" ,
237+ delayed_start = 60 ,
209238 ),
210239}
211240
0 commit comments