| 1 |
4-gpu-models/test_qwen3_next_models.py |
pre-merge-C |
e2e accuracy |
350s |
🔲 NOT STARTED |
| 2 |
4-gpu-models/test_qwen3_next_models_mtp.py |
pre-merge-C |
e2e accuracy |
500s |
🔲 NOT STARTED |
| 3 |
8-gpu-models/test_deepseek_v32_basic.py |
pre-merge-C |
e2e accuracy |
360s |
🔲 NOT STARTED |
| 4 |
8-gpu-models/test_deepseek_v32_mtp.py |
pre-merge-C |
e2e accuracy |
720s |
🔲 NOT STARTED |
| 5 |
8-gpu-models/test_deepseek_v3_basic.py |
pre-merge-C |
e2e accuracy |
275s |
🔲 NOT STARTED |
| 6 |
8-gpu-models/test_deepseek_v3_mtp.py |
pre-merge-C |
e2e accuracy |
275s |
🔲 NOT STARTED |
| 7 |
8-gpu-models/test_mimo_models.py |
pre-merge-C |
e2e accuracy |
200s |
🔲 NOT STARTED |
| 8 |
8-gpu-models/test_ring_2_5_1t.py |
pre-merge-C |
e2e accuracy |
1000s |
🔲 NOT STARTED |
| 9 |
attention/test_chunk_gated_delta_rule.py |
pre-merge-B |
kernel |
60s |
🔲 NOT STARTED |
| 10 |
attention/test_fa3.py |
pre-merge-B |
e2e accuracy |
300s |
🔲 NOT STARTED |
| 11 |
attention/test_hybrid_attn_backend.py |
pre-merge-B |
e2e accuracy |
200s |
🔲 NOT STARTED |
| 12 |
attention/test_kda_kernels.py |
pre-merge-B |
kernel |
30s |
🔲 NOT STARTED |
| 13 |
attention/test_local_attn.py |
pre-merge-C |
e2e perf |
200s |
🔲 NOT STARTED |
| 14 |
attention/test_triton_attention_backend.py |
pre-merge-B |
e2e perf |
1400s |
🔲 NOT STARTED |
| 15 |
attention/test_triton_attention_kernels.py |
pre-merge-B |
kernel |
30s |
🔲 NOT STARTED |
| 16 |
attention/test_triton_sliding_window.py |
pre-merge-B |
e2e accuracy |
200s |
🔲 NOT STARTED |
| 17 |
backends/test_qwen3_fp4_trtllm_gen_moe.py |
nightly |
e2e model accuracy |
300s |
🔲 NOT STARTED |
| 18 |
backends/test_torch_compile.py |
pre-merge-B |
e2e feature accuracy |
1100s |
🔲 NOT STARTED |
| 19 |
bench_fn/test_bench_serving_functionality.py |
nightly |
e2e feature correctness |
300s |
🔲 NOT STARTED |
| 20 |
core/test_gpt_oss_1gpu.py |
pre-merge-B |
e2e accuracy |
750s |
🔲 NOT STARTED |
| 21 |
core/test_score_api.py |
pre-merge-B |
e2e feature accuracy |
260s |
🔲 NOT STARTED |
| 22 |
core/test_srt_engine.py |
pre-merge-B |
e2e feature accuracy |
261s |
🔲 NOT STARTED |
| 23 |
disaggregation/test_disaggregation_basic.py |
pre-merge-B |
e2e feature accuracy |
400s |
🔲 NOT STARTED |
| 24 |
disaggregation/test_disaggregation_decode_offload.py |
pre-merge-B |
e2e feature accuracy |
600s |
🔲 NOT STARTED |
| 25 |
distributed/test_data_parallelism.py |
pre-merge-B |
e2e accuracy |
73s |
🔲 NOT STARTED |
| 26 |
distributed/test_disaggregation_different_tp.py |
pre-merge-C |
e2e accuracy |
600s |
🔲 NOT STARTED |
| 27 |
distributed/test_disaggregation_dp_attention.py |
pre-merge-C |
e2e accuracy |
580s |
🔲 NOT STARTED |
| 28 |
distributed/test_disaggregation_hybrid_attention.py |
pre-merge-C |
e2e accuracy |
400s |
🔲 NOT STARTED |
| 29 |
distributed/test_disaggregation_pp.py |
pre-merge-C |
e2e accuracy |
180s |
🔲 NOT STARTED |
| 30 |
distributed/test_dp_attention.py |
pre-merge-B |
e2e accuracy |
350s |
🔲 NOT STARTED |
| 31 |
distributed/test_dp_attention_large.py |
pre-merge-C |
e2e accuracy |
350s |
🔲 NOT STARTED |
| 32 |
distributed/test_epd_disaggregation.py |
pre-merge-C |
e2e accuracy |
150s |
🔲 NOT STARTED |
| 33 |
distributed/test_load_weights_from_remote_instance.py |
pre-merge-B |
e2e feature correctness |
72s |
🔲 NOT STARTED |
| 34 |
distributed/test_pp_single_node.py |
pre-merge-C |
e2e accuracy |
500s |
🔲 NOT STARTED |
| 35 |
dllm/test_llada2_mini.py |
pre-merge-B |
e2e perf |
330s |
🔲 NOT STARTED |
| 36 |
ep/test_deepep_large.py |
pre-merge-C |
e2e accuracy |
563s |
🔲 NOT STARTED |
| 37 |
ep/test_deepep_small.py |
pre-merge-C |
e2e accuracy |
531s |
🔲 NOT STARTED |
| 38 |
ep/test_mooncake_ep_small.py |
pre-merge-C |
e2e accuracy |
660s |
🔲 NOT STARTED |
| 39 |
eval/test_moe_eval_accuracy_large.py |
pre-merge-B |
e2e accuracy |
500s |
🔲 NOT STARTED |
| 40 |
kernels/test_fused_topk_deepseek.py |
nightly |
kernel |
2s |
🔲 NOT STARTED |
| 41 |
kernels/test_nsa_indexer.py |
pre-merge-B |
unit |
2s |
🔲 NOT STARTED |
| 42 |
layers/mamba/test_mamba2_mixer.py |
pre-merge-B |
unit |
50s |
🔲 NOT STARTED |
| 43 |
layers/test_fla_layernorm_guard.py |
pre-merge-B |
kernel |
60s |
🔲 NOT STARTED |
| 44 |
lora/test_lora_qwen3.py |
nightly, pre-merge-B |
e2e accuracy |
97s |
🔲 NOT STARTED |
| 45 |
lora/test_lora_radix_cache.py |
nightly |
e2e feature accuracy |
200s |
🔲 NOT STARTED |
| 46 |
lora/test_lora_tied_lm_head.py |
nightly |
e2e feature accuracy |
120s |
🔲 NOT STARTED |
| 47 |
lora/test_lora_tp.py |
pre-merge-B |
e2e feature accuracy |
116s |
🔲 NOT STARTED |
| 48 |
lora/test_lora_update.py |
pre-merge-B |
e2e feature accuracy |
487s |
🔲 NOT STARTED |
| 49 |
lora/test_multi_lora_backend.py |
pre-merge-B |
e2e feature accuracy |
100s |
🔲 NOT STARTED |
| 50 |
mla/test_flashmla.py |
pre-merge-B |
e2e accuracy |
284s |
🔲 NOT STARTED |
| 51 |
mla/test_mla.py |
pre-merge-B |
e2e accuracy |
1100s |
🔲 NOT STARTED |
| 52 |
mla/test_mla_deepseek_v3.py |
pre-merge-B |
e2e accuracy |
442s |
🔲 NOT STARTED |
| 53 |
mla/test_mla_flashinfer.py |
pre-merge-B |
e2e accuracy |
302s |
🔲 NOT STARTED |
| 54 |
mla/test_mla_fp8.py |
pre-merge-B |
e2e accuracy |
800s |
🔲 NOT STARTED |
| 55 |
models/test_compressed_tensors_models.py |
pre-merge-B |
e2e accuracy |
42s |
🔲 NOT STARTED |
| 56 |
models/test_dummy_grok_models.py |
pre-merge-B |
e2e accuracy |
120s |
🔲 NOT STARTED |
| 57 |
models/test_generation_models.py |
pre-merge-B |
e2e accuracy |
106s |
🔲 NOT STARTED |
| 58 |
models/test_gpt_oss_models_pcg.py |
pre-merge-B |
e2e accuracy |
400s |
🔲 NOT STARTED |
| 59 |
models/test_kimi_linear_models.py |
pre-merge-B |
e2e accuracy |
90s |
🔲 NOT STARTED |
| 60 |
models/test_kimi_linear_models_pcg.py |
pre-merge-B |
e2e accuracy |
100s |
🔲 NOT STARTED |
| 61 |
models/test_nvidia_nemotron_3_nano.py |
pre-merge-B |
e2e accuracy |
180s |
🔲 NOT STARTED |
| 62 |
models/test_nvidia_nemotron_nano_v2.py |
pre-merge-B |
e2e accuracy |
132s |
🔲 NOT STARTED |
| 63 |
models/test_nvidia_nemotron_nano_v2_vl.py |
pre-merge-B |
e2e accuracy |
214s |
🔲 NOT STARTED |
| 64 |
models/test_qwen3_next_models_pcg.py |
pre-merge-C |
e2e accuracy |
400s |
🔲 NOT STARTED |
| 65 |
models/test_vlm_models.py |
pre-merge-B |
e2e accuracy |
850s |
🔲 NOT STARTED |
| 66 |
moe/test_fused_moe.py |
pre-merge-B |
kernel |
80s |
🔲 NOT STARTED |
| 67 |
moe/test_glm4_moe_models.py |
pre-merge-B |
e2e accuracy |
100s |
🔲 NOT STARTED |
| 68 |
moe/test_moe_ep.py |
pre-merge-B |
e2e accuracy |
140s |
🔲 NOT STARTED |
| 69 |
moe/test_torch_compile_moe.py |
pre-merge-B |
e2e accuracy |
1400s |
🔲 NOT STARTED |
| 70 |
moe/test_triton_fused_moe.py |
pre-merge-B |
kernel |
89s |
🔲 NOT STARTED |
| 71 |
moe/test_triton_moe_channel_fp8_kernel.py |
pre-merge-B |
kernel |
16s |
🔲 NOT STARTED |
| 72 |
perf/test_bench_one_batch_1gpu.py |
pre-merge-B |
e2e perf |
120s |
🔲 NOT STARTED |
| 73 |
perf/test_bench_one_batch_2gpu.py |
pre-merge-B |
e2e perf |
630s |
🔲 NOT STARTED |
| 74 |
perf/test_bench_serving_1gpu_large.py |
pre-merge-B |
e2e perf |
300s |
🔲 NOT STARTED |
| 75 |
perf/test_bench_serving_1gpu_part1.py |
pre-merge-B |
e2e perf |
1100s |
🔲 NOT STARTED |
| 76 |
perf/test_bench_serving_1gpu_part2.py |
pre-merge-B |
e2e perf |
900s |
🔲 NOT STARTED |
| 77 |
perf/test_bench_serving_2gpu.py |
pre-merge-B |
e2e perf |
1100s |
🔲 NOT STARTED |
| 78 |
piecewise_cuda_graph/test_piecewise_cuda_graph_support_1_gpu.py |
pre-merge-B |
e2e feature accuracy |
220s |
🔲 NOT STARTED |
| 79 |
quant/test_autoround.py |
pre-merge-B |
e2e accuracy |
77s |
🔲 NOT STARTED |
| 80 |
quant/test_awq.py |
pre-merge-B |
e2e accuracy |
200s |
🔲 NOT STARTED |
| 81 |
quant/test_eval_fp8_accuracy.py |
pre-merge-B |
e2e accuracy |
600s |
🔲 NOT STARTED |
| 82 |
quant/test_fp8_kernel.py |
pre-merge-B |
kernel |
132s |
🔲 NOT STARTED |
| 83 |
quant/test_fp8_utils.py |
pre-merge-B |
kernel |
9s |
🔲 NOT STARTED |
| 84 |
quant/test_fp8kv_triton.py |
pre-merge-B |
e2e accuracy |
520s |
🔲 NOT STARTED |
| 85 |
quant/test_gptqmodel_dynamic.py |
pre-merge-B |
e2e feature correctness |
102s |
🔲 NOT STARTED |
| 86 |
quant/test_modelopt_fp8.py |
pre-merge-B |
e2e accuracy |
120s |
🔲 NOT STARTED |
| 87 |
quant/test_quantization.py |
pre-merge-B |
e2e accuracy |
185s |
🔲 NOT STARTED |
| 88 |
quant/test_w4a8_deepseek_v3.py |
pre-merge-C |
e2e accuracy |
520s |
🔲 NOT STARTED |
| 89 |
quant/test_w8a8_quantization.py |
pre-merge-B |
e2e accuracy |
160s |
🔲 NOT STARTED |
| 90 |
radix_cache/test_swa_radix_cache_kl.py |
pre-merge-B |
e2e feature accuracy |
100s |
🔲 NOT STARTED |
| 91 |
rl/test_multi_instance_release_memory_occupation.py |
pre-merge-C |
e2e feature correctness |
64s |
🔲 NOT STARTED |
| 92 |
rl/test_release_memory_occupation.py |
pre-merge-C |
e2e feature correctness |
200s |
🔲 NOT STARTED |
| 93 |
rl/test_return_routed_experts.py |
pre-merge-C |
e2e feature correctness |
360s |
🔲 NOT STARTED |
| 94 |
rotary/test_mrope.py |
pre-merge-B |
kernel |
15s |
🔲 NOT STARTED |
| 95 |
scheduler/test_no_chunked_prefill.py |
pre-merge-B |
e2e feature accuracy |
108s |
🔲 NOT STARTED |
| 96 |
scheduler/test_no_overlap_scheduler.py |
pre-merge-B |
e2e feature accuracy |
275s |
🔲 NOT STARTED |
| 97 |
spec/eagle/test_eagle_constrained_decoding.py |
pre-merge-B |
e2e feature correctness |
100s |
🔲 NOT STARTED |
| 98 |
spec/eagle/test_eagle_dp_attention.py |
pre-merge-C |
e2e accuracy |
200s |
🔲 NOT STARTED |
| 99 |
spec/eagle/test_eagle_infer_a.py |
pre-merge-B |
e2e feature correctness |
561s |
🔲 NOT STARTED |
| 100 |
spec/eagle/test_eagle_infer_b.py |
pre-merge-B |
e2e feature correctness |
1100s |
🔲 NOT STARTED |
| 101 |
spec/test_constrained_decoding_spec_reasoning.py |
pre-merge-B |
e2e feature correctness |
60s |
🔲 NOT STARTED |
| 102 |
spec/test_ngram_speculative_decoding.py |
pre-merge-B |
e2e feature correctness |
230s |
🔲 NOT STARTED |
| 103 |
spec/test_standalone_speculative_decoding.py |
pre-merge-B |
e2e feature accuracy |
308s |
🔲 NOT STARTED |
| 104 |
test_srt_backend.py |
pre-merge-A |
e2e feature correctness |
120s |
🔲 NOT STARTED |
| 105 |
tokenizer/test_multi_tokenizer.py |
pre-merge-B |
e2e perf |
345s |
🔲 NOT STARTED |
| 106 |
vlm/test_encoder_dp.py |
nightly |
e2e accuracy |
500s |
🔲 NOT STARTED |
| 107 |
vlm/test_vision_chunked_prefill.py |
pre-merge-B |
e2e feature accuracy |
270s |
🔲 NOT STARTED |
| 108 |
vlm/test_vision_openai_server_a.py |
pre-merge-B |
e2e feature accuracy |
957s |
🔲 NOT STARTED |
| 109 |
vlm/test_vlm_input_format.py |
pre-merge-B |
e2e feature accuracy |
447s |
🔲 NOT STARTED |
Hopper vs Blackwell CI Test Gaps (Backlog)
This backlog tracks tests that run on Hopper but do not yet run on Blackwell. It is part of the SGLang CI improvement initiative (#20514).
Goal
Migrate high-priority tests currently running on Hopper (but not on Blackwell) to Blackwell CI suites. Since Blackwell capacity is limited, most tests that run per-commit on Hopper will run nightly on Blackwell, and some nightly/weekly tests will run weekly on Blackwell.
Summary
Progress Tracker
Nightly on Blackwell (109 files)
4-gpu-models/test_qwen3_next_models.py
4-gpu-models/test_qwen3_next_models_mtp.py
8-gpu-models/test_deepseek_v32_basic.py
8-gpu-models/test_deepseek_v32_mtp.py
8-gpu-models/test_deepseek_v3_basic.py
8-gpu-models/test_deepseek_v3_mtp.py
8-gpu-models/test_mimo_models.py
8-gpu-models/test_ring_2_5_1t.py
attention/test_chunk_gated_delta_rule.py
attention/test_fa3.py
attention/test_hybrid_attn_backend.py
attention/test_kda_kernels.py
attention/test_local_attn.py
attention/test_triton_attention_backend.py
attention/test_triton_attention_kernels.py
attention/test_triton_sliding_window.py
backends/test_qwen3_fp4_trtllm_gen_moe.py
backends/test_torch_compile.py
bench_fn/test_bench_serving_functionality.py
core/test_gpt_oss_1gpu.py
core/test_score_api.py
core/test_srt_engine.py
disaggregation/test_disaggregation_basic.py
disaggregation/test_disaggregation_decode_offload.py
distributed/test_data_parallelism.py
distributed/test_disaggregation_different_tp.py
distributed/test_disaggregation_dp_attention.py
distributed/test_disaggregation_hybrid_attention.py
distributed/test_disaggregation_pp.py
distributed/test_dp_attention.py
distributed/test_dp_attention_large.py
distributed/test_epd_disaggregation.py
distributed/test_load_weights_from_remote_instance.py
distributed/test_pp_single_node.py
dllm/test_llada2_mini.py
ep/test_deepep_large.py
ep/test_deepep_small.py
ep/test_mooncake_ep_small.py
eval/test_moe_eval_accuracy_large.py
kernels/test_fused_topk_deepseek.py
kernels/test_nsa_indexer.py
layers/mamba/test_mamba2_mixer.py
layers/test_fla_layernorm_guard.py
lora/test_lora_qwen3.py
lora/test_lora_radix_cache.py
lora/test_lora_tied_lm_head.py
lora/test_lora_tp.py
lora/test_lora_update.py
lora/test_multi_lora_backend.py
mla/test_flashmla.py
mla/test_mla.py
mla/test_mla_deepseek_v3.py
mla/test_mla_flashinfer.py
mla/test_mla_fp8.py
models/test_compressed_tensors_models.py
models/test_dummy_grok_models.py
models/test_generation_models.py
models/test_gpt_oss_models_pcg.py
models/test_kimi_linear_models.py
models/test_kimi_linear_models_pcg.py
models/test_nvidia_nemotron_3_nano.py
models/test_nvidia_nemotron_nano_v2.py
models/test_nvidia_nemotron_nano_v2_vl.py
models/test_qwen3_next_models_pcg.py
models/test_vlm_models.py
moe/test_fused_moe.py
moe/test_glm4_moe_models.py
moe/test_moe_ep.py
moe/test_torch_compile_moe.py
moe/test_triton_fused_moe.py
moe/test_triton_moe_channel_fp8_kernel.py
perf/test_bench_one_batch_1gpu.py
perf/test_bench_one_batch_2gpu.py
perf/test_bench_serving_1gpu_large.py
perf/test_bench_serving_1gpu_part1.py
perf/test_bench_serving_1gpu_part2.py
perf/test_bench_serving_2gpu.py
piecewise_cuda_graph/test_piecewise_cuda_graph_support_1_gpu.py
quant/test_autoround.py
quant/test_awq.py
quant/test_eval_fp8_accuracy.py
quant/test_fp8_kernel.py
quant/test_fp8_utils.py
quant/test_fp8kv_triton.py
quant/test_gptqmodel_dynamic.py
quant/test_modelopt_fp8.py
quant/test_quantization.py
quant/test_w4a8_deepseek_v3.py
quant/test_w8a8_quantization.py
radix_cache/test_swa_radix_cache_kl.py
rl/test_multi_instance_release_memory_occupation.py
rl/test_release_memory_occupation.py
rl/test_return_routed_experts.py
rotary/test_mrope.py
scheduler/test_no_chunked_prefill.py
scheduler/test_no_overlap_scheduler.py
spec/eagle/test_eagle_constrained_decoding.py
spec/eagle/test_eagle_dp_attention.py
spec/eagle/test_eagle_infer_a.py
spec/eagle/test_eagle_infer_b.py
spec/test_constrained_decoding_spec_reasoning.py
spec/test_ngram_speculative_decoding.py
spec/test_standalone_speculative_decoding.py
test_srt_backend.py
tokenizer/test_multi_tokenizer.py
vlm/test_encoder_dp.py
vlm/test_vision_chunked_prefill.py
vlm/test_vision_openai_server_a.py
vlm/test_vlm_input_format.py
Weekly on Blackwell (3 files)
eval/test_text_models_gsm8k_eval.py
eval/test_vlms_mmmu_eval.py
test_hybrid_dp_ep_tp_mtp.py
Status Legend
Weekly Progress
2026-03-13
Related GitHub Issues
TBA