Traceback (most recent call last):
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/sgl-workspace/sglang/python/sglang/launch_server.py", line 14, in <module>
launch_server(server_args)
File "/sgl-workspace/sglang/python/sglang/srt/entrypoints/http_server.py", line 834, in launch_server
tokenizer_manager, template_manager, scheduler_info = _launch_subprocesses(
File "/sgl-workspace/sglang/python/sglang/srt/entrypoints/engine.py", line 754, in _launch_subprocesses
tokenizer_manager = TokenizerManager(server_args, port_args)
File "/sgl-workspace/sglang/python/sglang/srt/managers/tokenizer_manager.py", line 196, in __init__
self.model_config = ModelConfig.from_server_args(server_args)
File "/sgl-workspace/sglang/python/sglang/srt/configs/model_config.py", line 257, in from_server_args
return ModelConfig(
File "/sgl-workspace/sglang/python/sglang/srt/configs/model_config.py", line 243, in __init__
self._verify_quantization()
File "/sgl-workspace/sglang/python/sglang/srt/configs/model_config.py", line 395, in _verify_quantization
quantization_override = method.override_quantization_method(
File "/sgl-workspace/sglang/python/sglang/srt/layers/quantization/gptq.py", line 284, in override_quantization_method
can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)
File "/sgl-workspace/sglang/python/sglang/srt/layers/quantization/gptq.py", line 347, in is_gptq_marlin_compatible
return check_marlin_supported(
NameError: name 'check_marlin_supported' is not defined. Did you mean: 'check_marlin_format'?
command: >
python3 -m sglang.launch_server
--model-path Qwen/Qwen3-30B-A3B-GPTQ-Int4
--served-model-name qwen-3-30b-a3b
--sleep-on-idle
--json-model-override-args '{"rope_scaling":{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}}'
--context-length 131072
--chat-template /root/templates/qwen3_nonthinking.jinja
--host 0.0.0.0
--enable-p2p-check
--tensor-parallel-size 2
--port 80
--tool-call-parser qwen25
Checklist
Describe the bug
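I'm using the Docker image
`lmsysorg/sglang:v0.4.8-cu126` and ran into this issue when launching the server: `NameError: name 'check_marlin_supported' is not defined` (see the full traceback).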
Reproduction
Start the sglang Docker image with the `python3 -m sglang.launch_server` command given in the `command:` block.
Environment
Docker image
lmsysorg/sglang:v0.4.8-cu126