(sglang) ➜ sglang git:(main) # Example: Launch FLUX for image generation
python3 -m sglang.launch_server \
--model-path black-forest-labs/FLUX.2-klein-4B \
--host 0.0.0.0 --port 30000
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/chenyang/sglang/python/sglang/launch_server.py", line 32, in <module>
server_args = prepare_server_args(sys.argv[1:])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/server_args.py", line 5597, in prepare_server_args
return ServerArgs.from_cli_args(raw_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/server_args.py", line 5083, in from_cli_args
return cls(**{attr: getattr(args, attr) for attr in attrs})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<string>", line 331, in __init__
File "/home/chenyang/sglang/python/sglang/srt/server_args.py", line 733, in __post_init__
self._handle_gpu_memory_settings(gpu_mem)
File "/home/chenyang/sglang/python/sglang/srt/server_args.py", line 1010, in _handle_gpu_memory_settings
if not self.use_mla_backend():
^^^^^^^^^^^^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/server_args.py", line 5116, in use_mla_backend
model_config = self.get_model_config()
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/server_args.py", line 5097, in get_model_config
self.model_config = ModelConfig.from_server_args(self)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/configs/model_config.py", line 250, in from_server_args
return ModelConfig(
^^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/configs/model_config.py", line 127, in __init__
self.hf_config = get_config(
^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/utils/common.py", line 3475, in wrapper
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/chenyang/sglang/python/sglang/srt/utils/hf_transformers_utils.py", line 320, in get_config
raise e
File "/home/chenyang/sglang/python/sglang/srt/utils/hf_transformers_utils.py", line 315, in get_config
config = AutoConfig.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/chenyang/.python/sglang/lib/python3.12/site-packages/transformers/models/auto/configuration_auto.py", line 1380, in from_pretrained
raise ValueError(
ValueError: Unrecognized model in black-forest-labs/FLUX.2-klein-4B. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: aimv2, aimv2_vision_model, albert, align, altclip, apertus, arcee, aria, aria_text, audio-spectrogram-transformer, autoformer, aya_vision, bamba, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, bitnet, blenderbot, blenderbot-small, blip, blip-2, blip_2_qformer, bloom, blt, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_text_model, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, cohere2, cohere2_vision, colpali, colqwen2, conditional_detr, convbert, convnext, convnextv2, cpmant, csm, ctrl, cvt, d_fine, dab-detr, dac, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deepseek_v2, deepseek_v3, deepseek_vl, deepseek_vl_hybrid, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dia, diffllama, dinat, dinov2, dinov2_with_registers, dinov3_convnext, dinov3_vit, distilbert, doge, donut-swin, dots1, dpr, dpt, edgetam, edgetam_video, edgetam_vision_model, efficientformer, efficientloftr, efficientnet, electra, emu3, encodec, encoder-decoder, eomt, ernie, ernie4_5, ernie4_5_moe, ernie_m, esm, evolla, exaone4, falcon, falcon_h1, falcon_mamba, fastspeech2_conformer, fastspeech2_conformer_with_hifigan, flaubert, flava, flex_olmo, florence2, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, gemma3, gemma3_text, gemma3n, gemma3n_audio, gemma3n_text, gemma3n_vision, git, glm, glm4, glm4_moe, glm4v, glm4v_moe, glm4v_moe_text, glm4v_text, glpn, got_ocr2, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gpt_oss, gptj, gptsan-japanese, granite, granite_speech, granitemoe, granitemoehybrid, granitemoeshared, granitevision, graphormer, grounding-dino, groupvit, helium, hgnet_v2, hiera, hubert, hunyuan_v1_dense, hunyuan_v1_moe, ibert, idefics, idefics2, idefics3, idefics3_vision, ijepa, imagegpt, informer, instructblip, instructblipvideo, internvl, internvl_vision, jamba, janus, jetmoe, jukebox, kosmos-2, kosmos-2.5, kyutai_speech_to_text, layoutlm, layoutlmv2, layoutlmv3, led, levit, lfm2, lfm2_vl, lightglue, lilt, llama, llama4, llama4_text, llava, llava_next, llava_next_video, llava_onevision, longcat_flash, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, metaclip_2, mgp-str, mimi, minimax, ministral, mistral, mistral3, mixtral, mlcd, mllama, mm-grounding-dino, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, modernbert, modernbert-decoder, moonshine, moshi, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, olmo2, olmo3, olmoe, omdet-turbo, oneformer, open-llama, openai-gpt, opt, ovis2, owlv2, owlvit, paligemma, parakeet_ctc, parakeet_encoder, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, perception_encoder, perception_lm, persimmon, phi, phi3, phi4_multimodal, phimoe, pix2struct, pixtral, plbart, poolformer, pop2piano, prompt_depth_anything, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_5_omni, qwen2_5_vl, qwen2_5_vl_text, qwen2_audio, qwen2_audio_encoder, qwen2_moe, qwen2_vl, qwen2_vl_text, qwen3, qwen3_moe, qwen3_next, qwen3_omni_moe, qwen3_vl, qwen3_vl_moe, qwen3_vl_moe_text, qwen3_vl_text, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rt_detr_v2, rwkv, sam, sam2, sam2_hiera_det_model, sam2_video, sam2_vision_model, sam_hq, sam_hq_vision_model, sam_vision_model, seamless_m4t, seamless_m4t_v2, seed_oss, segformer, seggpt, sew, sew-d, shieldgemma2, siglip, siglip2, siglip2_vision_model, siglip_vision_model, smollm3, smolvlm, smolvlm_vision, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superglue, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, t5gemma, table-transformer, tapas, textnet, time_series_transformer, timesfm, timesformer, timm_backbone, timm_wrapper, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, vaultgemma, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vitpose, vitpose_backbone, vits, vivit, vjepa2, voxtral, voxtral_encoder, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xcodec, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xlstm, xmod, yolos, yoso, zamba, zamba2, zoedepth, lfm2, lfm2_moe, afmoe, bailing_hybrid, chatglm, exaone, deepseek_vl_v2, multi_modality, kimi_vl, internvl_chat, step3_vl, kimi_linear, dots_vlm, dots_ocr, NemotronH_Nano_VL_V2, nemotron_h, deepseek-ocr, qwen3_5, qwen3_5_moe, jet_nemotron, jet_vlm, kimi_k25, step3p5
Checklist
Motivation
https://docs.sglang.io/basic_usage/diffusion.html
Is this what we expected?
Related resources
No response