fix: adapt vllm backend for vLLM 0.17.1+ compatibility

yangbo0926 · yangbo0926 · commit 37422f41fe47 · 2026-04-04T19:08:01.000+08:00
- Stop passing the deprecated `multimodal_config` argument to MMEncoderAttention (pass `prefix` instead)
- Stop passing the deprecated `attn_backend_override` argument to get_vit_attn_backend
- Move `_get_data_parser()` from Qwen3ASRMultiModalProcessor to Qwen3ASRProcessingInfo,
  renaming it to the public `get_data_parser()`
  (vLLM 0.15+ changed the method location and naming convention)

Made-with: Cursor
diff --git a/qwen_asr/core/vllm_backend/qwen3_asr.py b/qwen_asr/core/vllm_backend/qwen3_asr.py
@@ -198,7 +198,7 @@ def __init__(
             num_heads=self.num_local_heads,
             head_size=self.head_dim,
             scale=self.scaling,
-            multimodal_config=multimodal_config,
+            prefix=prefix,
         )
 
     def forward(
@@ -358,16 +358,9 @@ def __init__(
         self.act = _ACTIVATION_REGISTRY[config.activation_function]
         self.proj2 = nn.Linear(config.d_model, config.output_dim)
 
-        # Get attention backend
-        attn_backend_override = (
-            multimodal_config.mm_encoder_attn_backend
-            if multimodal_config is not None
-            else None
-        )
         self.attn_backend = get_vit_attn_backend(
             head_size=config.d_model // config.encoder_attention_heads,
             dtype=torch.get_default_dtype(),
-            attn_backend_override=attn_backend_override,
         )
 
     def compute_attn_mask_seqlen(self, cu_seqlens: torch.Tensor) -> torch.Tensor | None:
@@ -553,6 +546,12 @@ def get_feature_extractor(self, **kwargs: object) -> WhisperFeatureExtractor:
     def get_supported_mm_limits(self) -> Mapping[str, int | None]:
         return {"audio": None}
 
+    def get_data_parser(self) -> MultiModalDataParser:
+        feature_extractor = self.get_feature_extractor()
+        return Qwen3ASRMultiModalDataParser(
+            target_sr=feature_extractor.sampling_rate,
+        )
+
 
 class Qwen3ASRDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3ASRProcessingInfo]):
     def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
@@ -622,11 +621,7 @@ def _parse_audio_data(
 class Qwen3ASRMultiModalProcessor(
     Qwen3OmniMoeThinkerMultiModalProcessor,
 ):
-    def _get_data_parser(self) -> MultiModalDataParser:
-        feature_extractor = self.info.get_feature_extractor()
-        return Qwen3ASRMultiModalDataParser(
-            target_sr=feature_extractor.sampling_rate,
-        )
+    pass
 
     def _get_mm_fields_config(
         self,