Skip to content

Commit 37422f4

Browse files
committed
fix: adapt vllm backend for vLLM 0.17.1+ compatibility
- Remove deprecated `multimodal_config` param from MMEncoderAttention (use `prefix` instead)
- Remove deprecated `attn_backend_override` param from get_vit_attn_backend
- Move `get_data_parser()` from Qwen3ASRMultiModalProcessor to Qwen3ASRProcessingInfo (vLLM 0.15+ changed the method location and naming convention)

Made-with: Cursor
1 parent c17a131 commit 37422f4

File tree

1 file changed

+8
-13
lines changed

1 file changed

+8
-13
lines changed

qwen_asr/core/vllm_backend/qwen3_asr.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ def __init__(
198198
num_heads=self.num_local_heads,
199199
head_size=self.head_dim,
200200
scale=self.scaling,
201-
multimodal_config=multimodal_config,
201+
prefix=prefix,
202202
)
203203

204204
def forward(
@@ -358,16 +358,9 @@ def __init__(
358358
self.act = _ACTIVATION_REGISTRY[config.activation_function]
359359
self.proj2 = nn.Linear(config.d_model, config.output_dim)
360360

361-
# Get attention backend
362-
attn_backend_override = (
363-
multimodal_config.mm_encoder_attn_backend
364-
if multimodal_config is not None
365-
else None
366-
)
367361
self.attn_backend = get_vit_attn_backend(
368362
head_size=config.d_model // config.encoder_attention_heads,
369363
dtype=torch.get_default_dtype(),
370-
attn_backend_override=attn_backend_override,
371364
)
372365

373366
def compute_attn_mask_seqlen(self, cu_seqlens: torch.Tensor) -> torch.Tensor | None:
@@ -553,6 +546,12 @@ def get_feature_extractor(self, **kwargs: object) -> WhisperFeatureExtractor:
553546
def get_supported_mm_limits(self) -> Mapping[str, int | None]:
554547
return {"audio": None}
555548

549+
def get_data_parser(self) -> MultiModalDataParser:
550+
feature_extractor = self.get_feature_extractor()
551+
return Qwen3ASRMultiModalDataParser(
552+
target_sr=feature_extractor.sampling_rate,
553+
)
554+
556555

557556
class Qwen3ASRDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3ASRProcessingInfo]):
558557
def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
@@ -622,11 +621,7 @@ def _parse_audio_data(
622621
class Qwen3ASRMultiModalProcessor(
623622
Qwen3OmniMoeThinkerMultiModalProcessor,
624623
):
625-
def _get_data_parser(self) -> MultiModalDataParser:
626-
feature_extractor = self.info.get_feature_extractor()
627-
return Qwen3ASRMultiModalDataParser(
628-
target_sr=feature_extractor.sampling_rate,
629-
)
624+
pass
630625

631626
def _get_mm_fields_config(
632627
self,

0 commit comments

Comments (0)