diff --git a/vllm/model_executor/models/qwen3_asr_realtime.py b/vllm/model_executor/models/qwen3_asr_realtime.py index a149350d1..4fb6ef5d9 100644 --- a/vllm/model_executor/models/qwen3_asr_realtime.py +++ b/vllm/model_executor/models/qwen3_asr_realtime.py @@ -22,7 +22,6 @@ from collections.abc import AsyncGenerator, Mapping import numpy as np import torch -from vllm.compilation.decorators import support_torch_compile from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig from vllm.inputs.data import PromptType, TokensPrompt from vllm.logger import init_logger @@ -177,7 +176,6 @@ class Qwen3ASRRealtimeMultiModalProcessor(Qwen3ASRMultiModalProcessor): info=Qwen3ASRProcessingInfo, dummy_inputs=Qwen3ASRDummyInputsBuilder, ) -@support_torch_compile class Qwen3ASRRealtimeGeneration(Qwen3ASRForConditionalGeneration, SupportsRealtime): realtime_max_tokens = 64