Make Qwen3VL compatible with Transformers v5 (#34262)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Roger Wang <hey@rogerw.io> Co-authored-by: Roger Wang <hey@rogerw.io>
2026-02-11 13:13:23 +01:00
parent 05339a7b20
commit 1e9204bff3
2 changed files with 25 additions and 37 deletions
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -1112,17 +1112,6 @@ class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo])
    }
 )
 class Qwen3LLMModel(Qwen3Model):
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__(vllm_config=vllm_config, prefix=prefix)
        vision_config = vllm_config.model_config.hf_config.vision_config
        if not get_pp_group().is_first_rank and hasattr(
            vision_config, "deepstack_visual_indexes"
        ):
            assert self.start_layer >= len(vision_config.deepstack_visual_indexes), (
                "start_layer should be greater than or equal to "
                "len(deepstack_visual_indexes)"
            )
    def forward(
        self,
        input_ids: torch.Tensor | None,
@@ -1178,7 +1167,7 @@ class Qwen3LLMModel(Qwen3Model):
 class Qwen3LLMForCausalLM(Qwen3ForCausalLM):
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super(Qwen3ForCausalLM, self).__init__()
-        config = vllm_config.model_config.hf_config.text_config
+        config = vllm_config.model_config.hf_config
        quant_config = vllm_config.quant_config
        self.config = config
@@ -1298,7 +1287,18 @@ class Qwen3VLForConditionalGeneration(
        with self._mark_language_model(vllm_config):
            self.language_model = Qwen3LLMForCausalLM(
-                vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
+                vllm_config=vllm_config.with_hf_config(config.text_config),
                prefix=maybe_prefix(prefix, "language_model"),
            )
        if not get_pp_group().is_first_rank and hasattr(
            config.vision_config, "deepstack_visual_indexes"
        ):
            assert self.language_model.start_layer >= len(
                config.vision_config.deepstack_visual_indexes
            ), (
                "start_layer should be greater than or equal to "
                "len(deepstack_visual_indexes)"
            )
        self.make_empty_intermediate_tensors = (
--- a/vllm/model_executor/models/qwen3_vl_moe.py
+++ b/vllm/model_executor/models/qwen3_vl_moe.py
@@ -48,7 +48,6 @@ from vllm.sequence import IntermediateTensors
 from .interfaces import MixtureOfExperts
 from .qwen3_moe import (
    Qwen3MoeDecoderLayer,
    Qwen3MoeForCausalLM,
    Qwen3MoeModel,
    Qwen3MoeSparseMoeBlock,
@@ -83,27 +82,6 @@ class Qwen3VLMoeProcessingInfo(Qwen3VLProcessingInfo):
    }
 )
 class Qwen3MoeLLMModel(Qwen3MoeModel):
    def __init__(
        self,
        *,
        vllm_config: VllmConfig,
        prefix: str = "",
        decoder_layer_type: type[torch.nn.Module] = Qwen3MoeDecoderLayer,
    ):
        super().__init__(
            vllm_config=vllm_config,
            prefix=prefix,
            decoder_layer_type=decoder_layer_type,
        )
        vision_config = vllm_config.model_config.hf_config.vision_config
        if not get_pp_group().is_first_rank and hasattr(
            vision_config, "deepstack_visual_indexes"
        ):
            assert self.start_layer >= len(vision_config.deepstack_visual_indexes), (
                "start_layer should be greater than or equal to "
                "len(deepstack_visual_indexes)"
            )
    def forward(
        self,
        input_ids: torch.Tensor | None,
@@ -352,7 +330,7 @@ class Qwen3MoeLLMModel(Qwen3MoeModel):
 class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM):
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super(Qwen3MoeForCausalLM, self).__init__()
-        self.config = vllm_config.model_config.hf_config.text_config
+        self.config = vllm_config.model_config.hf_config
        self.quant_config = vllm_config.quant_config
        self.model = Qwen3MoeLLMModel(
            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
@@ -473,10 +451,20 @@ class Qwen3VLMoeForConditionalGeneration(
        with self._mark_language_model(vllm_config):
            self.language_model = Qwen3MoeLLMForCausalLM(
-                vllm_config=vllm_config,
+                vllm_config=vllm_config.with_hf_config(config.text_config),
                prefix=maybe_prefix(prefix, "language_model"),
            )
        if not get_pp_group().is_first_rank and hasattr(
            config.vision_config, "deepstack_visual_indexes"
        ):
            assert self.language_model.start_layer >= len(
                config.vision_config.deepstack_visual_indexes
            ), (
                "start_layer should be greater than or equal to "
                "len(deepstack_visual_indexes)"
            )
        # Whether to include the gate_up_proj mapping is determined by
        # the language model.
        self.packed_modules_mapping = (