[Bugfix] Fix Qwen-VL tokenizer implementation (#36140)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-03-06 00:07:19 +08:00
committed by GitHub
parent 176c799f4c
commit 7196348157
9 changed files with 118 additions and 66 deletions

View File

@@ -6,11 +6,9 @@
# Copyright (c) Alibaba Cloud.
"""Inference-only Qwen-VL model compatible with HuggingFace weights."""
import copy
import math
import unicodedata
from collections.abc import Callable, Collection, Mapping, Sequence, Set
from functools import lru_cache, partial
from collections.abc import Callable, Mapping, Sequence
from functools import partial
from typing import Annotated, Literal, TypeAlias
import regex as re
@@ -436,60 +434,6 @@ class QwenVLModel(QWenModel):
)
@lru_cache(maxsize=1)
def _get_tokenizer_without_image_pad(
    tokenizer: PreTrainedTokenizer,
) -> PreTrainedTokenizer:
    """
    Return a deep copy of ``tokenizer`` whose class is swapped for a
    subclass that skips the image-pad-token insertion behavior.

    The logic of adding image pad tokens should only be applied in
    [`QwenVLProcessor`][vllm.model_executor.models.qwen_vl.QwenVLProcessor],
    so they are patched out here.

    The definition of the wrapped tokenizer can be found here:
    https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py
    """

    class TokenizerWithoutImagePad(tokenizer.__class__):  # type: ignore
        def tokenize(
            self,
            text: str,
            allowed_special: Set[str] | str = "all",
            disallowed_special: Collection[str] | str = (),
            **kwargs,
        ) -> list[bytes | str]:
            # Encode directly with the inner tiktoken-style tokenizer so the
            # image-pad post-processing of the parent class never runs.
            normalized = unicodedata.normalize("NFC", text)
            token_ids = self.tokenizer.encode(
                normalized,
                allowed_special=allowed_special,
                disallowed_special=disallowed_special,
            )
            return [self.decoder[token_id] for token_id in token_ids]

        def _decode(
            self,
            token_ids: int | list[int],
            skip_special_tokens: bool = False,
            errors: str | None = None,
            **kwargs,
        ) -> str:
            # Accept a bare id for convenience, mirroring the HF interface.
            ids = [token_ids] if isinstance(token_ids, int) else token_ids
            return self.tokenizer.decode(
                ids,
                errors=errors or self.errors,
            )

    # Keep the user-visible class name informative even though the class
    # object itself is freshly created per call.
    TokenizerWithoutImagePad.__name__ = f"{tokenizer.__class__.__name__}WithoutImagePad"

    patched_tokenizer = copy.deepcopy(tokenizer)
    patched_tokenizer.__class__ = TokenizerWithoutImagePad
    return patched_tokenizer
class QwenVLProcessor:
"""
This model doesn't define its own HF processor,
@@ -574,12 +518,6 @@ class QwenVLProcessor:
class QwenVLProcessingInfo(BaseProcessingInfo):
def get_tokenizer(self) -> PreTrainedTokenizer:
    """Return the HF tokenizer with image-pad insertion patched out."""
    base_tokenizer = self.ctx.get_tokenizer()
    assert isinstance(base_tokenizer, PreTrainedTokenizer)
    return _get_tokenizer_without_image_pad(base_tokenizer)
def get_hf_processor(self, **kwargs: object) -> QwenVLProcessor:
return self.ctx.init_processor(
QwenVLProcessor,