diff --git a/tests/tokenizers_/test_basic.py b/tests/tokenizers_/test_basic.py
index 1c1dd3338..cf0d8f53c 100644
--- a/tests/tokenizers_/test_basic.py
+++ b/tests/tokenizers_/test_basic.py
@@ -29,7 +29,8 @@ def test_tokenizer_like_protocol():
     _assert_tokenizer_like(tokenizer)
 
     tokenizer = get_tokenizer(
-        "mistralai/Mistral-7B-Instruct-v0.3", tokenizer_mode="mistral"
+        "mistralai/Mistral-7B-Instruct-v0.3",
+        tokenizer_mode="mistral",
     )
     assert isinstance(tokenizer, MistralTokenizer)
     _assert_tokenizer_like(tokenizer)
@@ -40,11 +41,20 @@ def test_tokenizer_like_protocol():
 
     tokenizer = get_tokenizer("deepseek-ai/DeepSeek-V3", tokenizer_mode="deepseek_v32")
     assert isinstance(tokenizer, HfTokenizer)
+
     # Verify it's a fast tokenizer (required for FastIncrementalDetokenizer)
     assert isinstance(tokenizer, PreTrainedTokenizerFast)
     assert "DSV32" in tokenizer.__class__.__name__
     _assert_tokenizer_like(tokenizer)
 
+    tokenizer = get_tokenizer(
+        "Qwen/Qwen-VL",
+        tokenizer_mode="qwen_vl",
+        trust_remote_code=True,
+    )
+    assert isinstance(tokenizer, HfTokenizer)
+    assert "WithoutImagePad" in tokenizer.__class__.__name__
+
 
 @pytest.mark.parametrize("tokenizer_name", ["facebook/opt-125m", "gpt2"])
 def test_tokenizer_revision(tokenizer_name: str):
diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py
index f8bf52de0..7c9a95ef1 100644
--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -1321,6 +1321,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
         - "slow" will always use the slow tokenizer.\n
         - "mistral" will always use the tokenizer from `mistral_common`.\n
         - "deepseek_v32" will always use the tokenizer from `deepseek_v32`.\n
+        - "qwen_vl" will always use the tokenizer from `qwen_vl`.\n
         - Other custom values can be supported via plugins.""",
     )
     parser.add_argument("--use-beam-search", action="store_true")
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 4e3568fa1..6c48bfde6 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -126,6 +126,7 @@ class ModelConfig:
     - "slow" will always use the slow tokenizer.\n
     - "mistral" will always use the tokenizer from `mistral_common`.\n
     - "deepseek_v32" will always use the tokenizer from `deepseek_v32`.\n
+    - "qwen_vl" will always use the tokenizer from `qwen_vl`.\n
     - Other custom values can be supported via plugins."""
     trust_remote_code: bool = False
     """Trust remote code (e.g., from HuggingFace) when downloading the model
diff --git a/vllm/model_executor/models/qwen_vl.py b/vllm/model_executor/models/qwen_vl.py
index 8ac541f73..1eb8ecc2d 100644
--- a/vllm/model_executor/models/qwen_vl.py
+++ b/vllm/model_executor/models/qwen_vl.py
@@ -6,11 +6,9 @@
 # Copyright (c) Alibaba Cloud.
 """Inference-only Qwen-VL model compatible with HuggingFace weights."""
 
-import copy
 import math
-import unicodedata
-from collections.abc import Callable, Collection, Mapping, Sequence, Set
-from functools import lru_cache, partial
+from collections.abc import Callable, Mapping, Sequence
+from functools import partial
 from typing import Annotated, Literal, TypeAlias
 
 import regex as re
@@ -436,60 +434,6 @@ class QwenVLModel(QWenModel):
         )
 
 
-@lru_cache(maxsize=1)
-def _get_tokenizer_without_image_pad(
-    tokenizer: PreTrainedTokenizer,
-) -> PreTrainedTokenizer:
-    """
-    The logic of adding image pad tokens should only be applied in
-    [`QwenVLProcessor`][vllm.model_executor.models.qwen_vl.QwenVLProcessor],
-    so they are patched out here.
-
-    The definition of the wrapped tokenizer can be found here:
-    https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py
-    """
-    new_tokenizer = copy.deepcopy(tokenizer)
-
-    class TokenizerWithoutImagePad(tokenizer.__class__):  # type: ignore
-        def tokenize(
-            self,
-            text: str,
-            allowed_special: Set[str] | str = "all",
-            disallowed_special: Collection[str] | str = (),
-            **kwargs,
-        ) -> list[bytes | str]:
-            text = unicodedata.normalize("NFC", text)
-
-            return [
-                self.decoder[t]
-                for t in self.tokenizer.encode(
-                    text,
-                    allowed_special=allowed_special,
-                    disallowed_special=disallowed_special,
-                )
-            ]
-
-        def _decode(
-            self,
-            token_ids: int | list[int],
-            skip_special_tokens: bool = False,
-            errors: str | None = None,
-            **kwargs,
-        ) -> str:
-            if isinstance(token_ids, int):
-                token_ids = [token_ids]
-
-            return self.tokenizer.decode(
-                token_ids,
-                errors=errors or self.errors,
-            )
-
-    TokenizerWithoutImagePad.__name__ = f"{tokenizer.__class__.__name__}WithoutImagePad"
-
-    new_tokenizer.__class__ = TokenizerWithoutImagePad
-    return new_tokenizer
-
-
 class QwenVLProcessor:
     """
     This model doesn't define its own HF processor,
@@ -574,12 +518,6 @@ class QwenVLProcessor:
 
 
 class QwenVLProcessingInfo(BaseProcessingInfo):
-    def get_tokenizer(self) -> PreTrainedTokenizer:
-        tokenizer = self.ctx.get_tokenizer()
-        assert isinstance(tokenizer, PreTrainedTokenizer)
-
-        return _get_tokenizer_without_image_pad(tokenizer)
-
     def get_hf_processor(self, **kwargs: object) -> QwenVLProcessor:
         return self.ctx.init_processor(
             QwenVLProcessor,
diff --git a/vllm/renderers/qwen_vl.py b/vllm/renderers/qwen_vl.py
new file mode 100644
index 000000000..4b47d0216
--- /dev/null
+++ b/vllm/renderers/qwen_vl.py
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Any
+
+from vllm.config import VllmConfig
+from vllm.tokenizers import cached_get_tokenizer
+from vllm.tokenizers.qwen_vl import QwenVLTokenizer
+
+from .base import BaseRenderer
+from .hf import HfRenderer
+
+
+class QwenVLRenderer(BaseRenderer[QwenVLTokenizer]):
+    @classmethod
+    def from_config(  # type: ignore[override]
+        cls,
+        config: VllmConfig,
+        tokenizer_kwargs: dict[str, Any],
+    ) -> "HfRenderer":  # hands back a plain HfRenderer rather than an instance of cls
+        model_config = config.model_config
+        if model_config.skip_tokenizer_init:
+            tokenizer = None  # skip_tokenizer_init set: render without a tokenizer
+        else:
+            tokenizer = cached_get_tokenizer(
+                tokenizer_cls=QwenVLTokenizer,  # load via the Qwen-VL tokenizer class
+                **tokenizer_kwargs,
+            )
+
+        return HfRenderer(config, tokenizer)
diff --git a/vllm/renderers/registry.py b/vllm/renderers/registry.py
index cd09c80f9..de95505ec 100644
--- a/vllm/renderers/registry.py
+++ b/vllm/renderers/registry.py
@@ -20,6 +20,7 @@ _VLLM_RENDERERS = {
     "hf": ("hf", "HfRenderer"),
     "grok2": ("grok2", "Grok2Renderer"),
     "mistral": ("mistral", "MistralRenderer"),
+    "qwen_vl": ("qwen_vl", "QwenVLRenderer"),
     "terratorch": ("terratorch", "TerratorchRenderer"),
 }
 
diff --git a/vllm/tokenizers/deepseek_v32.py b/vllm/tokenizers/deepseek_v32.py
index 28071ef69..4525eaa34 100644
--- a/vllm/tokenizers/deepseek_v32.py
+++ b/vllm/tokenizers/deepseek_v32.py
@@ -7,9 +7,9 @@ from transformers import AutoTokenizer
 
 from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
 
-from . import TokenizerLike
 from .deepseek_v32_encoding import encode_messages
 from .hf import HfTokenizer, get_cached_tokenizer
+from .protocol import TokenizerLike
 
 
 def get_deepseek_v32_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
diff --git a/vllm/tokenizers/qwen_vl.py b/vllm/tokenizers/qwen_vl.py
new file mode 100644
index 000000000..5b506df4d
--- /dev/null
+++ b/vllm/tokenizers/qwen_vl.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import copy
+import unicodedata
+from collections.abc import Collection, Set
+
+from transformers import AutoTokenizer
+
+from .hf import HfTokenizer, get_cached_tokenizer
+from .protocol import TokenizerLike
+
+
+def get_qwen_vl_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
+    """
+    Return a shallow copy of `tokenizer` whose class overrides `tokenize` and
+    `_decode`, because image pad tokens should only be added by `QwenVLProcessor`.
+
+    The definition of the wrapped tokenizer can be found here:
+    https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py
+    """
+    new_tokenizer = copy.copy(tokenizer)  # shallow copy; argument is not mutated
+
+    class TokenizerWithoutImagePad(tokenizer.__class__):  # type: ignore
+        def tokenize(
+            self,
+            text: str,
+            allowed_special: Set[str] | str = "all",
+            disallowed_special: Collection[str] | str = (),
+            **kwargs,  # accepted but unused
+        ) -> list[bytes | str]:
+            text = unicodedata.normalize("NFC", text)
+
+            return [
+                self.decoder[t]
+                for t in self.tokenizer.encode(
+                    text,
+                    allowed_special=allowed_special,
+                    disallowed_special=disallowed_special,
+                )
+            ]
+
+        def _decode(
+            self,
+            token_ids: int | list[int],
+            skip_special_tokens: bool = False,  # accepted but unused
+            errors: str | None = None,
+            **kwargs,  # accepted but unused
+        ) -> str:
+            if isinstance(token_ids, int):
+                token_ids = [token_ids]  # wrap a lone id in a list
+
+            return self.tokenizer.decode(
+                token_ids,
+                errors=errors or self.errors,  # None/empty falls back to self.errors
+            )
+
+    TokenizerWithoutImagePad.__name__ = f"{tokenizer.__class__.__name__}WithoutImagePad"
+
+    new_tokenizer.__class__ = TokenizerWithoutImagePad  # swap class on the copy only
+    return new_tokenizer
+
+
+class QwenVLTokenizer(TokenizerLike):
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:  # not a cls instance
+        tokenizer = AutoTokenizer.from_pretrained(*args, **kwargs)  # args passed as-is
+        return get_cached_tokenizer(get_qwen_vl_tokenizer(tokenizer))  # patch first
diff --git a/vllm/tokenizers/registry.py b/vllm/tokenizers/registry.py
index 2da7842b0..4512f766c 100644
--- a/vllm/tokenizers/registry.py
+++ b/vllm/tokenizers/registry.py
@@ -36,6 +36,7 @@ _VLLM_TOKENIZERS = {
     "grok2": ("grok2", "Grok2Tokenizer"),
     "hf": ("hf", "CachedHfTokenizer"),
     "mistral": ("mistral", "MistralTokenizer"),
+    "qwen_vl": ("qwen_vl", "QwenVLTokenizer"),
 }
 
 
@@ -165,6 +166,10 @@ def resolve_tokenizer_args(
     ):
         tokenizer_mode = "grok2"
 
+    # Model-specific tokenizers
+    if tokenizer_mode == "auto" and "/Qwen-VL" in str(tokenizer_name):
+        tokenizer_mode = "qwen_vl"
+
     # Fallback to HF tokenizer
     if tokenizer_mode == "auto":
         tokenizer_mode = "hf"