diff --git a/vllm/config/model.py b/vllm/config/model.py
index 3e8e63be2..7d2409d70 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -540,6 +540,8 @@ class ModelConfig:
             self.tokenizer_mode = "kimi_audio"
         elif arch == "QwenVLForConditionalGeneration":
             self.tokenizer_mode = "qwen_vl"
+        elif arch == "DeepseekV32ForCausalLM":
+            self.tokenizer_mode = "deepseek_v32"
 
         if self.tokenizer_mode != "auto":
             logger.info(
diff --git a/vllm/tokenizers/deepseek_v32.py b/vllm/tokenizers/deepseek_v32.py
index 4525eaa34..51199de5c 100644
--- a/vllm/tokenizers/deepseek_v32.py
+++ b/vllm/tokenizers/deepseek_v32.py
@@ -3,7 +3,7 @@
 import copy
 from typing import Any
 
-from transformers import AutoTokenizer
+from transformers import PreTrainedTokenizerFast
 
 from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
 
@@ -85,5 +85,5 @@ def get_deepseek_v32_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
 class DeepseekV32Tokenizer(TokenizerLike):
     @classmethod
     def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
-        tokenizer = AutoTokenizer.from_pretrained(*args, **kwargs)
+        tokenizer = PreTrainedTokenizerFast.from_pretrained(*args, **kwargs)
         return get_cached_tokenizer(get_deepseek_v32_tokenizer(tokenizer))