[Bugfix] Fix DeepSeek-V3.2 tokenizer stripping spaces (#37004)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Matthew Bonanni
2026-03-13 18:55:36 -04:00
committed by GitHub
parent f1816fb192
commit 9efc4db965
2 changed files with 4 additions and 2 deletions

View File

@@ -540,6 +540,8 @@ class ModelConfig:
 self.tokenizer_mode = "kimi_audio"
 elif arch == "QwenVLForConditionalGeneration":
 self.tokenizer_mode = "qwen_vl"
+elif arch == "DeepseekV32ForCausalLM":
+self.tokenizer_mode = "deepseek_v32"
 if self.tokenizer_mode != "auto":
 logger.info(

View File

@@ -3,7 +3,7 @@
 import copy
 from typing import Any
-from transformers import AutoTokenizer
+from transformers import PreTrainedTokenizerFast
 from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
@@ -85,5 +85,5 @@ def get_deepseek_v32_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
 class DeepseekV32Tokenizer(TokenizerLike):
 @classmethod
 def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
-tokenizer = AutoTokenizer.from_pretrained(*args, **kwargs)
+tokenizer = PreTrainedTokenizerFast.from_pretrained(*args, **kwargs)
 return get_cached_tokenizer(get_deepseek_v32_tokenizer(tokenizer))