[Bugfix] Fix DeepSeek-V3.2 tokenizer stripping spaces (#37004)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
@@ -540,6 +540,8 @@ class ModelConfig:
|
||||
self.tokenizer_mode = "kimi_audio"
|
||||
elif arch == "QwenVLForConditionalGeneration":
|
||||
self.tokenizer_mode = "qwen_vl"
|
||||
+ elif arch == "DeepseekV32ForCausalLM":
|
||||
+ self.tokenizer_mode = "deepseek_v32"
|
||||
|
||||
if self.tokenizer_mode != "auto":
|
||||
logger.info(
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import copy
|
||||
from typing import Any
|
||||
|
||||
- from transformers import AutoTokenizer
|
||||
+ from transformers import PreTrainedTokenizerFast
|
||||
|
||||
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
|
||||
|
||||
@@ -85,5 +85,5 @@ def get_deepseek_v32_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
|
||||
class DeepseekV32Tokenizer(TokenizerLike):
|
||||
@classmethod
|
||||
def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
|
||||
- tokenizer = AutoTokenizer.from_pretrained(*args, **kwargs)
|
||||
+ tokenizer = PreTrainedTokenizerFast.from_pretrained(*args, **kwargs)
|
||||
return get_cached_tokenizer(get_deepseek_v32_tokenizer(tokenizer))
|
||||
|
||||
Reference in New Issue
Block a user