[Bugfix] Fix DeepSeek-V3.2 tokenizer stripping spaces (#37004)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
@@ -540,6 +540,8 @@ class ModelConfig:
|
|||||||
self.tokenizer_mode = "kimi_audio"
|
self.tokenizer_mode = "kimi_audio"
|
||||||
elif arch == "QwenVLForConditionalGeneration":
|
elif arch == "QwenVLForConditionalGeneration":
|
||||||
self.tokenizer_mode = "qwen_vl"
|
self.tokenizer_mode = "qwen_vl"
|
||||||
|
elif arch == "DeepseekV32ForCausalLM":
|
||||||
|
self.tokenizer_mode = "deepseek_v32"
|
||||||
|
|
||||||
if self.tokenizer_mode != "auto":
|
if self.tokenizer_mode != "auto":
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
import copy
|
import copy
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from transformers import AutoTokenizer
|
from transformers import PreTrainedTokenizerFast
|
||||||
|
|
||||||
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
|
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
|
||||||
|
|
||||||
@@ -85,5 +85,5 @@ def get_deepseek_v32_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
|
|||||||
class DeepseekV32Tokenizer(TokenizerLike):
|
class DeepseekV32Tokenizer(TokenizerLike):
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
|
def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
|
||||||
tokenizer = AutoTokenizer.from_pretrained(*args, **kwargs)
|
tokenizer = PreTrainedTokenizerFast.from_pretrained(*args, **kwargs)
|
||||||
return get_cached_tokenizer(get_deepseek_v32_tokenizer(tokenizer))
|
return get_cached_tokenizer(get_deepseek_v32_tokenizer(tokenizer))
|
||||||
|
|||||||
Reference in New Issue
Block a user