[Perf] Simplify DeepseekV32 tokenizer, ensure fast detokenization is used (#33855)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -11,6 +11,7 @@ from transformers import (
|
||||
|
||||
from vllm.tokenizers import TokenizerLike, get_tokenizer
|
||||
from vllm.tokenizers.grok2 import Grok2Tokenizer
|
||||
from vllm.tokenizers.hf import HfTokenizer
|
||||
from vllm.tokenizers.mistral import MistralTokenizer
|
||||
|
||||
|
||||
@@ -42,6 +43,13 @@ def test_tokenizer_like_protocol():
|
||||
assert isinstance(tokenizer, Grok2Tokenizer)
|
||||
_assert_tokenizer_like(tokenizer)
|
||||
|
||||
tokenizer = get_tokenizer("deepseek-ai/DeepSeek-V3", tokenizer_mode="deepseek_v32")
|
||||
assert isinstance(tokenizer, HfTokenizer)
|
||||
# Verify it's a fast tokenizer (required for FastIncrementalDetokenizer)
|
||||
assert isinstance(tokenizer, PreTrainedTokenizerFast)
|
||||
assert "DSV32" in tokenizer.__class__.__name__
|
||||
_assert_tokenizer_like(tokenizer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tokenizer_name", ["facebook/opt-125m", "gpt2"])
|
||||
def test_tokenizer_revision(tokenizer_name: str):
|
||||
|
||||
Reference in New Issue
Block a user