[Bugfix] Fix Qwen-VL tokenizer implementation (#36140)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -29,7 +29,8 @@ def test_tokenizer_like_protocol():
|
||||
_assert_tokenizer_like(tokenizer)
|
||||
|
||||
tokenizer = get_tokenizer(
|
||||
"mistralai/Mistral-7B-Instruct-v0.3", tokenizer_mode="mistral"
|
||||
"mistralai/Mistral-7B-Instruct-v0.3",
|
||||
tokenizer_mode="mistral",
|
||||
)
|
||||
assert isinstance(tokenizer, MistralTokenizer)
|
||||
_assert_tokenizer_like(tokenizer)
|
||||
@@ -40,11 +41,20 @@ def test_tokenizer_like_protocol():
|
||||
|
||||
tokenizer = get_tokenizer("deepseek-ai/DeepSeek-V3", tokenizer_mode="deepseek_v32")
|
||||
assert isinstance(tokenizer, HfTokenizer)
|
||||
|
||||
# Verify it's a fast tokenizer (required for FastIncrementalDetokenizer)
|
||||
assert isinstance(tokenizer, PreTrainedTokenizerFast)
|
||||
assert "DSV32" in tokenizer.__class__.__name__
|
||||
_assert_tokenizer_like(tokenizer)
|
||||
|
||||
tokenizer = get_tokenizer(
|
||||
"Qwen/Qwen-VL",
|
||||
tokenizer_mode="qwen_vl",
|
||||
trust_remote_code=True,
|
||||
)
|
||||
assert isinstance(tokenizer, HfTokenizer)
|
||||
assert "WithoutImagePad" in tokenizer.__class__.__name__
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tokenizer_name", ["facebook/opt-125m", "gpt2"])
|
||||
def test_tokenizer_revision(tokenizer_name: str):
|
||||
|
||||
Reference in New Issue
Block a user