[Bugfix] Fix Qwen-VL tokenizer implementation (#36140)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -6,11 +6,9 @@
|
||||
# Copyright (c) Alibaba Cloud.
|
||||
"""Inference-only Qwen-VL model compatible with HuggingFace weights."""
|
||||
|
||||
import copy
|
||||
import math
|
||||
import unicodedata
|
||||
from collections.abc import Callable, Collection, Mapping, Sequence, Set
|
||||
from functools import lru_cache, partial
|
||||
from collections.abc import Callable, Mapping, Sequence
|
||||
from functools import partial
|
||||
from typing import Annotated, Literal, TypeAlias
|
||||
|
||||
import regex as re
|
||||
@@ -436,60 +434,6 @@ class QwenVLModel(QWenModel):
|
||||
)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _get_tokenizer_without_image_pad(
    tokenizer: PreTrainedTokenizer,
) -> PreTrainedTokenizer:
    """
    Return a deep copy of `tokenizer` whose class no longer inserts
    image pad tokens.

    Adding image pad tokens is meant to happen only inside
    [`QwenVLProcessor`][vllm.model_executor.models.qwen_vl.QwenVLProcessor],
    so the copy's `tokenize` and `_decode` are overridden to call the
    underlying encoder/decoder directly.

    The tokenizer class being wrapped is defined here:
    https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py

    The result is memoized with `lru_cache(maxsize=1)`, so only the patched
    copy for the most recently seen `tokenizer` argument is retained.
    """

    class TokenizerWithoutImagePad(tokenizer.__class__):  # type: ignore
        def tokenize(
            self,
            text: str,
            allowed_special: Set[str] | str = "all",
            disallowed_special: Collection[str] | str = (),
            **kwargs,
        ) -> list[bytes | str]:
            # Normalize to NFC before handing the text to the encoder.
            normalized = unicodedata.normalize("NFC", text)
            encoded_ids = self.tokenizer.encode(
                normalized,
                allowed_special=allowed_special,
                disallowed_special=disallowed_special,
            )

            # Map every token id back to its token via the decoder table.
            return [self.decoder[token_id] for token_id in encoded_ids]

        def _decode(
            self,
            token_ids: int | list[int],
            skip_special_tokens: bool = False,
            errors: str | None = None,
            **kwargs,
        ) -> str:
            # A single id is wrapped in a list; `skip_special_tokens` is
            # accepted but not used by this override.
            id_list = [token_ids] if isinstance(token_ids, int) else token_ids
            error_mode = errors or self.errors

            return self.tokenizer.decode(id_list, errors=error_mode)

    TokenizerWithoutImagePad.__name__ = f"{tokenizer.__class__.__name__}WithoutImagePad"

    # Leave the caller's tokenizer untouched: patch the class of a copy.
    patched_tokenizer = copy.deepcopy(tokenizer)
    patched_tokenizer.__class__ = TokenizerWithoutImagePad
    return patched_tokenizer
|
||||
|
||||
|
||||
class QwenVLProcessor:
|
||||
"""
|
||||
This model doesn't define its own HF processor,
|
||||
@@ -574,12 +518,6 @@ class QwenVLProcessor:
|
||||
|
||||
|
||||
class QwenVLProcessingInfo(BaseProcessingInfo):
|
||||
def get_tokenizer(self) -> PreTrainedTokenizer:
    """
    Fetch the tokenizer from the processing context and return the
    variant produced by `_get_tokenizer_without_image_pad`.

    The context's tokenizer must be a `PreTrainedTokenizer`; this is
    checked with an assertion before it is wrapped.
    """
    base_tokenizer = self.ctx.get_tokenizer()
    assert isinstance(base_tokenizer, PreTrainedTokenizer)
    return _get_tokenizer_without_image_pad(base_tokenizer)
|
||||
|
||||
def get_hf_processor(self, **kwargs: object) -> QwenVLProcessor:
|
||||
return self.ctx.init_processor(
|
||||
QwenVLProcessor,
|
||||
|
||||
Reference in New Issue
Block a user