[VLM] Avoid unnecessary tokenization (#12310)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2025-01-22 19:08:31 +08:00
Committed by: GitHub
Parent: 68ad4e3a8d
Commit: cd7b6f0857
9 changed files with 71 additions and 40 deletions

@@ -67,9 +67,10 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
     tokenizer_all_special_tokens_extended = (
         tokenizer.all_special_tokens_extended)
     tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
+    tokenizer_vocab = tokenizer.get_vocab()
     tokenizer_len = len(tokenizer)
 
-    max_token_id = max(tokenizer.get_vocab().values())
+    max_token_id = max(tokenizer_vocab.values())
     # Some tokenizers (e.g., QwenTokenizer) have special tokens that
     # are added and included in the implementation of the vocab_size
     # property, but not in get_vocab(); if there is an implementation
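The comment in this hunk refers to the take-the-greater-value fallback that follows it: max_token_id starts from the highest id in the (now cached) vocab and is raised to tokenizer.vocab_size when that is larger. A minimal sketch of that logic, using a hypothetical FakeTokenizer rather than a real Hugging Face class, not vLLM's exact code:

# Sketch of the "take the greater value" fallback described in the comment
# above. FakeTokenizer is hypothetical: its added special tokens are counted
# by vocab_size but missing from get_vocab().
import contextlib


class FakeTokenizer:
    def get_vocab(self):
        # Base vocabulary only; added special tokens are not listed here.
        return {"<pad>": 0, "hello": 1, "world": 2}

    @property
    def vocab_size(self):
        # Reports added special tokens that get_vocab() omits.
        return 5


tokenizer = FakeTokenizer()
tokenizer_vocab = tokenizer.get_vocab()       # fetched once, reused below
max_token_id = max(tokenizer_vocab.values())  # 2, from the cached vocab
if hasattr(tokenizer, "vocab_size"):
    # Guard for tokenizers whose vocab_size property is unimplemented.
    with contextlib.suppress(NotImplementedError):
        max_token_id = max(max_token_id, tokenizer.vocab_size)
print(max_token_id)  # 5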
@@ -96,6 +97,9 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
         def max_token_id(self):
             return max_token_id
 
+        def get_vocab(self):
+            return tokenizer_vocab
+
         def __len__(self):
             return tokenizer_len
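
Taken together, the two hunks fetch the vocabulary once alongside the already-cached length and serve both from overrides on the cached tokenizer class, so repeated get_vocab() calls during multimodal prompt processing no longer hit the underlying Hugging Face tokenizer. Below is a self-contained sketch of that pattern; SlowTokenizer is a hypothetical stand-in, and vLLM's real get_cached_tokenizer additionally caches properties such as max_token_id and the special-token sets shown above.

class SlowTokenizer:
    """Hypothetical tokenizer whose get_vocab()/__len__ are expensive."""

    def get_vocab(self):
        return {f"token_{i}": i for i in range(50_000)}  # rebuilt on every call

    def __len__(self):
        return len(self.get_vocab())


def get_cached_tokenizer(tokenizer):
    # Compute the expensive values once, up front.
    tokenizer_vocab = tokenizer.get_vocab()
    tokenizer_len = len(tokenizer)

    class CachedTokenizer(tokenizer.__class__):
        # Serve the precomputed values instead of recomputing them.
        def get_vocab(self):
            return tokenizer_vocab

        def __len__(self):
            return tokenizer_len

    CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}"
    tokenizer.__class__ = CachedTokenizer  # patch the instance in place
    return tokenizer


tok = get_cached_tokenizer(SlowTokenizer())
assert len(tok) == 50_000                 # returned from the cached length
assert tok.get_vocab()["token_0"] == 0    # returned from the cached dict

Swapping __class__ in place keeps the original object identity, so existing references to the tokenizer pick up the cached behaviour without re-wrapping.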