[VLM] Avoid unnecessary tokenization (#12310)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -315,13 +315,14 @@ class PixtralHFMultiModalProcessor(
|
||||
hf_processor_mm_kwargs: Mapping[str, object],
|
||||
out_mm_kwargs: MultiModalKwargs,
|
||||
) -> list[PromptReplacement]:
|
||||
processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
|
||||
hf_config = self.info.get_hf_config()
|
||||
image_token_id = hf_config.image_token_index
|
||||
tokenizer = self.info.get_tokenizer()
|
||||
vocab = tokenizer.get_vocab()
|
||||
|
||||
processor = self.info.get_hf_processor()
|
||||
image_token = processor.image_token
|
||||
image_break_token = processor.image_break_token
|
||||
image_end_token = processor.image_end_token
|
||||
image_break_id = vocab[processor.image_break_token]
|
||||
image_token_id = hf_config.image_token_index
|
||||
image_end_id = vocab[processor.image_end_token]
|
||||
|
||||
vision_config = hf_config.vision_config
|
||||
assert isinstance(vision_config, PixtralVisionConfig)
|
||||
@@ -336,10 +337,10 @@ class PixtralHFMultiModalProcessor(
|
||||
image_height=image_size.height,
|
||||
)
|
||||
|
||||
tokens = ([image_token] * ncols + [image_break_token]) * nrows
|
||||
tokens[-1] = image_end_token
|
||||
tokens = ([image_token_id] * ncols + [image_break_id]) * nrows
|
||||
tokens[-1] = image_end_id
|
||||
|
||||
return "".join(tokens)
|
||||
return tokens
|
||||
|
||||
return [
|
||||
PromptReplacement(
|
||||
|
||||
Reference in New Issue
Block a user