[Refactor] Pass tokenizer explicitly instead of binding to prompt update (#23542)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-08-25 21:31:57 +08:00
parent e269be2ba2
commit 6879cd80ae
4 changed files with 95 additions and 144 deletions
--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -35,7 +35,6 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                        PlaceholderFeaturesInfo,
                                        PromptReplacement, PromptUpdate,
                                        PromptUpdateDetails,
-                                        find_mm_placeholders,
                                        replace_token_matches)
 # yapf: enable
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
@@ -318,7 +317,8 @@ class Gemma3nMultiModalProcessor(BaseMultiModalProcessor[Gemma3nProcessingInfo]
            repl_token_ids.extend(repl_toks)
            repl_orig_idxs.extend(orig_idx for _ in range(len(repl_toks)))

-        repls = find_mm_placeholders(repl_token_ids, mm_prompt_updates)
+        repls = super()._find_mm_placeholders(repl_token_ids,
+                                              mm_prompt_updates)

        return {
            modality: [