Revert "[V1] Scatter and gather placeholders in the model runner" (#16075)

This commit is contained in:
Roger Wang
2025-04-04 14:50:57 -07:00
committed by GitHub
parent f5722a5052
commit af51d80fa1
42 changed files with 942 additions and 496 deletions

View File

@@ -57,7 +57,7 @@ class NVLMProcessor(BaseInternVLProcessor):
# when trying to find "<tile" as a subsequence of "<Image><tile"
repl = "<Image>" + features + "</Image>"
return PromptUpdateDetails.select_text(repl, IMG_PAD)
return PromptUpdateDetails(full=repl, features=repl)
class NVLMProcessingInfo(BaseInternVLProcessingInfo):
@@ -84,6 +84,31 @@ class NVLMProcessingInfo(BaseInternVLProcessingInfo):
**kwargs,
)
def get_max_image_tokens(self) -> int:
hf_processor = self.get_hf_processor()
tokenizer = hf_processor.tokenizer
max_num_patches = hf_processor.max_dynamic_patch
# we need +1 here because max_dynamic_patch in config doesn't
# include the thumbnail patch
tile_pos_identifiers = [
f"<tile_{i+1}>" for i in range(max_num_patches)
]
if hf_processor.use_thumbnail and max_num_patches != 1:
tile_pos_identifiers += ["<tile_global_thumbnail>"]
# "<Image><tile" is tokenized as ["<Image", "><", "tile"]
# so we include <tile_1> in the start_str
start_str = "<Image>" + tile_pos_identifiers.pop(0)
end_str = "</Image>"
start_token_len = len(tokenizer.encode(start_str))
end_token_len = len(tokenizer.encode(end_str))
tile_token_len = sum(
len(tokenizer.encode(identifier))
for identifier in tile_pos_identifiers)
non_image_tokens_num = start_token_len + end_token_len + tile_token_len
return super().get_max_image_tokens() + non_image_tokens_num
class NVLMDummyInputsBuilder(InternVLDummyInputsBuilder[NVLMProcessingInfo]):
@@ -152,7 +177,10 @@ class NVLMMultiModalProcessor(InternVLMultiModalProcessor[NVLMProcessingInfo]):
repl = hf_processor.get_image_repl(feature_size, num_patches)
return PromptUpdateDetails.select_text(repl.full + "\n", IMG_PAD)
return PromptUpdateDetails(
full=repl.full + "\n",
features=repl.features + "\n",
)
# See note in dummy data regarding why we have the extra newline
return [