[Refactor] Decouple TimingContext from InputProcessingContext (#35083)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-02-23 22:15:50 +08:00
committed by GitHub
parent 1e8438a89a
commit 392645454b
38 changed files with 419 additions and 649 deletions

View File

@@ -389,13 +389,13 @@ def _test_processing_correctness_one(
mm_items = baseline_processor.info.parse_mm_data(mm_data)
ignore_mm_keys = _IGNORE_MM_KEYS.get(model_type, set[str]())
baseline_tokenized_result = baseline_processor.apply(
baseline_tokenized_result = baseline_processor(
token_prompt,
mm_items=mm_items,
hf_processor_mm_kwargs={},
)
cached_tokenized_result = cached_processor.apply(
cached_tokenized_result = cached_processor(
token_prompt,
mm_items=mm_items,
hf_processor_mm_kwargs={},
@@ -409,12 +409,12 @@ def _test_processing_correctness_one(
)
if text_prompt is not None:
baseline_text_result = baseline_processor.apply(
baseline_text_result = baseline_processor(
text_prompt,
mm_items=mm_items,
hf_processor_mm_kwargs={},
)
cached_text_result = cached_processor.apply(
cached_text_result = cached_processor(
text_prompt,
mm_items=mm_items,
hf_processor_mm_kwargs={},

View File

@@ -176,7 +176,7 @@ def test_get_image_size_with_most_features(
for asset in image_assets:
mm_data = {"image": [asset.pil_image]}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -52,7 +52,7 @@ def test_processor_override(
metadata["fps"] = fps
mm_data = {"video": [(video, metadata)]}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
@@ -104,12 +104,12 @@ def test_video_loader_consistency(
static_mm_data = {"video": [(static_video, static_metadata)]}
dynamic_mm_data = {"video": [(dynamic_video, dynamic_metadata)]}
static_outputs = processor.apply(
static_outputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(static_mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
)
dynamic_outputs = processor.apply(
dynamic_outputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(dynamic_mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -106,7 +106,7 @@ def _run_check(
for image in images
)
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=mm_processor_kwargs,

View File

@@ -61,7 +61,7 @@ def test_processor_override(
dummy_image = image_assets[0].pil_image.resize(dummy_image_size)
mm_data = {"image": [dummy_image] * num_imgs}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -66,7 +66,7 @@ def _run_check(
for image in images
)
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=mm_processor_kwargs,

View File

@@ -49,7 +49,7 @@ def test_processor_override(
if tokenized_prompt:
prompt = tokenizer.encode(prompt)
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=mm_processor_kwargs,

View File

@@ -87,7 +87,7 @@ def _validate_image_prompt_replacements_one(
try:
# The processor will throw an error if there is a mismatch
# in the prompt replacements
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs={},

View File

@@ -87,7 +87,7 @@ def _validate_image_prompt_replacements_one(
try:
# The processor will throw an error if there is a mismatch
# in the prompt replacements
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs={},

View File

@@ -29,7 +29,7 @@ def test_processor_override(
image = Image.new("RGB", size=(364, 364))
mm_data = {"image": [image] * num_imgs}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs={},
@@ -50,7 +50,7 @@ def _validate_image_prompt_replacements_one(
mm_data = {"image": [image] * num_imgs}
try:
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs={},

View File

@@ -68,7 +68,7 @@ def _run_check(
for image in images
)
print(total_expected_num_patches)
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=mm_processor_kwargs,

View File

@@ -47,7 +47,7 @@ def test_processor_override(
prompt = f"<|user|>\n{img_str}<|end|>\n<|assistant|>\n"
mm_data = {"image": [image_assets[0].pil_image] * num_imgs}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -51,7 +51,7 @@ def test_processor_override(
dummy_image = image_assets[0].pil_image.resize(dummy_image_size)
mm_data = {"image": [dummy_image] * num_imgs}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -42,7 +42,7 @@ def test_processor_override(
prompt = "<|vision_start|><|image_pad|><|vision_end|>" * num_imgs
mm_data = {"image": [image_assets[0].pil_image] * num_imgs}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
@@ -88,7 +88,7 @@ def test_get_image_size_with_most_features(
prompt = "<|vision_start|><|image_pad|><|vision_end|>"
for asset in image_assets:
mm_data = {"image": [asset.pil_image]}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -51,7 +51,7 @@ def test_processor_with_audio_sample_rate(
hf_processor_mm_kwargs: dict[str, Any] = {
"audio_sample_rate": audio_sample_rate,
}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
@@ -94,7 +94,7 @@ def test_longer_audio_generates_more_tokens(model_id: str) -> None:
hf_processor_mm_kwargs: dict[str, Any] = {
"audio_sample_rate": audio_sample_rate,
}
processed = processor.apply(
processed = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -61,7 +61,7 @@ def test_processor_override(
dummy_image = image_assets[0].pil_image.resize(dummy_image_size)
mm_data = {"image": [dummy_image] * num_imgs}
processed_inputs = processor.apply(
processed_inputs = processor(
prompt,
mm_items=processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs=hf_processor_mm_kwargs,

View File

@@ -99,7 +99,7 @@ def create_batched_mm_kwargs(
mm_counts=mm_counts,
mm_options={},
)
mm_items = processor_inputs.mm_items
mm_items = processor_inputs.mm_data_items
resized_mm_data = {
modality: resize_mm_data(items.data, size_factors)
for modality, items in mm_items.items()
@@ -108,11 +108,10 @@ def create_batched_mm_kwargs(
# video metadata will be added back to the resized video data here.
text_prompt, token_prompt = get_text_token_prompts(processor, resized_mm_data)
mm_kwargs = processor.apply(
mm_kwargs = processor(
prompt=token_prompt if text_prompt is None else text_prompt,
mm_items=processor.info.parse_mm_data(resized_mm_data),
hf_processor_mm_kwargs=processor_inputs.hf_processor_mm_kwargs,
tokenization_kwargs=processor_inputs.tokenization_kwargs,
)["mm_kwargs"].require_data()
return group_mm_kwargs_by_modality(

View File

@@ -19,7 +19,7 @@ def test_multimodal_processor(model_id):
image_pil = ImageAsset("cherry_blossom").pil_image
mm_data = {"image": image_pil}
str_prompt = "<|im_start|>user <image>\nWhat is the content of this image?<|im_end|><|im_start|>assistant\n" # noqa: E501
str_processed_inputs = mm_processor.apply(
str_processed_inputs = mm_processor(
prompt=str_prompt,
mm_items=mm_processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs={},
@@ -44,7 +44,7 @@ def test_multimodal_processor(model_id):
77091,
198,
]
ids_processed_inputs = mm_processor.apply(
ids_processed_inputs = mm_processor(
prompt=ids_prompt,
mm_items=mm_processor.info.parse_mm_data(mm_data),
hf_processor_mm_kwargs={},