[Multimodal] Simplify MM input definitions (#33331)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-29 21:32:04 +08:00
committed by GitHub
parent 17b17c0684
commit c6e7404cc5
17 changed files with 142 additions and 164 deletions

View File

@@ -131,7 +131,7 @@ def test_e2e_streaming_with_multimodal_features(mock_model_runner_with_input_bat
# Step 1: Create initial request state with one multimodal feature
mm_feature_1 = MultiModalFeatureSpec(
-data=MultiModalKwargsItem.dummy("audio"),
+data=MultiModalKwargsItem.dummy(),
modality="audio",
identifier="audio_1",
mm_position=PlaceholderRange(offset=2, length=10),
@@ -158,7 +158,7 @@ def test_e2e_streaming_with_multimodal_features(mock_model_runner_with_input_bat
# The scheduler has already set prompt_token_ids to the full sequence
# (original prompt + intermediate outputs + new prompt with new multimodal feature)
mm_feature_2 = MultiModalFeatureSpec(
-data=MultiModalKwargsItem.dummy("audio"),
+data=MultiModalKwargsItem.dummy(),
modality="audio",
identifier="audio_2",
mm_position=PlaceholderRange(offset=15, length=5),

View File

@@ -174,7 +174,7 @@ class TestStreamingScheduler(unittest.TestCase):
scheduler = create_scheduler()
mm_feature = MultiModalFeatureSpec(
-data=MultiModalKwargsItem.dummy("audio"),
+data=MultiModalKwargsItem.dummy(),
modality="audio",
identifier="",
mm_position=PlaceholderRange(offset=1, length=1),
@@ -187,7 +187,7 @@ class TestStreamingScheduler(unittest.TestCase):
session.num_computed_tokens = len(session.prompt_token_ids)
mm_feature = MultiModalFeatureSpec(
-data=MultiModalKwargsItem.dummy("audio"),
+data=MultiModalKwargsItem.dummy(),
modality="audio",
identifier="",
mm_position=PlaceholderRange(offset=2, length=1),