[Model] Move multimodal_cpu_fields definition to field config (#30181)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-12-06 21:40:02 +08:00
committed by GitHub
parent 21bb323542
commit 671427efbf
15 changed files with 141 additions and 95 deletions

View File

@@ -984,14 +984,14 @@ class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo])
image_embeds=MultiModalFieldConfig.flat_from_sizes(
"image", image_grid_sizes
),
-image_grid_thw=MultiModalFieldConfig.batched("image"),
+image_grid_thw=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
pixel_values_videos=MultiModalFieldConfig.flat_from_sizes(
"video", video_grid_sizes
),
video_embeds=MultiModalFieldConfig.flat_from_sizes(
"video", video_grid_sizes
),
-video_grid_thw=MultiModalFieldConfig.batched("video"),
+video_grid_thw=MultiModalFieldConfig.batched("video", keep_on_cpu=True),
)
def _get_prompt_updates(
@@ -1190,8 +1190,6 @@ class Qwen3VLForConditionalGeneration(
SupportsMRoPE,
SupportsEagle3,
):
-multimodal_cpu_fields = {"image_grid_thw", "video_grid_thw"}
packed_modules_mapping = {
"qkv_proj": [
"q_proj",