[Model] Move multimodal_cpu_fields definition to field config (#30181)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-12-06 21:40:02 +08:00
committed by GitHub
parent 21bb323542
commit 671427efbf
15 changed files with 141 additions and 95 deletions

View File

@@ -563,7 +563,7 @@ def _hunyuan_vl_field_config(hf_inputs: Mapping[str, torch.Tensor]):
return dict(
pixel_values=MultiModalFieldConfig.flat_from_sizes("image", image_grid_sizes),
image_embeds=MultiModalFieldConfig.flat_from_sizes("image", image_grid_sizes),
-image_grid_thw=MultiModalFieldConfig.batched("image"),
+image_grid_thw=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
)
@@ -786,8 +786,6 @@ class HunYuanVLForConditionalGeneration(
SupportsQuant,
SupportsXDRoPE,
):
-multimodal_cpu_fields = {"image_grid_thw"}
# To ensure correct weight loading and mapping.
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={