[Model] Move multimodal_cpu_fields definition to field config (#30181)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -811,14 +811,14 @@ def _create_qwen2vl_field_factory(
|
||||
image_embeds=MultiModalFieldConfig.flat_from_sizes(
|
||||
"image", image_embed_grid_sizes
|
||||
),
|
||||
-image_grid_thw=MultiModalFieldConfig.batched("image"),
|
||||
+image_grid_thw=MultiModalFieldConfig.batched("image", keep_on_cpu=True),
|
||||
pixel_values_videos=MultiModalFieldConfig.flat_from_sizes(
|
||||
"video", video_grid_sizes
|
||||
),
|
||||
video_embeds=MultiModalFieldConfig.flat_from_sizes(
|
||||
"video", video_embed_grid_sizes
|
||||
),
|
||||
-video_grid_thw=MultiModalFieldConfig.batched("video"),
|
||||
+video_grid_thw=MultiModalFieldConfig.batched("video", keep_on_cpu=True),
|
||||
)
|
||||
|
||||
return _qwen2vl_field_config
|
||||
@@ -1131,8 +1131,6 @@ class Qwen2VLMultiModalProcessor(BaseMultiModalProcessor[Qwen2VLProcessingInfo])
|
||||
class Qwen2VLForConditionalGeneration(
|
||||
nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP, SupportsMRoPE
|
||||
):
|
||||
-multimodal_cpu_fields = {"image_grid_thw", "video_grid_thw"}
|
||||
|
||||
# To ensure correct weight loading and mapping.
|
||||
hf_to_vllm_mapper = WeightsMapper(
|
||||
orig_to_new_prefix={
|
||||
@@ -1393,9 +1391,11 @@ class Qwen2VLForConditionalGeneration(
|
||||
else:
|
||||
pixel_values_videos = video_input["pixel_values_videos"]
|
||||
if self.use_data_parallel:
|
||||
-grid_thw_list = grid_thw.tolist()
|
||||
return run_dp_sharded_mrope_vision_model(
|
||||
-self.visual, pixel_values_videos, grid_thw_list, rope_type="rope_3d"
|
||||
+self.visual,
|
||||
+pixel_values_videos,
|
||||
+grid_thw.tolist(),
|
||||
+rope_type="rope_3d",
|
||||
)
|
||||
else:
|
||||
video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw)
|
||||
|
||||
Reference in New Issue
Block a user