[V1] Fix multimodal profiling for Molmo (#11325)

Signed-off-by: ywang96 <ywang@example.com>
Co-authored-by: ywang96 <ywang@example.com>
This commit is contained in:
Roger Wang
2024-12-19 08:27:22 -08:00
committed by GitHub
parent 6c7f881541
commit 7379b3d4b2
4 changed files with 24 additions and 4 deletions

View File

@@ -928,7 +928,11 @@ def image_input_mapper_for_molmo(
data: object,
):
if isinstance(data, list):
assert len(data) == 1, "Molmo supports only one image per prompt."
data = data[0]
# Remove the unused dummy PIL image ('raw_mm_data'), if present, before building MultiModalKwargs
data.pop('raw_mm_data', None)
return MultiModalKwargs(data)
@@ -974,6 +978,7 @@ def dummy_data_for_molmo(ctx: InputContext, seq_len: int,
dummy_imgdata = {
"images": out["images"],
"image_input_idx": out["image_input_idx"],
"raw_mm_data": dummy_image,
}
if "image_masks" in out:
dummy_imgdata["image_masks"] = out["image_masks"]