[V1] Fix multimodal profiling for Molmo (#11325)
Signed-off-by: ywang96 <ywang@example.com>
Co-authored-by: ywang96 <ywang@example.com>
This commit is contained in:
@@ -928,7 +928,11 @@ def image_input_mapper_for_molmo(
|
||||
data: object,
|
||||
):
|
||||
if isinstance(data, list):
|
||||
assert len(data) == 1, "Molmo supports only one image per prompt."
|
||||
data = data[0]
|
||||
|
||||
# Remove unused dummy PIL image
|
||||
data.pop('raw_mm_data', None)
|
||||
return MultiModalKwargs(data)
|
||||
|
||||
|
||||
@@ -974,6 +978,7 @@ def dummy_data_for_molmo(ctx: InputContext, seq_len: int,
|
||||
dummy_imgdata = {
|
||||
"images": out["images"],
|
||||
"image_input_idx": out["image_input_idx"],
|
||||
"raw_mm_data": dummy_image,
|
||||
}
|
||||
if "image_masks" in out:
|
||||
dummy_imgdata["image_masks"] = out["image_masks"]
|
||||
|
||||
Reference in New Issue
Block a user