[Refactor] Dynamic target and content for prompt updates (#23411)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -824,9 +824,7 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
|
||||
processor=hf_processor,
|
||||
)
|
||||
|
||||
image_tokens = [_IMAGE_PLACEHOLDER_TOKEN_ID] * num_image_tokens
|
||||
|
||||
return image_tokens
|
||||
return [_IMAGE_PLACEHOLDER_TOKEN_ID] * num_image_tokens
|
||||
|
||||
def get_audio_replacement_phi4mm(item_idx: int):
|
||||
audios = mm_items.get_items("audio", AudioProcessorItems)
|
||||
@@ -837,28 +835,20 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
|
||||
audio_embed_size = self.info._compute_audio_embed_size(
|
||||
audio_frames)
|
||||
|
||||
audio_tokens = [_AUDIO_PLACEHOLDER_TOKEN_ID] * audio_embed_size
|
||||
return [_AUDIO_PLACEHOLDER_TOKEN_ID] * audio_embed_size
|
||||
|
||||
return audio_tokens
|
||||
|
||||
num_images = mm_items.get_count("image", strict=False)
|
||||
num_audios = mm_items.get_count("audio", strict=False)
|
||||
|
||||
image_repl = [
|
||||
return [
|
||||
PromptReplacement(
|
||||
modality="image",
|
||||
target=image_token,
|
||||
target=image_tokens.__getitem__,
|
||||
replacement=get_image_replacement_phi4mm,
|
||||
) for image_token in image_tokens[:num_images]
|
||||
]
|
||||
audio_repl = [
|
||||
),
|
||||
PromptReplacement(
|
||||
modality="audio",
|
||||
target=audio_token,
|
||||
target=audio_tokens.__getitem__,
|
||||
replacement=get_audio_replacement_phi4mm,
|
||||
) for audio_token in audio_tokens[:num_audios]
|
||||
),
|
||||
]
|
||||
return image_repl + audio_repl
|
||||
|
||||
|
||||
@MULTIMODAL_REGISTRY.register_processor(
|
||||
|
||||
Reference in New Issue
Block a user