[VLM] merged multimodal processor and V1 support for idefics3 (#12660)

Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
Isotr0py
2025-02-04 20:00:51 +08:00
committed by GitHub
parent 18a88fcccc
commit 815079de8e
7 changed files with 320 additions and 462 deletions

View File

@@ -192,6 +192,14 @@ def deepseekvl2_trunc_hf_output(hf_output: RunnerOutput,
return output_ids, output_str, out_logprobs
def idefics3_trunc_hf_output(hf_output: RunnerOutput,
                             model: str) -> RunnerOutput:
    """Strip a trailing ``<end_of_utterance>`` marker from the output text.

    Args:
        hf_output: Triple of ``(output_ids, output_str, out_logprobs)``.
        model: Unused here.

    Returns:
        The same triple. When ``output_str`` ends with
        ``<end_of_utterance>``, the text is cut at the first occurrence of
        that marker; otherwise it is returned unchanged. The ids and
        logprobs are always passed through untouched.
    """
    eos_marker = "<end_of_utterance>"
    token_ids, text, logprobs = hf_output

    # Nothing to trim unless the text actually finishes with the marker.
    if not text.endswith(eos_marker):
        return token_ids, text, logprobs

    # Keep only what precedes the first marker occurrence.
    head, _, _ = text.partition(eos_marker)
    return token_ids, head, logprobs
def minicpmv_trunc_hf_output(hf_output: RunnerOutput,
model: str) -> RunnerOutput:
output_ids, output_str, out_logprobs = hf_output