[Misc] Consolidate Audio tests into multimodal common generation tests (#18214)

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-05-16 17:18:08 +08:00
committed by GitHub
parent 541817670c
commit 390ec88905
9 changed files with 282 additions and 215 deletions

View File

@@ -237,6 +237,18 @@ def minimax_vl_01_hf_output(hf_output: RunnerOutput,
return output_ids, output_str, out_logprobs
def ultravox_trunc_hf_output(hf_output: RunnerOutput,
model: str) -> RunnerOutput:
output_ids, output_str, out_logprobs = hf_output
tokenizer = AutoTokenizer.from_pretrained(model)
eos_token_id = tokenizer.eos_token_id
eos_token = tokenizer.decode(eos_token_id)
if output_str.endswith(eos_token):
output_str = output_str.split(eos_token)[0]
return output_ids, output_str, out_logprobs
####### Functions for converting image assets to embeddings
def get_llava_embeddings(image_assets: ImageTestAssets):
return [asset.image_embeds for asset in image_assets]