[CI] Heavy refactoring of Voxtral multimodal audio model tests (#34294)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -184,22 +184,42 @@ def get_text_token_prompts(
|
||||
text_prompt: str | None
|
||||
token_prompt: list[int]
|
||||
if isinstance(tokenizer, MistralTokenizer):
|
||||
images = parsed_data.get("image", [])
|
||||
request = ChatCompletionRequest(
|
||||
messages=[
|
||||
UserMessage(
|
||||
content=[
|
||||
TextChunk(text=""),
|
||||
*(ImageChunk(image=image) for image in images),
|
||||
]
|
||||
),
|
||||
]
|
||||
# ChatCompletionRequest only supports ImageChunk natively;
|
||||
# for other modalities (e.g. audio), fall back to the model's
|
||||
# own dummy inputs builder which knows the right placeholders.
|
||||
has_non_image = any(
|
||||
k != "image" and count > 0 for k, count in mm_counts.items()
|
||||
)
|
||||
res = tokenizer.mistral.encode_chat_completion(request)
|
||||
|
||||
# Mistral does not support decode_tokens with skip_special_tokens=False
|
||||
text_prompt = None
|
||||
token_prompt = res.tokens
|
||||
if has_non_image:
|
||||
inputs = dummy_inputs.get_dummy_processor_inputs(
|
||||
model_config.max_model_len,
|
||||
mm_counts,
|
||||
)
|
||||
text_prompt = None
|
||||
token_prompt = (
|
||||
inputs.prompt
|
||||
if isinstance(inputs.prompt, list)
|
||||
else tokenizer.encode(inputs.prompt, add_special_tokens=False)
|
||||
)
|
||||
else:
|
||||
images = parsed_data.get("image", [])
|
||||
request = ChatCompletionRequest(
|
||||
messages=[
|
||||
UserMessage(
|
||||
content=[
|
||||
TextChunk(text=""),
|
||||
*(ImageChunk(image=image) for image in images),
|
||||
]
|
||||
),
|
||||
]
|
||||
)
|
||||
res = tokenizer.mistral.encode_chat_completion(request)
|
||||
|
||||
# Mistral does not support decode_tokens with
|
||||
# skip_special_tokens=False
|
||||
text_prompt = None
|
||||
token_prompt = res.tokens
|
||||
else:
|
||||
inputs = dummy_inputs.get_dummy_processor_inputs(
|
||||
model_config.max_model_len,
|
||||
|
||||
Reference in New Issue
Block a user