[ROCm][CI][Bugfix] Fixing the Multi-Modal Models Test (Extended) 1 group (#30013)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
Commit: e96a6a6dca
Parent: 6366c098d7
Author: Andreas Karatzas
Date: 2025-12-04 05:00:16 -06:00
Committed by: GitHub
10 changed files with 139 additions and 9 deletions


@@ -403,12 +403,13 @@ VLM_TEST_SETTINGS = {
        # So, we need to reduce the number of tokens for the test to pass.
        max_tokens=8,
        num_logprobs=10,
        auto_cls=AutoModelForCausalLM,
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "glm4_1v": VLMTestInfo(
        models=["zai-org/GLM-4.1V-9B-Thinking"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
+        prompt_formatter=lambda img_prompt: f"[gMASK]<|user|>\n{img_prompt}<|assistant|>\n", # noqa: E501
        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",
        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",
        max_model_len=2048,
@@ -423,6 +424,7 @@ VLM_TEST_SETTINGS = {
models=["zai-org/GLM-4.1V-9B-Thinking"],
# GLM4.1V require include video metadata for input
test_type=VLMTestType.CUSTOM_INPUTS,
prompt_formatter=lambda vid_prompt: f"[gMASK]<|user|>\n{vid_prompt}<|assistant|>\n", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
auto_cls=AutoModelForImageTextToText,
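The prompt formatters added in the two hunks above give GLM-4.1V its expected chat template: a [gMASK] prefix, user/assistant role tags, and a trailing newline after <|assistant|>. Below is a minimal, standalone sketch of how such a formatter combines with the per-image prompt stub; the sample question and the assembly order are illustrative assumptions, not code from the test harness.

# Illustrative stand-ins for the formatters registered in VLM_TEST_SETTINGS.
prompt_formatter = lambda img_prompt: f"[gMASK]<|user|>\n{img_prompt}<|assistant|>\n"
img_idx_to_prompt = lambda idx: "<|begin_of_image|><|image|><|end_of_image|>"

# Assemble a single-image prompt (assumption: the image placeholder precedes
# the question text, as is typical for these templates).
question = "What is shown in this image?"  # hypothetical sample question
full_prompt = prompt_formatter(img_idx_to_prompt(0) + question)
print(full_prompt)
# [gMASK]<|user|>
# <|begin_of_image|><|image|><|end_of_image|>What is shown in this image?<|assistant|>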
@@ -737,7 +739,13 @@ VLM_TEST_SETTINGS = {
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
-        marks=[large_gpu_mark(min_gb=48)],
+        marks=[
+            large_gpu_mark(min_gb=48),
+            pytest.mark.skipif(
+                current_platform.is_rocm(),
+                reason="Model produces a vector of <UNK> output in HF on ROCm",
+            ),
+        ],
    ),
    "qwen_vl": VLMTestInfo(
        models=["Qwen/Qwen-VL"],