[ROCm][CI] Update MiniCPM model test: MiniCPM3-4B to MiniCPM4.1-8B and simplify attention backend testing (#31551)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
Andreas Karatzas
2025-12-31 02:12:01 -06:00
committed by GitHub
parent 357d435c54
commit cf16342d43
2 changed files with 19 additions and 7 deletions

View File

@@ -10,6 +10,11 @@ from ....utils import large_gpu_mark
from ...registry import HF_EXAMPLE_MODELS
from ...utils import check_logprobs_close
# Models that require embedding scaling in the prompt_embeds test.
# For any model listed here, the test multiplies the output of the HF
# model's input-embedding layer by `scale_emb` from the HF model config
# before collecting it as a prompt embedding.
EMBED_SCALING_MODELS = {
"openbmb/MiniCPM4.1-8B",
}
# This list contains the models that use AITER kernels.
# Models that are not in this list are skipped in the AITER tests.
# When more AITER kernels are added, this list will not be
@@ -64,8 +69,8 @@ AITER_MODEL_LIST = [
marks=[pytest.mark.core_model, pytest.mark.cpu_model],
),
pytest.param(
"openbmb/MiniCPM3-4B",
marks=[pytest.mark.core_model, large_gpu_mark(min_gb=32)],
"openbmb/MiniCPM4.1-8B", # minicpm
marks=[pytest.mark.core_model, large_gpu_mark(min_gb=48)],
),
pytest.param(
"facebook/opt-125m", # opt
@@ -135,16 +140,20 @@ def test_models(
prompt_embeds: list[torch.Tensor] | None = [] if use_prompt_embeds else None
prompt_token_ids = []
for prompt in example_prompts:
token_ids = hf_model.tokenizer(prompt, return_tensors="pt").input_ids.to(
hf_model.model.device
)
prompt_token_ids.append(token_ids)
if prompt_embeds is not None:
prompt_embeds.append(
hf_model.model.get_input_embeddings()(token_ids).squeeze(0)
)
embed = hf_model.model.get_input_embeddings()(token_ids)
# MiniCPM models apply scale_emb to embeddings internally.
# vLLM expects pre-scaled embeddings when using inputs_embeds.
if model in EMBED_SCALING_MODELS:
config = hf_model.model.config
embed = embed * config.scale_emb
prompt_embeds.append(embed.squeeze(0))
with vllm_runner(
model,