diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py
index df6c2cab7..5a90cb85f 100644
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@@ -10,6 +10,11 @@ from ....utils import large_gpu_mark
 from ...registry import HF_EXAMPLE_MODELS
 from ...utils import check_logprobs_close
 
+# Models whose embeddings are scaled by config.scale_emb in the prompt_embeds test
+EMBED_SCALING_MODELS = {
+    "openbmb/MiniCPM4.1-8B",
+}
+
 # This list contains the model that are using AITER kernel.
 # Skip model that are not using AITER tests.
 # When more AITER kernels are added, this list will not be
@@ -64,8 +69,8 @@ AITER_MODEL_LIST = [
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "openbmb/MiniCPM3-4B",
-            marks=[pytest.mark.core_model, large_gpu_mark(min_gb=32)],
+            "openbmb/MiniCPM4.1-8B",  # minicpm
+            marks=[pytest.mark.core_model, large_gpu_mark(min_gb=48)],
         ),
         pytest.param(
             "facebook/opt-125m",  # opt
@@ -135,16 +140,20 @@ def test_models(
 
         prompt_embeds: list[torch.Tensor] | None = [] if use_prompt_embeds else None
 
-        prompt_token_ids = []
         for prompt in example_prompts:
             token_ids = hf_model.tokenizer(prompt, return_tensors="pt").input_ids.to(
                 hf_model.model.device
             )
-            prompt_token_ids.append(token_ids)
             if prompt_embeds is not None:
-                prompt_embeds.append(
-                    hf_model.model.get_input_embeddings()(token_ids).squeeze(0)
-                )
+                embed = hf_model.model.get_input_embeddings()(token_ids)
+
+                # MiniCPM models apply scale_emb to embeddings internally, but vLLM
+                # expects pre-scaled embeddings for prompt_embeds, so scale them here.
+                if model in EMBED_SCALING_MODELS:
+                    config = hf_model.model.config
+                    embed = embed * config.scale_emb
+
+                prompt_embeds.append(embed.squeeze(0))
 
     with vllm_runner(
         model,
diff --git a/tests/models/registry.py b/tests/models/registry.py
index 4fe4c7a3a..ff62e7e91 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -355,6 +355,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
     "MiniCPM3ForCausalLM": _HfExamplesInfo(
         "openbmb/MiniCPM3-4B", trust_remote_code=True
     ),
+    "MiniCPM4ForCausalLM": _HfExamplesInfo(
+        "openbmb/MiniCPM4.1-8B", trust_remote_code=True
+    ),
     "MiniMaxForCausalLM": _HfExamplesInfo("MiniMaxAI/MiniMax-Text-01-hf"),
     "MiniMaxText01ForCausalLM": _HfExamplesInfo(
         "MiniMaxAI/MiniMax-Text-01",