[CORE] Prompt Embeddings Support for v1 Engine (#24278)

Signed-off-by: Andrew Sansom <andrew@protopia.ai> Signed-off-by: Andrew Sansom <qthequartermasterman@gmail.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
2025-09-18 19:03:09 -05:00
parent 9fac6aa30b
commit 9a4600e4dc
20 changed files with 305 additions and 76 deletions
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@@ -125,12 +125,6 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str,
        # in parts of the operators
        pytest.skip(f"Skipping '{model}' model test with AITER kernel.")

-    # Note: can be removed when
-    # https://github.com/vllm-project/vllm/pull/24278 finished
-    if current_platform.is_cpu() and use_prompt_embeds:
-        pytest.skip("Skipping use_prompt_embeds=True with "
-                    "V1-only CPU backend.")
-
    with hf_runner(model) as hf_model:
        hf_outputs = hf_model.generate_greedy_logprobs_limit(
            example_prompts, max_tokens, num_logprobs)