[Core] Gate prompt_embeds behind a feature flag (#17607)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-05-04 00:19:20 +08:00
parent a92842454c
commit 887d7af882
8 changed files with 84 additions and 33 deletions
--- a/tests/worker/test_model_runner.py
+++ b/tests/worker/test_model_runner.py
@@ -43,6 +43,7 @@ def test_prepare_prompt(batch_size, use_prompt_embeds, monkeypatch):
        max_num_batched_tokens=100000,
        max_num_seqs=100000,
        enable_chunked_prefill=False,
+        enable_prompt_embeds=True,
    )

    seq_lens: list[int] = []
@@ -179,6 +180,7 @@ def test_prepare_decode_cuda_graph(batch_size, use_prompt_embeds, monkeypatch):
        max_num_batched_tokens=100000,
        max_num_seqs=100000,
        enable_chunked_prefill=False,
+        enable_prompt_embeds=True,
    )

    context_lens: list[int] = []
@@ -359,6 +361,7 @@ def test_hybrid_batches(batch_size, enforce_eager, use_prompt_embeds,
        max_num_batched_tokens=100000,
        max_num_seqs=100000,
        enable_chunked_prefill=True,
+        enable_prompt_embeds=True,
    )

    # Add prefill requests.