[Core] Gate prompt_embeds behind a feature flag (#17607)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-05-04 00:19:20 +08:00
committed by GitHub
parent a92842454c
commit 887d7af882
8 changed files with 84 additions and 33 deletions

View File

@@ -43,6 +43,7 @@ def test_prepare_prompt(batch_size, use_prompt_embeds, monkeypatch):
max_num_batched_tokens=100000,
max_num_seqs=100000,
enable_chunked_prefill=False,
enable_prompt_embeds=True,
)
seq_lens: list[int] = []
@@ -179,6 +180,7 @@ def test_prepare_decode_cuda_graph(batch_size, use_prompt_embeds, monkeypatch):
max_num_batched_tokens=100000,
max_num_seqs=100000,
enable_chunked_prefill=False,
enable_prompt_embeds=True,
)
context_lens: list[int] = []
@@ -359,6 +361,7 @@ def test_hybrid_batches(batch_size, enforce_eager, use_prompt_embeds,
max_num_batched_tokens=100000,
max_num_seqs=100000,
enable_chunked_prefill=True,
enable_prompt_embeds=True,
)
# Add prefill requests.