[Bugfix] Fix for Spec model TP + Chunked Prefill (#10232)

Signed-off-by: andoorve <37849411+andoorve@users.noreply.github.com> Signed-off-by: Sourashis Roy <sroy@roblox.com> Co-authored-by: Sourashis Roy <sroy@roblox.com>
2024-11-26 09:11:16 -08:00
parent 1f6584ee85
commit db66e018ea
8 changed files with 144 additions and 72 deletions
--- a/tests/spec_decode/test_spec_decode_worker.py
+++ b/tests/spec_decode/test_spec_decode_worker.py
@@ -867,7 +867,8 @@ def test_chunked_prefill_flow(k: int, batch_size: int, batch_composition: str):
    target_group_metadata_list = prefill + decodes
    execute_model_req = ExecuteModelRequest(
        seq_group_metadata_list=target_group_metadata_list,
-        num_lookahead_slots=k)
+        # For prefill only batches we expect num_lookahead_slots = 0.
+        num_lookahead_slots=k if n_decodes > 0 else 0)

    target_token_ids = torch.randint(low=0,
                                     high=vocab_size,