[Bugfix] Fix for Spec model TP + Chunked Prefill (#10232)
Signed-off-by: andoorve <37849411+andoorve@users.noreply.github.com> Signed-off-by: Sourashis Roy <sroy@roblox.com> Co-authored-by: Sourashis Roy <sroy@roblox.com>
This commit is contained in:
committed by
GitHub
parent
1f6584ee85
commit
db66e018ea
@@ -413,6 +413,45 @@ def test_chunked_prefill_preempt():
|
||||
assert out.num_batched_tokens == max_num_batched_tokens
|
||||
|
||||
|
||||
@pytest.mark.parametrize("num_scheduler_steps", [1, 5])
|
||||
def test_chunked_prefill_spec_prefill(num_scheduler_steps):
|
||||
"""Verify that the num_lookahead_slots is set appropriately for an all"""
|
||||
"""prefill batch depending on whether multi-step scheduling is enabled"""
|
||||
"""or not"""
|
||||
block_size = 4
|
||||
max_seqs = 30
|
||||
max_model_len = 200
|
||||
max_num_batched_tokens = 30
|
||||
num_lookahead_slots = 4
|
||||
scheduler_config = SchedulerConfig(
|
||||
"generate",
|
||||
max_num_batched_tokens,
|
||||
max_seqs,
|
||||
max_model_len,
|
||||
enable_chunked_prefill=True,
|
||||
num_lookahead_slots=num_lookahead_slots,
|
||||
num_scheduler_steps=num_scheduler_steps,
|
||||
)
|
||||
cache_config = CacheConfig(block_size, 1.0, 1, "auto")
|
||||
cache_config.num_cpu_blocks = 16
|
||||
cache_config.num_gpu_blocks = 16
|
||||
scheduler = Scheduler(scheduler_config, cache_config, None)
|
||||
|
||||
_, seq_group = create_dummy_prompt("1",
|
||||
prompt_length=30,
|
||||
block_size=block_size)
|
||||
scheduler.add_seq_group(seq_group)
|
||||
_, out = schedule_and_update_computed_tokens(scheduler)
|
||||
# The request is chunked.
|
||||
# prefill scheduled now.
|
||||
assert len(out.scheduled_seq_groups) == 1
|
||||
assert out.num_prefill_groups == 1
|
||||
assert out.num_batched_tokens == max_num_batched_tokens
|
||||
print(out.num_lookahead_slots)
|
||||
assert out.num_lookahead_slots == (0 if (num_scheduler_steps == 1) else
|
||||
num_lookahead_slots)
|
||||
|
||||
|
||||
def test_chunked_prefill_max_seqs():
|
||||
block_size = 4
|
||||
max_seqs = 2
|
||||
|
||||
Reference in New Issue
Block a user