[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec decode CI tests. (#8975)

This commit is contained in:
Lily Liu
2024-09-30 17:51:40 -07:00
committed by GitHub
parent 1425a1bcf9
commit bce324487a
2 changed files with 4 additions and 3 deletions

View File

@@ -673,7 +673,10 @@ def test_use_draft_model_runner_advance_step():
worker.model_runner._gpu_advance_step.side_effect = ValueError(
exception_secret)
-seq_group_metadata_list, _, _ = create_batch(batch_size, k)
+seq_group_metadata_list, _, _ = create_batch(batch_size,
+k,
+block_size=block_size,
+num_gpu_blocks=num_gpu_blocks)
# Fallback (should not call) when num_steps=1.
execute_model_req = ExecuteModelRequest(