[WIP][CI][Bugfix] Fix test_run_eagle_dp (#38584)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
@@ -69,9 +69,7 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch, attn_backend: str):
|
||||
)
|
||||
|
||||
prompt = "This is a test of data parallel with eagle"
|
||||
-# This test might be flaky, see
|
||||
-# https://github.com/vllm-project/vllm/issues/31913
|
||||
-num_expected_tokens = 20
|
||||
+num_expected_tokens = 100
|
||||
sampling_params = SamplingParams(
|
||||
max_tokens=num_expected_tokens,
|
||||
ignore_eos=True,
|
||||
|
||||
@@ -389,8 +389,11 @@ class FlashAttentionMetadataBuilder(AttentionMetadataBuilder[FlashAttentionMetad
|
||||
slot_mapping = common_attn_metadata.slot_mapping
|
||||
causal = common_attn_metadata.causal
|
||||
|
||||
-# the overhead of the aot schedule is not worth it for spec-decode
|
||||
-aot_schedule = self.aot_schedule and not fast_build
|
||||
+# Disable AOT schedule for spec-decode proposer (not worth the overhead)
|
||||
+# and for batch invariance (schedule varies with max_seqlen_q/k).
|
||||
+aot_schedule = (
|
||||
+self.aot_schedule and not fast_build and not envs.VLLM_BATCH_INVARIANT
|
||||
+)
|
||||
|
||||
if self.aot_sliding_window is None:
|
||||
self.aot_sliding_window = (-1, -1)
|
||||
|
||||
Reference in New Issue
Block a user