[V1] Add tree drafting tests for eagle spec decoding (#22705)

Signed-off-by: Giancarlo Delfin <gdelfin@meta.com>
This commit is contained in:
Giancarlo Delfin
2025-08-13 04:11:28 -07:00
committed by GitHub
parent 3f52738dce
commit d94e3026de
4 changed files with 178 additions and 55 deletions

View File

@@ -39,12 +39,6 @@ def test_eagle_max_len(monkeypatch: pytest.MonkeyPatch,
num_speculative_tokens: int, attn_backend: str):
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
if attn_backend == "TREE_ATTN" and num_speculative_tokens > 1:
# TREE_ATTN fails the test with multi-token spec decode
# TODO: Investigate why
pytest.skip("TREE_ATTN fails the test")
m.setenv("VLLM_ATTENTION_BACKEND", attn_backend)
if (attn_backend == "TRITON_ATTN_VLLM_V1"