[V1] Add tree drafting tests for eagle spec decoding (#22705)
Signed-off-by: Giancarlo Delfin <gdelfin@meta.com>
This commit is contained in:
@@ -39,12 +39,6 @@ def test_eagle_max_len(monkeypatch: pytest.MonkeyPatch,
                        num_speculative_tokens: int, attn_backend: str):
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "1")
-
-        if attn_backend == "TREE_ATTN" and num_speculative_tokens > 1:
-            # TREE_ATTN fails the test with multi-token spec decode
-            # TODO: Investigate why
-            pytest.skip("TREE_ATTN fails the test")
-
         m.setenv("VLLM_ATTENTION_BACKEND", attn_backend)
 
         if (attn_backend == "TRITON_ATTN_VLLM_V1"
Reference in New Issue
Block a user