[Model] Support deepseek with eagle (#21086)

Signed-off-by: Xin Yang <xyangx@amazon.com>
2025-08-20 04:01:31 -07:00
parent 3aa8c10038
commit 83e69a09d6
4 changed files with 255 additions and 1 deletions
--- a/tests/v1/e2e/test_spec_decode.py
+++ b/tests/v1/e2e/test_spec_decode.py
@@ -144,6 +144,8 @@ def test_ngram_correctness(
             "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4),
            True,
            marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")),
+        (("eagle", "eagle618/deepseek-v3-random",
+          "eagle618/eagle-deepseek-v3-random", 1), False),
    ],
    ids=[
        # TODO: Re-enable this once tests/models/test_initialization.py is fixed, see PR #22333 #22611  # noqa: E501
@@ -151,7 +153,8 @@ def test_ngram_correctness(
        "llama3_eagle",
        "llama3_eagle3",
        "llama4_eagle",
-        "llama4_eagle_mm"
+        "llama4_eagle_mm",
+        "deepseek_eagle"
    ])
@pytest.mark.parametrize("attn_backend",
                         get_attn_backend_list_based_on_platform())
@@ -177,6 +180,7 @@ def test_eagle_correctness(
    '''
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")
+        m.setenv("VLLM_MLA_DISABLE", "1")
        m.setenv("VLLM_ATTENTION_BACKEND", attn_backend)

        if (attn_backend == "TRITON_ATTN_VLLM_V1"