[Speculators][Speculative Decoding] Add Qwen Eagle3 Support (#21835)

Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com>
This commit is contained in:
Dipika Sikka
2025-08-01 22:43:37 -04:00
committed by GitHub
parent a65f46be5e
commit 9f9c38c392
4 changed files with 46 additions and 11 deletions

View File

@@ -6,11 +6,21 @@ import torch
@pytest.mark.parametrize(
    "model_path",
    [("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717"),
     ("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized")])
def test_llama(vllm_runner, example_prompts, model_path):
    """Smoke-test greedy generation with Llama Eagle3 speculator checkpoints.

    Loads each parametrized model in bfloat16 via the ``vllm_runner``
    fixture and asserts that greedy decoding over ``example_prompts``
    produces non-empty output. This is an end-to-end load-and-generate
    check, not an output-quality check.
    """
    # NOTE: the original hunk had a duplicated (diff-artifact) list line
    # after the closing bracket, which would be a SyntaxError; the list
    # here covers both the base and quantized checkpoints.
    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
        vllm_outputs = vllm_model.generate_greedy(example_prompts,
                                                  max_tokens=20)
        print(vllm_outputs)
        # Non-empty output is sufficient: this guards model loading and
        # the speculative-decoding path, not generation quality.
        assert vllm_outputs
@pytest.mark.parametrize(
    "model_path",
    [("nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized")])
def test_qwen(vllm_runner, example_prompts, model_path):
    """Smoke-test greedy generation with a quantized Qwen3 Eagle3 speculator.

    Loads the parametrized checkpoint in bfloat16 via the ``vllm_runner``
    fixture, runs greedy decoding over ``example_prompts``, and asserts
    the output is non-empty — an end-to-end load-and-generate check.
    """
    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
        outputs = vllm_model.generate_greedy(example_prompts, max_tokens=20)
        print(outputs)
        assert outputs