[Speculators][Speculative Decoding] Add Qwen Eagle3 Support (#21835)

Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com>
This commit is contained in:
Dipika Sikka
2025-08-01 22:43:37 -04:00
committed by GitHub
parent a65f46be5e
commit 9f9c38c392
4 changed files with 46 additions and 11 deletions

View File

@@ -6,11 +6,21 @@ import torch
@pytest.mark.parametrize(
    "model_path",
    [("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717"),
     ("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized")])
def test_llama(vllm_runner, example_prompts, model_path):
    """Smoke-test greedy generation with Llama Eagle3 speculator checkpoints.

    Loads each parametrized model in bfloat16 via the ``vllm_runner``
    fixture and asserts that greedy decoding over ``example_prompts``
    produces non-empty output. This is an end-to-end load-and-generate
    check, not an output-quality check.
    """
    # NOTE: the original hunk had a duplicated (diff-artifact) list line
    # after the closing bracket, which would be a SyntaxError; the list
    # here covers both the base and quantized checkpoints.
    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
        vllm_outputs = vllm_model.generate_greedy(example_prompts,
                                                  max_tokens=20)
        print(vllm_outputs)
        # Non-empty output is sufficient: this guards model loading and
        # the speculative-decoding path, not generation quality.
        assert vllm_outputs
@pytest.mark.parametrize(
    "model_path",
    [("nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized")])
def test_qwen(vllm_runner, example_prompts, model_path):
    """Smoke-test greedy generation with a quantized Qwen3 Eagle3 speculator.

    Loads the parametrized checkpoint in bfloat16 via the ``vllm_runner``
    fixture, runs greedy decoding over ``example_prompts``, and asserts
    the output is non-empty — an end-to-end load-and-generate check.
    """
    with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model:
        outputs = vllm_model.generate_greedy(example_prompts, max_tokens=20)
        print(outputs)
        assert outputs