[Speculators] Move tests + fix integration (#27308)
Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com>
Signed-off-by: Rahul Tuli <rtuli@redhat.com>
Signed-off-by: rahul-tuli <rtuli@redhat.com>
Co-authored-by: Rahul Tuli <rtuli@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
This commit is contained in:
65
tests/v1/spec_decode/test_speculators_eagle3.py
Normal file
65
tests/v1/spec_decode/test_speculators_eagle3.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from vllm.config import SpeculativeConfig
|
||||
from vllm.model_executor.models.interfaces import supports_eagle3
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "model_path",
    [
        pytest.param(
            "nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized",
            id="llama3-eagle3-speculator",
        ),
        pytest.param(
            "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized",
            id="qwen3-eagle3-speculator",
        ),
        pytest.param(
            "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized-w4a16",
            id="qwen3-eagle3-speculator-w4a16-verifier",
        ),
    ],
)
def test_eagle3_speculators_model(
    vllm_runner, example_prompts, model_path, monkeypatch
):
    """Smoke-test that an Eagle3 speculators checkpoint enables spec decoding.

    Only ``model_path`` and a dtype are handed to the runner; no speculative
    settings are passed explicitly. The test then checks, in order:

    * the loaded model reports Eagle3 support;
    * the engine config carries a ``SpeculativeConfig`` instance;
    * that config requests a positive number of speculative tokens;
    * the draft model recorded in that config equals ``model_path``;
    * greedy generation over ``example_prompts`` yields a non-empty result.
    """
    # Per the original note: this env var is set for V1 engine serialization.
    # It must be in place before the runner (and thus the engine) is created.
    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

    with vllm_runner(model_path, dtype=torch.bfloat16) as runner:
        # Run the capability probe against the loaded model.
        assert runner.apply_model(supports_eagle3), (
            f"Eagle3 should be supported for {model_path}"
        )

        # The runner was given no speculative settings, so a populated config
        # here was derived from the checkpoint itself.
        spec_config = runner.llm.llm_engine.vllm_config.speculative_config
        assert isinstance(spec_config, SpeculativeConfig), (
            "Speculative config should be initialized for speculators model"
        )

        assert spec_config.num_speculative_tokens > 0, (
            f"Expected positive speculative tokens, "
            f"got {spec_config.num_speculative_tokens}"
        )

        assert spec_config.model == model_path, (
            f"Draft model should be {model_path}, got {spec_config.model}"
        )

        # End-to-end check: generation must return something with spec
        # decoding active.
        generations = runner.generate_greedy(example_prompts, max_tokens=20)
        assert generations, (
            f"No outputs generated for speculators model {model_path}"
        )
|
||||
Reference in New Issue
Block a user