[V1][Usage] Refactor speculative decoding configuration and tests (#14434)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -7,28 +7,39 @@ from vllm import SamplingParams
|
||||
from .conftest import get_output_from_llm_generator
|
||||
|
||||
|
||||
@pytest.mark.parametrize("common_llm_kwargs", [{
|
||||
"model": "meta-llama/Llama-3.2-1B-Instruct",
|
||||
"speculative_model": "JackFram/llama-68m",
|
||||
"num_speculative_tokens": 5,
|
||||
}])
|
||||
@pytest.mark.parametrize("common_llm_kwargs",
|
||||
[{
|
||||
"model": "meta-llama/Llama-3.2-1B-Instruct",
|
||||
}])
|
||||
@pytest.mark.parametrize(
|
||||
"per_test_common_llm_kwargs",
|
||||
[
|
||||
{
|
||||
# Speculative max model len > overridden max model len should raise.
|
||||
"speculative_config": {
|
||||
"model": "JackFram/llama-68m",
|
||||
"num_speculative_tokens": 5,
|
||||
"max_model_len": 129,
|
||||
},
|
||||
"max_model_len": 128,
|
||||
"speculative_max_model_len": 129,
|
||||
},
|
||||
{
|
||||
# Speculative max model len > draft max model len should raise.
|
||||
# https://huggingface.co/JackFram/llama-68m/blob/3b606af5198a0b26762d589a3ee3d26ee6fa6c85/config.json#L12
|
||||
"speculative_max_model_len": 2048 + 1,
|
||||
"speculative_config": {
|
||||
"model": "JackFram/llama-68m",
|
||||
"num_speculative_tokens": 5,
|
||||
"max_model_len": 2048 + 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Speculative max model len > target max model len should raise.
|
||||
# https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/blob/9213176726f574b556790deb65791e0c5aa438b6/config.json#L18
|
||||
"speculative_max_model_len": 131072 + 1,
|
||||
# https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/9213176726f574b556790deb65791e0c5aa438b6/config.json#L18
|
||||
"speculative_config": {
|
||||
"model": "JackFram/llama-68m",
|
||||
"num_speculative_tokens": 5,
|
||||
"max_model_len": 131072 + 1,
|
||||
},
|
||||
},
|
||||
])
|
||||
@pytest.mark.parametrize("test_llm_kwargs", [{}])
|
||||
|
||||
Reference in New Issue
Block a user