[CI] Add PPL test for Qwen3.5. (#35853)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io> Signed-off-by: wang.yuqi <noooop@126.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -8,14 +8,20 @@ from tests.models.utils import GenerateModelInfo
|
||||
from .ppl_utils import wikitext_ppl_test
|
||||
|
||||
# Models exercised by the wikitext perplexity (PPL) test below.
# hf_ppl is the reference perplexity previously measured with HuggingFace
# transformers; pinning it lets the test skip recomputing the HF baseline.
MODELS = [
    # transformers:
    # Loading a GPTQ quantized model requires optimum, gptqmodel
    # GenerateModelInfo("Qwen/Qwen3-0.6B-GPTQ-Int8"),
    # for Qwen3
    GenerateModelInfo("Qwen/Qwen3-0.6B", hf_ppl=23.864173889160156),
    GenerateModelInfo("Qwen/Qwen3-0.6B-FP8", hf_ppl=24.313045501708984),
    # for Qwen3.5
    GenerateModelInfo("Qwen/Qwen3.5-0.8B", hf_ppl=19.38858413696289),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_info", MODELS)
def test_ppl(hf_runner, vllm_runner, model_info: GenerateModelInfo):
    """Check vLLM wikitext perplexity against the HF reference for one model.

    Args:
        hf_runner: fixture providing the HuggingFace transformers runner.
        vllm_runner: fixture providing the vLLM runner.
        model_info: model under test, optionally carrying a pinned hf_ppl.
    """
    vllm_extra_kwargs: dict[str, object] = {}
    # Qwen3.5 needs language_model_only=True when loaded in vLLM here.
    # NOTE(review): presumably it is a multimodal checkpoint and only the
    # text backbone is relevant for PPL — confirm against vllm_runner.
    if model_info.name == "Qwen/Qwen3.5-0.8B":
        vllm_extra_kwargs["language_model_only"] = True

    wikitext_ppl_test(
        hf_runner, vllm_runner, model_info, vllm_extra_kwargs=vllm_extra_kwargs
    )
|
||||
|
||||
Reference in New Issue
Block a user