diff --git a/tests/models/language/generation_ppl_test/test_gemma.py b/tests/models/language/generation_ppl_test/test_gemma.py
index 5324de143..b846bb702 100644
--- a/tests/models/language/generation_ppl_test/test_gemma.py
+++ b/tests/models/language/generation_ppl_test/test_gemma.py
@@ -7,9 +7,9 @@ from tests.models.utils import GenerateModelInfo
 from .ppl_utils import wikitext_ppl_test
 
 MODELS = [
-    GenerateModelInfo("google/gemma-2b"),
-    GenerateModelInfo("google/gemma-2-2b"),
-    GenerateModelInfo("google/gemma-3-4b-it"),
+    GenerateModelInfo("google/gemma-2b", hf_ppl=21.48524284362793),
+    GenerateModelInfo("google/gemma-2-2b", hf_ppl=102.59290313720703),
+    GenerateModelInfo("google/gemma-3-4b-it", hf_ppl=27.79648208618164),
 ]
 
 
diff --git a/tests/models/language/generation_ppl_test/test_gpt.py b/tests/models/language/generation_ppl_test/test_gpt.py
index f3f9e55a2..784f3e85a 100644
--- a/tests/models/language/generation_ppl_test/test_gpt.py
+++ b/tests/models/language/generation_ppl_test/test_gpt.py
@@ -6,7 +6,7 @@ from tests.models.utils import GenerateModelInfo
 
 from .ppl_utils import wikitext_ppl_test
 
-MODELS = [GenerateModelInfo("openai-community/gpt2-large")]
+MODELS = [GenerateModelInfo("openai-community/gpt2-large", hf_ppl=19.457056045532227)]
 
 
 @pytest.mark.parametrize("model_info", MODELS)
diff --git a/tests/models/language/generation_ppl_test/test_qwen.py b/tests/models/language/generation_ppl_test/test_qwen.py
index 0d3127cba..60e69c3f8 100644
--- a/tests/models/language/generation_ppl_test/test_qwen.py
+++ b/tests/models/language/generation_ppl_test/test_qwen.py
@@ -8,14 +8,20 @@ from tests.models.utils import GenerateModelInfo
 from .ppl_utils import wikitext_ppl_test
 
 MODELS = [
-    GenerateModelInfo("Qwen/Qwen3-0.6B"),
-    GenerateModelInfo("Qwen/Qwen3-0.6B-FP8"),
-    # transformers:
-    # Loading a GPTQ quantized model requires optimum, gptqmodel
-    # GenerateModelInfo("Qwen/Qwen3-0.6B-GPTQ-Int8"),
+    # for Qwen3
+    GenerateModelInfo("Qwen/Qwen3-0.6B", hf_ppl=23.864173889160156),
+    GenerateModelInfo("Qwen/Qwen3-0.6B-FP8", hf_ppl=24.313045501708984),
+    # for Qwen3.5
+    GenerateModelInfo("Qwen/Qwen3.5-0.8B", hf_ppl=19.38858413696289),
 ]
 
 
 @pytest.mark.parametrize("model_info", MODELS)
 def test_ppl(hf_runner, vllm_runner, model_info: GenerateModelInfo):
-    wikitext_ppl_test(hf_runner, vllm_runner, model_info)
+    vllm_extra_kwargs = {}
+    if model_info.name == "Qwen/Qwen3.5-0.8B":
+        vllm_extra_kwargs["language_model_only"] = True
+
+    wikitext_ppl_test(
+        hf_runner, vllm_runner, model_info, vllm_extra_kwargs=vllm_extra_kwargs
+    )