Consolidate Llama model usage in tests (#13094)

This commit is contained in:
Harry Mellor
2025-02-14 06:18:03 +00:00
committed by GitHub
parent 40932d7a05
commit f2b20fe491
22 changed files with 45 additions and 53 deletions

View File

@@ -8,7 +8,7 @@ from .conftest import get_output_from_llm_generator
@pytest.mark.parametrize("common_llm_kwargs", [{
-    "model": "meta-llama/Llama-2-7b-chat-hf",
+    "model": "meta-llama/Llama-3.2-1B-Instruct",
"speculative_model": "JackFram/llama-68m",
"num_speculative_tokens": 5,
}])
@@ -27,8 +27,8 @@ from .conftest import get_output_from_llm_generator
},
{
# Speculative max model len > target max model len should raise.
-        # https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/f5db02db724555f92da89c216ac04704f23d4590/config.json#L12
-        "speculative_max_model_len": 4096 + 1,
+        # https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/blob/9213176726f574b556790deb65791e0c5aa438b6/config.json#L18
+        "speculative_max_model_len": 131072 + 1,
},
])
@pytest.mark.parametrize("test_llm_kwargs", [{}])