Consolidate Llama model usage in tests (#13094)

This commit is contained in:
Harry Mellor
2025-02-14 06:18:03 +00:00
committed by GitHub
parent 40932d7a05
commit f2b20fe491
22 changed files with 45 additions and 53 deletions

View File

@@ -8,7 +8,7 @@ from .conftest import get_output_from_llm_generator
@pytest.mark.parametrize("common_llm_kwargs", [{
-    "model": "meta-llama/Llama-2-7b-chat-hf",
+    "model": "meta-llama/Llama-3.2-1B-Instruct",
"speculative_model": "JackFram/llama-68m",
"num_speculative_tokens": 5,
}])
@@ -27,8 +27,8 @@ from .conftest import get_output_from_llm_generator
},
{
# Speculative max model len > target max model len should raise.
-        # https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/f5db02db724555f92da89c216ac04704f23d4590/config.json#L12
-        "speculative_max_model_len": 4096 + 1,
+        # https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/blob/9213176726f574b556790deb65791e0c5aa438b6/config.json#L18
+        "speculative_max_model_len": 131072 + 1,
},
])
@pytest.mark.parametrize("test_llm_kwargs", [{}])