Consolidate Llama model usage in tests (#13094)

This commit is contained in:
Harry Mellor
2025-02-14 06:18:03 +00:00
committed by GitHub
parent 40932d7a05
commit f2b20fe491
22 changed files with 45 additions and 53 deletions

View File

@@ -11,7 +11,7 @@ RTOL = 0.03
EXPECTED_VALUE = 0.62
# FIXME(rob): enable prefix caching once supported.
MODEL = "meta-llama/Llama-3.2-1B"
MODEL = "meta-llama/Llama-3.2-1B-Instruct"
MODEL_ARGS = f"pretrained={MODEL},enforce_eager=True,enable_prefix_caching=False" # noqa: E501
SERVER_ARGS = [
"--enforce_eager", "--no_enable_prefix_caching", "--disable-log-requests"