Consolidate Llama model usage in tests (#13094)
@@ -20,7 +20,7 @@ from ..utils import multi_gpu_test

 MODELS = [
     "facebook/opt-125m",
-    "meta-llama/Llama-3.2-1B",
+    "meta-llama/Llama-3.2-1B-Instruct",
 ]

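Note: a MODELS list like this is normally consumed by a parametrized correctness test. The sketch below is illustrative only, assuming the usual pytest setup; the test name and body are not the file's actual code.

import pytest
from vllm import LLM, SamplingParams

# MODELS is the list from the hunk above; everything else here is an
# assumed, minimal stand-in for the real test body.
@pytest.mark.parametrize("model", MODELS)
def test_generates_output(model: str) -> None:
    llm = LLM(model=model, max_model_len=1024)
    outputs = llm.generate(["Hello, my name is"],
                           SamplingParams(max_tokens=8))
    assert outputs[0].outputs[0].text  # smoke check: some text was produced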
@@ -92,7 +92,7 @@ def test_models_distributed(
 ) -> None:
     override_backend_env_variable(monkeypatch, attention_backend)

-    if (model == "meta-llama/Llama-2-7b-hf"
+    if (model == "meta-llama/Llama-3.2-1B-Instruct"
             and distributed_executor_backend == "ray"):
         # test ray adag
         os.environ['VLLM_USE_RAY_SPMD_WORKER'] = "1"
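Note: this branch turns on the Ray SPMD worker path (the "ray adag" case) only for the consolidated Llama model. A hedged sketch of how that environment variable pairs with engine construction follows; the model choice and tensor_parallel_size here are illustrative, not taken from the test.

import os
from vllm import LLM

# The env var must be set before the engine is created (as in the diff above).
os.environ["VLLM_USE_RAY_SPMD_WORKER"] = "1"

llm = LLM(
    model="meta-llama/Llama-3.2-1B-Instruct",
    tensor_parallel_size=2,               # multi-GPU, as in the distributed test
    distributed_executor_backend="ray",   # execute workers through Ray
)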
@@ -221,7 +221,7 @@ def test_with_prefix_caching(
     Checks exact match decode with and without prefix caching
     with chunked prefill enabled.
     """
-    model = "meta-llama/Llama-2-7b-chat-hf"
+    model = "meta-llama/Llama-3.2-1B-Instruct"
     # The common prompt has 142 tokens with Llama-2 tokenizer.
     common_prompt = "You are a helpful AI assistant " * 20
     unique_prompts = [
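Note: the docstring describes an exact-match comparison of decodes with and without prefix caching, chunked prefill enabled in both runs. A minimal sketch of that comparison, assuming the helper below and the prompt construction are illustrative rather than the file's actual code:

from vllm import LLM, SamplingParams

def decode(enable_prefix_caching: bool) -> list[str]:
    # common_prompt / unique_prompts are the variables from the hunk above.
    llm = LLM(
        model="meta-llama/Llama-3.2-1B-Instruct",
        enable_chunked_prefill=True,
        enable_prefix_caching=enable_prefix_caching,
    )
    params = SamplingParams(temperature=0.0, max_tokens=16)  # greedy, so runs are comparable
    prompts = [common_prompt + p for p in unique_prompts]
    return [out.outputs[0].text for out in llm.generate(prompts, params)]

assert decode(True) == decode(False)  # the exact-match check the docstring describes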