[CI] Replace large models with tiny alternatives in tests (#24057)

Signed-off-by: Tahsin Tunan <tahsintunan@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-16 20:51:27 +06:00
parent 02d709a6f1
commit 43721bc67f
17 changed files with 118 additions and 59 deletions
--- a/tests/v1/shutdown/test_startup_error.py
+++ b/tests/v1/shutdown/test_startup_error.py
@@ -16,7 +16,7 @@ from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.utils import cuda_device_count_stateless
 from vllm.v1.engine.async_llm import AsyncLLM

-MODELS = ["meta-llama/Llama-3.2-1B"]
+MODELS = ["hmellor/tiny-random-LlamaForCausalLM"]


 def evil_method(self, *args, **kwargs):
@@ -76,8 +76,10 @@ def test_llm_startup_error(
    Test profiling (forward()) and load weights failures.
    TODO(andy) - LLM without multiprocessing.
    """
-    if model != "meta-llama/Llama-3.2-1B":
-        pytest.skip(reason="Only test meta-llama/Llama-3.2-1B")
+    # Skip non-Llama models since we monkeypatch LlamaForCausalLM specifically.
+    # If MODELS list grows, each architecture needs its own test variant.
+    if model != "JackFram/llama-68m":
+        pytest.skip(reason="Only test JackFram/llama-68m")
    if cuda_device_count_stateless() < tensor_parallel_size:
        pytest.skip(reason="Not enough CUDA devices")