[CI] Replace large models with tiny alternatives in tests (#24057)
Signed-off-by: Tahsin Tunan <tahsintunan@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -20,7 +20,7 @@ from ..models.utils import check_outputs_equal
|
||||
from ..utils import multi_gpu_test
|
||||
|
||||
MODELS = [
|
||||
"google/gemma-2-2b-it",
|
||||
"hmellor/tiny-random-Gemma2ForCausalLM",
|
||||
"meta-llama/Llama-3.2-1B-Instruct",
|
||||
]
|
||||
|
||||
@@ -29,7 +29,7 @@ TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")
|
||||
|
||||
def test_vllm_gc_ed():
|
||||
"""Verify vllm instance is GC'ed when it is deleted"""
|
||||
llm = LLM("distilbert/distilgpt2")
|
||||
llm = LLM("hmellor/tiny-random-LlamaForCausalLM")
|
||||
weak_llm = weakref.ref(llm)
|
||||
del llm
|
||||
# If there's any circular reference to vllm, this fails
|
||||
@@ -125,14 +125,14 @@ def test_models(
|
||||
@pytest.mark.parametrize(
|
||||
"model, distributed_executor_backend, attention_backend, test_suite, extra_env",
|
||||
[
|
||||
("distilbert/distilgpt2", "ray", "", "L4", {}),
|
||||
("distilbert/distilgpt2", "mp", "", "L4", {}),
|
||||
("distilbert/distilgpt2", "ray", "", "L4", {"VLLM_SLEEP_WHEN_IDLE": "1"}),
|
||||
("distilbert/distilgpt2", "mp", "", "L4", {"VLLM_SLEEP_WHEN_IDLE": "1"}),
|
||||
("facebook/opt-125m", "ray", "", "L4", {}),
|
||||
("facebook/opt-125m", "mp", "", "L4", {}),
|
||||
("facebook/opt-125m", "ray", "", "L4", {"VLLM_SLEEP_WHEN_IDLE": "1"}),
|
||||
("facebook/opt-125m", "mp", "", "L4", {"VLLM_SLEEP_WHEN_IDLE": "1"}),
|
||||
("meta-llama/Llama-3.2-1B-Instruct", "ray", "", "L4", {}),
|
||||
("meta-llama/Llama-3.2-1B-Instruct", "mp", "", "L4", {}),
|
||||
("distilbert/distilgpt2", "ray", "", "A100", {}),
|
||||
("distilbert/distilgpt2", "mp", "", "A100", {}),
|
||||
("facebook/opt-125m", "ray", "", "A100", {}),
|
||||
("facebook/opt-125m", "mp", "", "A100", {}),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("enable_prompt_embeds", [True, False])
|
||||
|
||||
@@ -6,5 +6,5 @@ from ..utils import compare_two_settings
|
||||
|
||||
def test_cpu_offload():
|
||||
compare_two_settings(
|
||||
"meta-llama/Llama-3.2-1B-Instruct", [], ["--cpu-offload-gb", "1"]
|
||||
"hmellor/tiny-random-LlamaForCausalLM", [], ["--cpu-offload-gb", "1"]
|
||||
)
|
||||
|
||||
@@ -120,7 +120,7 @@ def test_cumem_with_cudagraph():
|
||||
"model",
|
||||
[
|
||||
# sleep mode with safetensors
|
||||
"meta-llama/Llama-3.2-1B",
|
||||
"hmellor/tiny-random-LlamaForCausalLM",
|
||||
# sleep mode with pytorch checkpoint
|
||||
"facebook/opt-125m",
|
||||
],
|
||||
@@ -174,7 +174,7 @@ def test_end_to_end(model: str):
|
||||
|
||||
@create_new_process_for_each_test()
|
||||
def test_deep_sleep():
|
||||
model = "Qwen/Qwen3-0.6B"
|
||||
model = "hmellor/tiny-random-LlamaForCausalLM"
|
||||
free, total = torch.cuda.mem_get_info()
|
||||
used_bytes_baseline = total - free # in case other process is running
|
||||
llm = LLM(model, enable_sleep_mode=True)
|
||||
|
||||
Reference in New Issue
Block a user