[CI] Replace large models with tiny alternatives in tests (#24057)

Signed-off-by: Tahsin Tunan <tahsintunan@gmail.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Tahsin Tunan
2025-10-16 20:51:27 +06:00
committed by GitHub
parent 02d709a6f1
commit 43721bc67f
17 changed files with 118 additions and 59 deletions

View File

@@ -24,9 +24,11 @@ from ...utils import create_new_process_for_each_test, multi_gpu_test
if not current_platform.is_cuda():
pytest.skip(reason="V1 currently only supported on CUDA.", allow_module_level=True)
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
PROMPT = "Hello my name is Robert and I love quantization kernels"
# test_engine_core_concurrent_batches assumes exactly 12 tokens per prompt.
# Adjust prompt if changing model to maintain 12-token length.
PROMPT = "I am Gyoubu Masataka Oniwa"
PROMPT_TOKENS = TOKENIZER(PROMPT).input_ids