[CI] Replace large models with tiny alternatives in tests (#24057)
Signed-off-by: Tahsin Tunan <tahsintunan@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -24,9 +24,11 @@ from ...utils import create_new_process_for_each_test, multi_gpu_test
|
||||
if not current_platform.is_cuda():
|
||||
pytest.skip(reason="V1 currently only supported on CUDA.", allow_module_level=True)
|
||||
|
||||
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
|
||||
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
|
||||
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
|
||||
PROMPT = "Hello my name is Robert and I love quantization kernels"
|
||||
# test_engine_core_concurrent_batches assumes exactly 12 tokens per prompt.
|
||||
# Adjust prompt if changing model to maintain 12-token length.
|
||||
PROMPT = "I am Gyoubu Masataka Oniwa"
|
||||
PROMPT_TOKENS = TOKENIZER(PROMPT).input_ids
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user