diff --git a/tests/compile/test_async_tp.py b/tests/compile/test_async_tp.py index d396d3940..102a929bf 100644 --- a/tests/compile/test_async_tp.py +++ b/tests/compile/test_async_tp.py @@ -332,7 +332,7 @@ def async_tp_pass_on_test_model( # this is a fake model name to construct the model config # in the vllm_config, it's not really used. - model_name = "nm-testing/TinyLlama-1.1B-Chat-v1.0-FP8-e2e" + model_name = "RedHatAI/Llama-3.2-1B-Instruct-FP8" vllm_config.model_config = ModelConfig( model=model_name, trust_remote_code=True, dtype=dtype, seed=42 ) diff --git a/tests/compile/test_fusion_all_reduce.py b/tests/compile/test_fusion_all_reduce.py index 7e5c460db..455d1bb03 100644 --- a/tests/compile/test_fusion_all_reduce.py +++ b/tests/compile/test_fusion_all_reduce.py @@ -229,7 +229,7 @@ def all_reduce_fusion_pass_on_test_model( # this is a fake model name to construct the model config # in the vllm_config, it's not really used. - model_name = "nm-testing/TinyLlama-1.1B-Chat-v1.0-FP8-e2e" + model_name = "RedHatAI/Llama-3.2-1B-Instruct-FP8" vllm_config.model_config = ModelConfig( model=model_name, trust_remote_code=True, dtype=dtype, seed=42 ) diff --git a/tests/compile/test_sequence_parallelism.py b/tests/compile/test_sequence_parallelism.py index afb31cb95..6abab88e6 100644 --- a/tests/compile/test_sequence_parallelism.py +++ b/tests/compile/test_sequence_parallelism.py @@ -278,7 +278,7 @@ def sequence_parallelism_pass_on_test_model( # this is a fake model name to construct the model config # in the vllm_config, it's not really used. - model_name = "nm-testing/TinyLlama-1.1B-Chat-v1.0-FP8-e2e" + model_name = "RedHatAI/Llama-3.2-1B-Instruct-FP8" vllm_config.model_config = ModelConfig( model=model_name, trust_remote_code=True, dtype=dtype, seed=42 )