diff --git a/vllm/config/model.py b/vllm/config/model.py index 7d2409d70..b12202f9c 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -2021,6 +2021,15 @@ def _get_and_verify_max_len( if rope_type == "yarn": derived_max_model_len = rp["original_max_position_embeddings"] + if scaling_factor is None: + # Fall back to a factor of 1.0 if the user assigned `null` + logger.warning_once( + "The model's RoPE configuration has a null scaling " + "factor, which is unexpected. This likely indicates a bug " + "in the model's HuggingFace config.json. Please notify the " + "model vendor. Falling back to 1.0." + ) + scaling_factor = 1.0 # Do this outside loop since all layer types should have the same scaling derived_max_model_len *= scaling_factor