Support Longchat and RoPE scaling (#555)

Co-authored-by: Wing Lian <wing.lian@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Lily Liu
2023-09-27 03:36:02 -07:00
committed by GitHub
parent cf5cb1e33e
commit 21877b0d75
4 changed files with 211 additions and 40 deletions

View File

@@ -351,6 +351,17 @@ def _get_and_verify_max_len(
if max_len_key is not None:
derived_max_model_len = min(derived_max_model_len, max_len_key)
rope_scaling = getattr(hf_config, "rope_scaling", None)
if rope_scaling is not None:
if derived_max_model_len == float("inf"):
raise ValueError(
"When using rope_scaling, the model's config.json must "
"contain one of the following keys to determine the original "
f"maximum length of the model: {possible_keys}")
assert "factor" in rope_scaling
scaling_factor = rope_scaling["factor"]
derived_max_model_len *= scaling_factor
if max_model_len is None:
max_model_len = derived_max_model_len
elif max_model_len > derived_max_model_len: