Add --max-model-len auto to auto-fit context to available memory (#29431)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-12-24 00:37:14 -05:00
committed by GitHub
parent d7e05ac743
commit 8ee90c83f8
7 changed files with 313 additions and 31 deletions

View File

@@ -511,6 +511,16 @@ def test_human_readable_model_len():
args = parser.parse_args(["--max-model-len", "10.2123451234567t"])
assert args.max_model_len == 10212345123456
# Special value -1 for auto-fit to GPU memory
args = parser.parse_args(["--max-model-len", "-1"])
assert args.max_model_len == -1
# 'auto' is an alias for -1
args = parser.parse_args(["--max-model-len", "auto"])
assert args.max_model_len == -1
args = parser.parse_args(["--max-model-len", "AUTO"])
assert args.max_model_len == -1
# Invalid (do not allow decimals with binary multipliers)
for invalid in ["1a", "pwd", "10.24", "1.23M", "1.22T"]:
with pytest.raises(ArgumentError):