Align max_tokens behavior with openai (#852)

Wen Sun
2023-09-24 09:10:13 +08:00
committed by GitHub
parent 9f6be8692e
commit bbbf86565f
2 changed files with 3 additions and 1 deletion


@@ -130,6 +130,8 @@ async def check_length(
     input_ids = tokenizer(prompt).input_ids
     token_num = len(input_ids)
 
+    if request.max_tokens is None:
+        request.max_tokens = max_model_len - token_num
     if token_num + request.max_tokens > max_model_len:
         return input_ids, create_error_response(
             HTTPStatus.BAD_REQUEST,
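
In effect, a request that omits max_tokens now defaults to the largest completion that still fits in the context window, matching OpenAI's API behavior, instead of tripping the length check. Below is a minimal, self-contained sketch of the resulting logic; max_model_len, the Request class, and resolve_max_tokens are stand-ins for vLLM's actual server state and helpers, not its real API:

from dataclasses import dataclass
from typing import Optional

max_model_len = 4096  # hypothetical context window for illustration

@dataclass
class Request:
    max_tokens: Optional[int] = None  # None now means "use the remaining budget"

def resolve_max_tokens(prompt_token_num: int, request: Request) -> int:
    # Mirror the commit: an omitted max_tokens becomes whatever room the
    # prompt leaves in the context window.
    if request.max_tokens is None:
        request.max_tokens = max_model_len - prompt_token_num
    # The pre-existing overflow check still applies afterwards.
    if prompt_token_num + request.max_tokens > max_model_len:
        raise ValueError(
            f"maximum context length is {max_model_len} tokens, got "
            f"{prompt_token_num} prompt + {request.max_tokens} completion")
    return request.max_tokens

# Usage: a 100-token prompt with no max_tokens gets the full remaining budget.
assert resolve_max_tokens(100, Request()) == 3996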