[Bugfix] Fix vLLM UsageInfo and logprobs None AssertionError with empty token_ids (#9034)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
Authored by Chang Su on 2024-10-15 15:40:43 -07:00; committed by GitHub. (This commit is contained in: branch list omitted in this capture.)
parent 22f8a69549
commit ba30942240
4 changed files with 140 additions and 2 deletions

View File

@@ -435,6 +435,12 @@ class OpenAIServingChat(OpenAIServing):
logprobs = None
delta_text = output.text
if not delta_text and not output.token_ids and \
not previous_num_tokens[i]:
# Chunked prefill case, don't return empty chunks
continue
delta_message: Optional[DeltaMessage]
# handle streaming deltas for tools with named tool_choice

View File

@@ -274,8 +274,6 @@ class OpenAIServingCompletion(OpenAIServing):
for output in res.outputs:
i = output.index + prompt_idx * num_choices
# TODO(simon): optimize the performance by avoiding full
# text O(n^2) sending.
assert request.max_tokens is not None
if request.echo and request.max_tokens == 0:
@@ -307,6 +305,11 @@ class OpenAIServingCompletion(OpenAIServing):
delta_token_ids = output.token_ids
out_logprobs = output.logprobs
if not delta_text and not delta_token_ids \
and not previous_num_tokens[i]:
# Chunked prefill case, don't return empty chunks
continue
if request.logprobs is not None:
assert out_logprobs is not None, (
"Did not output logprobs")