[Bugfix] Multiple fixes to tool streaming with hermes and mistral (#10979)

Signed-off-by: cedonley <clayton@donley.io>
This commit is contained in:
Clayton
2024-12-11 17:10:12 -08:00
committed by GitHub
parent 4e11683368
commit 7439a8b5fc
3 changed files with 69 additions and 21 deletions

View File

@@ -496,21 +496,33 @@ class OpenAIServingChat(OpenAIServing):
if self._should_check_for_unstreamed_tool_arg_tokens(
delta_message, output) and tool_parser:
latest_delta_len = 0
if ((isinstance(
delta_message.tool_calls[0].function,
DeltaFunctionCall)) and isinstance(
delta_message.tool_calls[0].function.
arguments, str)):
latest_delta_len = len(
delta_message.tool_calls[0].function.
arguments)
# get the expected call based on partial JSON
# parsing which "autocompletes" the JSON
expected_call = json.dumps(
tool_parser.prev_tool_call_arr[index].get(
"arguments", {}))
"arguments", {}),
ensure_ascii=False)
# get what we've streamed so far for arguments
# for the current tool
actual_call = tool_parser.streamed_args_for_tool[
index]
if (latest_delta_len > 0):
actual_call = actual_call[:-latest_delta_len]
# check to see if there's anything left to stream
remaining_call = expected_call.replace(
actual_call, "", 1)
# set that as a delta message
delta_message = DeltaMessage(tool_calls=[
DeltaToolCall(index=index,