[Bugfix] Multiple fixes to tool streaming with hermes and mistral (#10979)
Signed-off-by: cedonley <clayton@donley.io>
This commit is contained in:
@@ -496,21 +496,33 @@ class OpenAIServingChat(OpenAIServing):
|
||||
|
||||
if self._should_check_for_unstreamed_tool_arg_tokens(
|
||||
delta_message, output) and tool_parser:
|
||||
latest_delta_len = 0
|
||||
if ((isinstance(
|
||||
delta_message.tool_calls[0].function,
|
||||
DeltaFunctionCall)) and isinstance(
|
||||
delta_message.tool_calls[0].function.
|
||||
arguments, str)):
|
||||
latest_delta_len = len(
|
||||
delta_message.tool_calls[0].function.
|
||||
arguments)
|
||||
|
||||
# get the expected call based on partial JSON
|
||||
# parsing which "autocompletes" the JSON
|
||||
expected_call = json.dumps(
|
||||
tool_parser.prev_tool_call_arr[index].get(
|
||||
"arguments", {}))
|
||||
"arguments", {}),
|
||||
ensure_ascii=False)
|
||||
|
||||
# get what we've streamed so far for arguments
|
||||
# for the current tool
|
||||
actual_call = tool_parser.streamed_args_for_tool[
|
||||
index]
|
||||
if (latest_delta_len > 0):
|
||||
actual_call = actual_call[:-latest_delta_len]
|
||||
|
||||
# check to see if there's anything left to stream
|
||||
remaining_call = expected_call.replace(
|
||||
actual_call, "", 1)
|
||||
|
||||
# set that as a delta message
|
||||
delta_message = DeltaMessage(tool_calls=[
|
||||
DeltaToolCall(index=index,
|
||||
|
||||
Reference in New Issue
Block a user