[gpt-oss][1][bugfix] fix streaming final output (#24466)
Signed-off-by: Andrew Xia <axia@meta.com>
This commit is contained in:
@@ -151,6 +151,9 @@ class HarmonyContext(ConversationContext):
|
||||
self._update_decode_token_usage(output)
|
||||
# Move current turn to previous turn for next turn's calculations
|
||||
self.previous_turn = self.current_turn.copy()
|
||||
# append_output is called only once before tool calling
|
||||
# in non-streaming case
|
||||
# so we can append all the parser messages to _messages
|
||||
output_msgs = self.parser.messages
|
||||
# The responses finish reason is set in the last message
|
||||
self.finish_reason = output.outputs[0].finish_reason
|
||||
@@ -387,7 +390,7 @@ class StreamingHarmonyContext(HarmonyContext):
|
||||
|
||||
@property
|
||||
def messages(self) -> list:
|
||||
return self.parser.messages
|
||||
return self._messages
|
||||
|
||||
def append_output(self, output: Union[RequestOutput,
|
||||
list[Message]]) -> None:
|
||||
@@ -412,6 +415,11 @@ class StreamingHarmonyContext(HarmonyContext):
|
||||
# Check if the current token is part of reasoning content
|
||||
self._update_num_reasoning_tokens()
|
||||
self.last_tok = tok
|
||||
if len(self._messages) - self.num_init_messages < len(
|
||||
self.parser.messages):
|
||||
self._messages.extend(
|
||||
self.parser.messages[len(self._messages) -
|
||||
self.num_init_messages:])
|
||||
else:
|
||||
# Handle the case of tool output in direct message format
|
||||
assert len(output) == 1, "Tool output should be a single message"
|
||||
@@ -424,6 +432,7 @@ class StreamingHarmonyContext(HarmonyContext):
|
||||
for tok in toks:
|
||||
self.parser.process(tok)
|
||||
self.last_tok = toks[-1]
|
||||
# TODO: add tool_output messages to self._messages
|
||||
|
||||
def is_expecting_start(self) -> bool:
|
||||
return self.parser.state == StreamState.EXPECT_START
|
||||
|
||||
Reference in New Issue
Block a user