[gpt-oss][1][bugfix] fix streaming final output (#24466)

Signed-off-by: Andrew Xia <axia@meta.com>
This commit is contained in:
Andrew Xia
2025-09-16 12:56:16 -07:00
committed by GitHub
parent dcf2f3ec06
commit 86daa875fe
3 changed files with 91 additions and 5 deletions

View File

@@ -151,6 +151,9 @@ class HarmonyContext(ConversationContext):
self._update_decode_token_usage(output)
# Move current turn to previous turn for next turn's calculations
self.previous_turn = self.current_turn.copy()
# append_output is called only once before tool calling
# in non-streaming case
# so we can append all the parser messages to _messages
output_msgs = self.parser.messages
# The responses finish reason is set in the last message
self.finish_reason = output.outputs[0].finish_reason
@@ -387,7 +390,7 @@ class StreamingHarmonyContext(HarmonyContext):
@property
def messages(self) -> list:
return self.parser.messages
return self._messages
def append_output(self, output: Union[RequestOutput,
list[Message]]) -> None:
@@ -412,6 +415,11 @@ class StreamingHarmonyContext(HarmonyContext):
# Check if the current token is part of reasoning content
self._update_num_reasoning_tokens()
self.last_tok = tok
if len(self._messages) - self.num_init_messages < len(
self.parser.messages):
self._messages.extend(
self.parser.messages[len(self._messages) -
self.num_init_messages:])
else:
# Handle the case of tool output in direct message format
assert len(output) == 1, "Tool output should be a single message"
@@ -424,6 +432,7 @@ class StreamingHarmonyContext(HarmonyContext):
for tok in toks:
self.parser.process(tok)
self.last_tok = toks[-1]
# TODO: add tool_output messages to self._messages
def is_expecting_start(self) -> bool:
return self.parser.state == StreamState.EXPECT_START