[gpt-oss] Add IncompleteDetails to ResponsesResponse (#24561)

Signed-off-by: Andrew Xia <axia@meta.com>
This commit is contained in:
Andrew Xia
2025-09-15 13:07:55 -07:00
committed by GitHub
parent 94b03f88dd
commit 25aba2b6a3
7 changed files with 67 additions and 25 deletions

View File

@@ -112,6 +112,7 @@ class HarmonyContext(ConversationContext):
available_tools: list[str],
):
self._messages = messages
self.finish_reason: Optional[str] = None
self.available_tools = available_tools
self._tool_sessions: dict[str, Union[ClientSession, Tool]] = {}
self.called_tools: set[str] = set()
@@ -135,7 +136,8 @@ class HarmonyContext(ConversationContext):
if self.parser.current_channel in {"analysis", "commentary"}:
self.num_reasoning_tokens += 1
def append_output(self, output) -> None:
def append_output(self, output: Union[RequestOutput,
list[Message]]) -> None:
if isinstance(output, RequestOutput):
output_token_ids = output.outputs[0].token_ids
self.parser = get_streamable_parser_for_assistant()
@@ -150,6 +152,8 @@ class HarmonyContext(ConversationContext):
# Move current turn to previous turn for next turn's calculations
self.previous_turn = self.current_turn.copy()
output_msgs = self.parser.messages
# The responses finish reason is set in the last message
self.finish_reason = output.outputs[0].finish_reason
else:
# Tool output.
output_msgs = output
@@ -157,18 +161,18 @@ class HarmonyContext(ConversationContext):
def _update_prefill_token_usage(self, output: RequestOutput) -> None:
"""Update token usage statistics for the prefill phase of generation.
The prefill phase processes the input prompt tokens. This method:
1. Counts the prompt tokens for this turn
2. Calculates tool output tokens for multi-turn conversations
3. Updates cached token counts
4. Tracks state for next turn calculations
Tool output tokens are calculated as:
current_prompt_tokens - last_turn_prompt_tokens -
current_prompt_tokens - last_turn_prompt_tokens -
last_turn_output_tokens
This represents tokens added between turns (typically tool responses).
Args:
output: The RequestOutput containing prompt token information
"""
@@ -214,18 +218,18 @@ class HarmonyContext(ConversationContext):
def _update_decode_token_usage(self, output: RequestOutput) -> int:
"""Update token usage statistics for the decode phase of generation.
The decode phase processes the generated output tokens. This method:
1. Counts output tokens from all completion outputs
2. Updates the total output token count
3. Tracks tokens generated in the current turn
In streaming mode, this is called for each token generated.
In non-streaming mode, this is called once with all output tokens.
Args:
output: The RequestOutput containing generated token information
Returns:
int: Number of output tokens processed in this call
"""
@@ -385,7 +389,8 @@ class StreamingHarmonyContext(HarmonyContext):
def messages(self) -> list:
return self.parser.messages
def append_output(self, output) -> None:
def append_output(self, output: Union[RequestOutput,
list[Message]]) -> None:
if isinstance(output, RequestOutput):
# append_output is called for each output token in streaming case,
# so we only want to add the prompt tokens once for each message.