# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import importlib.util
|
2025-12-05 08:11:50 -08:00
|
|
|
import json
|
2026-02-20 22:03:32 -06:00
|
|
|
import logging
|
2025-12-02 08:24:45 -08:00
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
import pytest_asyncio
|
|
|
|
|
from openai import OpenAI
|
|
|
|
|
|
2026-03-17 01:14:52 -04:00
|
|
|
from tests.utils import RemoteOpenAIServer
|
|
|
|
|
|
2026-02-20 22:03:32 -06:00
|
|
|
from .conftest import (
|
|
|
|
|
BASE_TEST_ENV,
|
|
|
|
|
has_output_type,
|
|
|
|
|
log_response_diagnostics,
|
|
|
|
|
retry_for_tool_call,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Module-level logger used by the test helpers below.
logger = logging.getLogger(__name__)

# Model served by the module-scoped `server` fixture.
MODEL_NAME = "Qwen/Qwen3-8B"

# Instruction used by the MCP tool-call test to push the model toward
# actually executing Python instead of fabricating a result.
_PYTHON_TOOL_INSTRUCTION = (
    "You must use the Python tool to execute code. "
    "Never simulate execution. You must print the final answer."
)
|
|
|
|
|
|
2025-12-02 08:24:45 -08:00
|
|
|
|
|
|
|
|
@pytest.fixture(scope="module")
def server():
    """Launch one shared vLLM OpenAI-compatible server for this module."""
    assert importlib.util.find_spec("gpt_oss") is not None, (
        "Harmony tests require gpt_oss package to be installed"
    )

    # CLI options as (flag, value...) tuples, flattened into argv order.
    cli_options = [
        ("--reasoning-parser", "qwen3"),
        ("--max_model_len", "5000"),
        ("--structured-outputs-config.backend", "xgrammar"),
        ("--enable-auto-tool-choice",),
        ("--tool-call-parser", "hermes"),
        ("--tool-server", "demo"),
    ]
    args = [token for option in cli_options for token in option]

    # Base test environment plus the flags these tests depend on.
    env_dict = BASE_TEST_ENV | {
        "VLLM_ENABLE_RESPONSES_API_STORE": "1",
        "VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT": "1",
        "PYTHON_EXECUTION_BACKEND": "dangerously_use_uv",
    }
    with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_dict) as srv:
        yield srv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest_asyncio.fixture
async def client(server):
    """Yield an async OpenAI client bound to the module-scoped server."""
    async with server.get_async_client() as ac:
        yield ac
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_basic(client: OpenAI, model_name: str):
    """Smoke test: a simple arithmetic prompt completes successfully."""
    response = await client.responses.create(
        model=model_name,
        input="What is 123 * 456?",
        temperature=0.0,
    )
    assert response is not None
    # Use the module logger (lazy %-args) rather than print, consistent
    # with the rest of this file.
    logger.info("response: %s", response)
    assert response.status == "completed"
    # A completed response must not report truncation details.
    assert response.incomplete_details is None
|
2025-12-02 08:24:45 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_reasoning_and_function_items(client: OpenAI, model_name: str):
    """Replay a prior turn's items (user message, reasoning, function call
    and its output) as input and check the model still answers normally.
    """
    response = await client.responses.create(
        model=model_name,
        input=[
            {"type": "message", "content": "Hello.", "role": "user"},
            # A previously-emitted reasoning item fed back verbatim.
            {
                "type": "reasoning",
                "id": "lol",
                "content": [
                    {
                        "type": "reasoning_text",
                        "text": "We need to respond: greeting.",
                    }
                ],
                "summary": [],
            },
            # A completed function call from the earlier turn ...
            {
                "arguments": '{"location": "Paris", "unit": "celsius"}',
                "call_id": "call_5f7b38f3b81e4b8380fd0ba74f3ca3ab",
                "name": "get_weather",
                "type": "function_call",
                "id": "fc_4fe5d6fc5b6c4d6fa5f24cc80aa27f78",
                "status": "completed",
            },
            # ... and its matching output (same call_id pairs them).
            {
                "call_id": "call_5f7b38f3b81e4b8380fd0ba74f3ca3ab",
                "id": "fc_4fe5d6fc5b6c4d6fa5f24cc80aa27f78",
                "output": "The weather in Paris is 20 Celsius",
                "status": "completed",
                "type": "function_call_output",
            },
        ],
        temperature=0.0,
    )
    assert response is not None
    assert response.status == "completed"

    # The new turn should contain fresh reasoning plus a final message.
    output_types = [getattr(o, "type", None) for o in response.output]
    assert "reasoning" in output_types, (
        f"Expected reasoning in output, got: {output_types}"
    )
    assert "message" in output_types, f"Expected message in output, got: {output_types}"

    # The final message must carry plain-text content.
    msg = next(o for o in response.output if o.type == "message")
    assert type(msg.content[0].text) is str
|
2025-12-05 08:11:50 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_horoscope(sign):
    """Return a canned horoscope string for *sign* (deterministic test stub)."""
    template = "{}: Next Tuesday you will befriend a baby otter."
    return template.format(sign)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def call_function(name, args):
    """Dispatch a tool call by name; raise ValueError for unknown tools."""
    logger.info("Calling function %s with args %s", name, args)
    # Dispatch table keeps adding new tools to a single place.
    handlers = {"get_horoscope": get_horoscope}
    handler = handlers.get(name)
    if handler is None:
        raise ValueError(f"Unknown function: {name}")
    return handler(**args)
|
2025-12-05 08:11:50 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_function_call_first_turn(client: OpenAI, model_name: str):
    """The model should emit a function_call for a prompt that clearly
    matches the single advertised tool.
    """
    # One strict-schema tool: the model must supply a "sign" string.
    tools = [
        {
            "type": "function",
            "name": "get_horoscope",
            "description": "Get today's horoscope for an astrological sign.",
            "parameters": {
                "type": "object",
                "properties": {
                    "sign": {"type": "string"},
                },
                "required": ["sign"],
                "additionalProperties": False,
            },
            "strict": True,
        }
    ]

    # retry_for_tool_call re-issues the request until the expected tool
    # type appears (tool emission can be nondeterministic).
    response = await retry_for_tool_call(
        client,
        model=model_name,
        expected_tool_type="function_call",
        input="What is the horoscope for Aquarius today?",
        tools=tools,
        temperature=0.0,
    )
    assert response is not None
    assert response.status == "completed"

    output_types = [getattr(o, "type", None) for o in response.output]
    assert "reasoning" in output_types, (
        f"Expected reasoning in output, got: {output_types}"
    )
    assert has_output_type(response, "function_call"), (
        f"Expected function_call in output, got: {output_types}"
    )

    function_call = next(o for o in response.output if o.type == "function_call")
    assert function_call.name == "get_horoscope"
    assert function_call.call_id is not None

    # Arguments must be valid JSON containing the required parameter.
    args = json.loads(function_call.arguments)
    assert "sign" in args
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_mcp_tool_call(client: OpenAI, model_name: str):
    """MCP tool calling with code_interpreter.

    The model may make one or more tool calls before producing a final
    message. We validate server invariants (mcp_call items have correct
    fields) with hard assertions. Output indices are never hardcoded
    since the model can produce multiple tool-call rounds.
    """
    # MCP + container init + code execution can be slow
    client_with_timeout = client.with_options(timeout=client.timeout * 3)

    response = await retry_for_tool_call(
        client_with_timeout,
        model=model_name,
        expected_tool_type="mcp_call",
        input=(
            "What is 123 * 456? Use python to calculate the result. "
            "Print the result with print()."
        ),
        tools=[{"type": "code_interpreter", "container": {"type": "auto"}}],
        instructions=_PYTHON_TOOL_INSTRUCTION,
        temperature=0.0,
        extra_body={"enable_response_messages": True},
    )

    assert response is not None

    output_types = [getattr(o, "type", None) for o in response.output]
    log_response_diagnostics(response, label="test_mcp_tool_call")

    assert response.status == "completed", (
        f"Response status={response.status} "
        f"(details={getattr(response, 'incomplete_details', None)}). "
        f"Output types: {output_types}."
    )

    assert "reasoning" in output_types, (
        f"Expected reasoning in output, got: {output_types}"
    )
    assert "mcp_call" in output_types, (
        f"Expected mcp_call in output, got: {output_types}"
    )

    # Every mcp_call item must have well-typed fields.
    # isinstance (not `type(...) is`) is the idiomatic check and also
    # accepts str subclasses the client library may return.
    for item in response.output:
        if getattr(item, "type", None) == "mcp_call":
            assert isinstance(item.arguments, str), (
                f"mcp_call.arguments should be str, got {type(item.arguments)}"
            )
            assert isinstance(item.output, str), (
                f"mcp_call.output should be str, got {type(item.output)}"
            )

    # The model may make 1+ tool-call rounds but must still produce
    # a final message for a trivial calculation like 123 * 456.
    message_outputs = [
        o for o in response.output if getattr(o, "type", None) == "message"
    ]
    assert message_outputs, (
        f"Model did not produce a final message. Output types: {output_types}"
    )

    final_message = message_outputs[-1]
    assert any(s in final_message.content[0].text for s in ("56088", "56,088")), (
        f"Expected 56088 in final message, got: {final_message.content[0].text!r}"
    )

    # Validate raw input_messages / output_messages
    assert len(response.input_messages) >= 1, "Expected at least 1 input message"
    assert len(response.output_messages) >= 1, "Expected at least 1 output message"
    assert any(
        any(s in str(msg) for s in ("56088", "56,088"))
        for msg in response.output_messages
    ), (
        "Expected 56088 in at least one output_message, "
        f"got {len(response.output_messages)} messages"
    )
|
2026-01-09 16:00:57 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_max_tokens(client: OpenAI, model_name: str):
    """A tight max_output_tokens budget must yield an incomplete response."""
    resp = await client.responses.create(
        model=model_name,
        input="What is the first paragraph of Moby Dick?",
        reasoning={"effort": "low"},
        max_output_tokens=30,
        temperature=0.0,
    )
    assert resp is not None
    # Truncation surfaces as status="incomplete" with the budget as reason.
    assert resp.status == "incomplete"
    assert resp.incomplete_details.reason == "max_output_tokens"
|