[Frontend] Add sampling parameters to Responses API (#32609)

Signed-off-by: Daniel Mescheder <dmesch@amazon.com>
Co-authored-by: Daniel Mescheder <dmesch@amazon.com>
This commit is contained in:
Daniel Mescheder
2026-02-03 06:51:10 +01:00
committed by GitHub
parent 10546f925a
commit 4c4b6f7a97
3 changed files with 165 additions and 2 deletions

View File

@@ -1,7 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio
from openai import OpenAI
@@ -147,3 +146,27 @@ async def test_max_tokens(client: OpenAI, model_name: str):
assert response is not None
assert response.status == "incomplete"
assert response.incomplete_details.reason == "max_output_tokens"
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_extra_sampling_params(client: OpenAI, model_name: str):
    """Verify the Responses API accepts extra sampling parameters.

    Combines standard OpenAI sampling knobs (temperature, top_p) with
    vLLM-specific ones (top_k, repetition_penalty, seed) passed through
    extra_body, then checks the request succeeds and yields text.
    """
    resp = await client.responses.create(
        model=model_name,
        input="Write a short sentence",
        max_output_tokens=50,
        temperature=0.7,
        top_p=0.9,
        extra_body={
            "top_k": 40,
            "repetition_penalty": 1.2,
            "seed": 42,
        },
    )
    # Request must have been accepted: either it finished naturally or
    # was cut off by max_output_tokens — both count as success here.
    assert resp.status in {"completed", "incomplete"}
    # At least one output item, and the first item carries generated text.
    assert resp.output
    assert resp.output[0].content[0].text