[Frontend] Add sampling parameters to Responses API (#32609)

Signed-off-by: Daniel Mescheder <dmesch@amazon.com>
Co-authored-by: Daniel Mescheder <dmesch@amazon.com>
This commit is contained in:
Daniel Mescheder
2026-02-03 06:51:10 +01:00
committed by GitHub
parent 10546f925a
commit 4c4b6f7a97
3 changed files with 165 additions and 2 deletions

View File

@@ -1,7 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import pytest_asyncio
from openai import OpenAI
@@ -147,3 +146,27 @@ async def test_max_tokens(client: OpenAI, model_name: str):
assert response is not None
assert response.status == "incomplete"
assert response.incomplete_details.reason == "max_output_tokens"
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_extra_sampling_params(client: OpenAI, model_name: str):
    """Verify the Responses API accepts extra sampling parameters.

    Combines standard OpenAI sampling knobs (temperature, top_p) with
    vLLM-specific ones (top_k, repetition_penalty, seed) passed through
    extra_body, then checks the request succeeds and yields text.
    """
    resp = await client.responses.create(
        model=model_name,
        input="Write a short sentence",
        max_output_tokens=50,
        temperature=0.7,
        top_p=0.9,
        extra_body={
            "top_k": 40,
            "repetition_penalty": 1.2,
            "seed": 42,
        },
    )
    # Request must have been accepted: either it finished naturally or
    # was cut off by max_output_tokens — both count as success here.
    assert resp.status in {"completed", "incomplete"}
    # At least one output item, and the first item carries generated text.
    assert resp.output
    assert resp.output[0].content[0].text