[Feature][Frontend]: Add support for stream_options in ChatCompletionRequest (#5135)
This commit is contained in:
@@ -1343,5 +1343,106 @@ async def test_batch_embedding(embedding_server, client: openai.AsyncOpenAI,
|
||||
assert embeddings.usage.total_tokens == 17
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "model_name",
    [MODEL_NAME],
)
async def test_stream_options(server, client: openai.AsyncOpenAI,
                              model_name: str):
    """Exercise ``stream_options`` on the completions endpoint.

    Covers three streaming cases and three rejection cases:
      * stream=True with stream_options=None or include_usage=False:
        chunks arrive and no usage is reported on the final chunk.
      * stream=True with include_usage=True: every regular chunk has
        ``usage`` unset; a single extra final message carries the usage
        totals and an empty ``choices`` list.
      * stream=False with any stream_options value: the server rejects
        the request with ``BadRequestError``.
    """
    prompt = "What is the capital of France?"

    # Test stream=True, stream_options=None
    stream = await client.completions.create(
        model=model_name,
        prompt=prompt,
        max_tokens=5,
        temperature=0.0,
        stream=True,
        stream_options=None,
    )
    chunks = []
    async for chunk in stream:
        chunks.append(chunk.choices[0].text)
    assert len(chunks) > 0
    # NOTE: the previous `"usage" not in chunk` check was vacuous —
    # membership on a pydantic model compares against (key, value)
    # tuples, never the bare string — so assert on the attribute
    # (the include_usage=True branch below already uses this idiom).
    assert chunk.usage is None

    # Test stream=True, stream_options={"include_usage": False}
    stream = await client.completions.create(
        model=model_name,
        prompt=prompt,
        max_tokens=5,
        temperature=0.0,
        stream=True,
        stream_options={"include_usage": False},
    )
    chunks = []
    async for chunk in stream:
        chunks.append(chunk.choices[0].text)
    assert len(chunks) > 0
    assert chunk.usage is None

    # Test stream=True, stream_options={"include_usage": True}
    stream = await client.completions.create(
        model=model_name,
        prompt=prompt,
        max_tokens=5,
        temperature=0.0,
        stream=True,
        stream_options={"include_usage": True},
    )
    chunks = []
    finish_reason_count = 0
    async for chunk in stream:
        if chunk.choices[0].finish_reason is None:
            # Regular content chunks never carry usage.
            assert chunk.usage is None
            chunks.append(chunk.choices[0].text)
        else:
            # The finishing chunk also has no usage; usage arrives in a
            # dedicated trailing message (fetched below).
            assert chunk.usage is None
            finish_reason_count += 1

    # The last message should have usage and no choices
    last_message = await stream.__anext__()
    assert last_message.usage is not None
    assert last_message.usage.prompt_tokens > 0
    assert last_message.usage.completion_tokens > 0
    assert last_message.usage.total_tokens == (
        last_message.usage.prompt_tokens +
        last_message.usage.completion_tokens)
    assert last_message.choices == []

    # Test stream=False, stream_options={"include_usage": None}
    with pytest.raises(BadRequestError):
        await client.completions.create(
            model=model_name,
            prompt=prompt,
            max_tokens=5,
            temperature=0.0,
            stream=False,
            stream_options={"include_usage": None},
        )

    # Test stream=False, stream_options={"include_usage": False}
    with pytest.raises(BadRequestError):
        await client.completions.create(
            model=model_name,
            prompt=prompt,
            max_tokens=5,
            temperature=0.0,
            stream=False,
            stream_options={"include_usage": False},
        )

    # Test stream=False, stream_options={"include_usage": True}
    with pytest.raises(BadRequestError):
        await client.completions.create(
            model=model_name,
            prompt=prompt,
            max_tokens=5,
            temperature=0.0,
            stream=False,
            stream_options={"include_usage": True},
        )
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly (e.g. `python <file>`)
    # without invoking pytest from the command line.
    pytest.main([__file__])
|
||||
|
||||
Reference in New Issue
Block a user