[Bugfix] OpenAI entrypoint limits logprobs while ignoring the server-defined --max-logprobs (#5312)

Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
maor-ps
2024-06-11 05:30:31 +03:00
committed by GitHub
parent a008629807
commit 351d5e7b82
4 changed files with 12 additions and 9 deletions

View File

@@ -264,7 +264,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
prompt=[0, 0, 0, 0, 0],
max_tokens=5,
temperature=0.0,
logprobs=6,
# vLLM has higher default max_logprobs (20 instead of 5) to support
# both Completion API and Chat Completion API
logprobs=21,
)
...
with pytest.raises(
@@ -274,7 +276,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
prompt=[0, 0, 0, 0, 0],
max_tokens=5,
temperature=0.0,
logprobs=6,
# vLLM has higher default max_logprobs (20 instead of 5) to support
# both Completion API and Chat Completion API
logprobs=30,
stream=True,
)
async for chunk in stream: