[Frontend] Do prompt_logprobs clamping for chat as well as completions (#14225)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-03-04 21:13:06 +01:00
committed by GitHub
parent 9badee53de
commit e5b2f1601a
3 changed files with 22 additions and 11 deletions

View File

@@ -24,7 +24,8 @@ from vllm.entrypoints.openai.protocol import (
RequestResponseMetadata, ToolCall, UsageInfo)
from vllm.entrypoints.openai.reasoning_parsers import (ReasoningParser,
ReasoningParserManager)
-from vllm.entrypoints.openai.serving_engine import OpenAIServing
+from vllm.entrypoints.openai.serving_engine import (OpenAIServing,
+                                                    clamp_prompt_logprobs)
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
@@ -844,7 +845,7 @@ class OpenAIServingChat(OpenAIServing):
model=model_name,
choices=choices,
usage=usage,
-        prompt_logprobs=final_res.prompt_logprobs,
+        prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),
)
return response