[Bugfix] Fix crash when tool_choice="required" output is truncated by max_tokens (#36841)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey
2026-03-12 18:28:45 +08:00
committed by GitHub
parent f0d3658c0f
commit 5a71cdd76e
3 changed files with 36 additions and 9 deletions

View File

@@ -1507,7 +1507,7 @@ class OpenAIServingChat(OpenAIServing):
elif request.tool_choice and request.tool_choice == "required":
tool_call_class_items = []
assert tool_calls is not None and len(tool_calls) > 0
tool_calls = tool_calls or []
for idx, tool_call in enumerate(tool_calls):
# Use native ID if available,
# otherwise generate ID with correct id_type

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import contextlib
import json
import time
from collections.abc import AsyncGenerator, Callable, Mapping, Sequence
@@ -13,7 +14,7 @@ from fastapi import Request
from openai.types.responses import (
ToolChoiceFunction,
)
from pydantic import ConfigDict, TypeAdapter
from pydantic import ConfigDict, TypeAdapter, ValidationError
from starlette.datastructures import Headers
import vllm.envs as envs
@@ -1125,17 +1126,19 @@ class OpenAIServing:
)
content = None # Clear content since tool is called.
elif request.tool_choice == "required":
assert content is not None
tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(content)
function_calls.extend(
[
tool_calls = []
with contextlib.suppress(ValidationError):
content = content or ""
tool_calls = TypeAdapter(list[FunctionDefinition]).validate_json(
content
)
for tool_call in tool_calls:
function_calls.append(
FunctionCall(
name=tool_call.name,
arguments=json.dumps(tool_call.parameters, ensure_ascii=False),
)
for tool_call in tool_calls
]
)
)
content = None # Clear content since tool is called.
elif (
tool_parser_cls