more speculative decoding fixes
@@ -57,6 +57,7 @@ from vllm.entrypoints.openai.engine.protocol import (
     FunctionCall,
     ToolCall,
 )
+from vllm.entrypoints.openai.responses.protocol import ResponsesRequest
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.tool_parsers.abstract_tool_parser import (
@@ -171,6 +172,20 @@ class KimiK2ToolParser(ToolParser):
             "Successfully initialized %s", self.__class__.__name__
         )
 
+    # ------------------------------------------------------------------
+    # Request adjustment
+    # ------------------------------------------------------------------
+
+    def adjust_request(
+        self, request: ChatCompletionRequest | ResponsesRequest
+    ) -> ChatCompletionRequest | ResponsesRequest:
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            # Ensure tool-call tokens (<|tool_calls_section_begin|>,
+            # <|tool_call_begin|>, etc.) are not stripped during decoding.
+            request.skip_special_tokens = False
+        return request
+
     # ------------------------------------------------------------------
     # Helpers
     # ------------------------------------------------------------------
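The comment inside the hunk is the heart of the fix: the parser locates tool calls by scanning the decoded output text for the special marker tokens, so leaving skip_special_tokens at its default of True would let the detokenizer strip those markers before the parser ever runs, and every tool call would be silently dropped. A minimal, self-contained sketch of that dependency follows; the <|tool_calls_section_end|> and <|tool_call_end|> names and the payload layout between markers are illustrative assumptions, not taken from this diff:

    import re

    # Marker layout modeled on the token names cited in the diff; the
    # *_end tokens and the payload format between them are assumptions.
    SECTION_RE = re.compile(
        r"<\|tool_calls_section_begin\|>(.*?)<\|tool_calls_section_end\|>",
        re.DOTALL,
    )
    CALL_RE = re.compile(
        r"<\|tool_call_begin\|>(.*?)<\|tool_call_end\|>",
        re.DOTALL,
    )

    def extract_tool_calls(decoded_text: str) -> list[str]:
        """Return the raw payload of each tool call in the decoded text."""
        section = SECTION_RE.search(decoded_text)
        if section is None:
            # With skip_special_tokens=True the detokenizer removes the
            # markers, this branch always triggers, and calls are lost.
            return []
        return CALL_RE.findall(section.group(1))

Flipping the flag in adjust_request, rather than at parse time, matters because the decision has to be made before generation starts: once the detokenizer has dropped the special tokens, there is no way to recover them downstream.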