From 7bec4351305f7b99ff6385b778760d301e29a6f0 Mon Sep 17 00:00:00 2001
From: Chauncey <chaunceyjiang@gmail.com>
Date: Sat, 7 Feb 2026 01:23:44 +0800
Subject: [PATCH] [Bugfix] Fix the issue where tool calling does not work when
 using fast detokenization with dsv32 (#33964)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/tool_parsers/deepseekv32_tool_parser.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index 49c9540d6..30e23ed9f 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -110,6 +110,18 @@ class DeepSeekV32ToolParser(ToolParser):
         """Generate a unique tool call ID."""
         return f"call_{uuid.uuid4().hex[:24]}"
 
+    def adjust_request(self, request):
+        """Disable special-token skipping when the request can call tools."""
+        request = super().adjust_request(request)
+        if request.tools and request.tool_choice != "none":
+            # Ensure the tool call tokens
+            # (<｜DSML｜function_calls>, </｜DSML｜function_calls>)
+            # are not skipped during decoding. Even though they are not marked
+            # as special tokens, setting skip_special_tokens=False ensures proper
+            # handling in transformers 5.x where decoding behavior may have changed.
+            request.skip_special_tokens = False
+        return request
+
     def _reset_streaming_state(self):
         """Reset all streaming state."""
         self.current_tool_index = 0