Add think chunk (#21333)

Signed-off-by: Julien Denize <julien.denize@mistral.ai>
Author: Julien Denize
Date: 2025-07-24 06:51:32 +02:00
Committed by: GitHub
Parent: 11ef7a611e
Commit: 6d8d0a24c0
11 changed files with 682 additions and 13 deletions


@@ -145,6 +145,21 @@ def find_tokenizer_file(files: list[str]):
     return matched_files[0]


+def _aggregate_content(content: list) -> list[dict[str, Any]]:
+    aggregated_content: list[dict[str, Any]] = []
+    for chunk in content:
+        if (chunk.get("type") == "text" and aggregated_content
+                and aggregated_content[-1].get("type") == "text"):
+            aggregated_content[-1]["text"] += "\n\n" + chunk.get("text")
+        else:
+            aggregated_content.append(chunk)
+    if (len(aggregated_content) == 1
+            and aggregated_content[0].get("type") == "text"):
+        content = aggregated_content[0]["text"]
+    return content
+
+
 def make_mistral_chat_completion_request(
     messages: list["ChatCompletionMessageParam"],
     tools: Optional[list[dict[str,
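
For context, the new `_aggregate_content` helper merges adjacent text chunks with a blank line between them and, when the whole list collapses to a single text chunk, returns a bare string (despite the `list[dict[str, Any]]` return annotation). A hypothetical input/output pair, not taken from the commit; the `"thinking"` chunk shape is an assumption:

    content = [
        {"type": "text", "text": "Step 1"},
        {"type": "text", "text": "Step 2"},
    ]
    # _aggregate_content(content) -> "Step 1\n\nStep 2"

    content = [
        {"type": "thinking", "thinking": "..."},  # assumed chunk shape
        {"type": "text", "text": "Answer"},
    ]
    # _aggregate_content(content) -> the same two-chunk list, unchanged
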
@@ -162,10 +177,10 @@ def make_mistral_chat_completion_request(
         # Convert list text content to string
         if message.get("role") in ("assistant", "tool"):
-            content = message.get("content")
+            content: Any = message.get("content")
             if isinstance(content, list):
-                content = "\n".join(chunk.get("text") for chunk in content)
-            message["content"] = content
+                content = _aggregate_content(content)
+            message["content"] = content

     # The Mistral client, in comparison to the OpenAI client, requires the
     # "parameters" dict to be present, even if it's empty.
@@ -465,6 +480,8 @@ class MistralTokenizer(TokenizerBase):
         skip_special_tokens: bool = True,
     ) -> list[str]:
         from mistral_common.tokens.tokenizers.base import SpecialTokens
+        from mistral_common.tokens.tokenizers.instruct import (
+            InstructTokenizerV13)

         # TODO(Patrick) - potentially allow special tokens to not be skipped
         assert (
@@ -474,10 +491,18 @@ class MistralTokenizer(TokenizerBase):
         assert self.is_tekken or self.is_spm, type(self.tokenizer)

         if self.is_tekken:
-            # skip special tokens except tool call
-            ids = [
-                i for i in ids if i > self.tokenizer.num_special_tokens or i ==
-                self.tokenizer.get_control_token(SpecialTokens.tool_calls)
-            ]
+            # skip special tokens except tool call and think tokens
+            non_skip_special_tokens = {
+                self.tokenizer.get_control_token(SpecialTokens.tool_calls)
+            }
+            if isinstance(self.instruct, InstructTokenizerV13):
+                if self.instruct.BEGIN_THINK:
+                    non_skip_special_tokens.add(self.instruct.BEGIN_THINK)
+                if self.instruct.END_THINK:
+                    non_skip_special_tokens.add(self.instruct.END_THINK)
+            ids = [
+                i for i in ids if i > self.tokenizer.num_special_tokens
+                or i in non_skip_special_tokens
+            ]

         tokens = [self.tokenizer.id_to_piece(id) for id in ids]
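
To make the new filter concrete, here is a standalone sketch; the token IDs and the special-token budget below are invented, and the names only mirror the attributes used above:

    # Hypothetical values for illustration only.
    num_special_tokens = 1000        # IDs at or below this are control tokens
    TOOL_CALLS = 5                   # stand-in for get_control_token(SpecialTokens.tool_calls)
    BEGIN_THINK, END_THINK = 34, 35  # stand-ins for InstructTokenizerV13's think tokens

    non_skip_special_tokens = {TOOL_CALLS, BEGIN_THINK, END_THINK}

    ids = [34, 1234, 7, 1500, 35, 1600]
    kept = [
        i for i in ids
        if i > num_special_tokens or i in non_skip_special_tokens
    ]
    # kept == [34, 1234, 1500, 35, 1600] -- regular tokens plus the think
    # delimiters survive decoding; the ordinary special token 7 is dropped.
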