Add think chunk (#21333)

Signed-off-by: Julien Denize <julien.denize@mistral.ai>
2025-07-24 06:51:32 +02:00
parent 11ef7a611e
commit 6d8d0a24c0
11 changed files with 682 additions and 13 deletions
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -6,6 +6,10 @@ from collections.abc import Mapping
 from typing import Literal, Optional

 import pytest
+from mistral_common.tokens.tokenizers.base import (SpecialTokenPolicy,
+                                                   SpecialTokens)
+from mistral_common.tokens.tokenizers.tekken import (SpecialTokenInfo,
+                                                     Tekkenizer)

 from vllm.assets.audio import AudioAsset
 from vllm.assets.image import ImageAsset
@@ -21,6 +25,7 @@ from vllm.multimodal import MultiModalDataDict
 from vllm.multimodal.utils import (encode_audio_base64, encode_image_base64,
                                   encode_video_base64)
 from vllm.transformers_utils.tokenizer_group import TokenizerGroup
+from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer

 from ..models.registry import HF_EXAMPLE_MODELS
 from ..utils import VLLM_PATH
@@ -1374,3 +1379,165 @@ def test_resolve_content_format_examples(template_path, expected_format):
    )

    assert resolved_format == expected_format
+
+
+def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
+                                                    mistral_tokenizer):
+    messages = [{
+        "role":
+        "system",
+        "content": [{
+            "type": "text",
+            "text": "You are a helpful assistant."
+        }, {
+            "type":
+            "thinking",
+            "closed":
+            True,
+            "thinking":
+            "Only return the answer when you are confident."
+        }]
+    }, {
+        "role": "user",
+        "content": "What is 2+2?"
+    }, {
+        "role":
+        "assistant",
+        "content": [{
+            "type": "text",
+            "text": "Let me think about it."
+        }, {
+            "type": "thinking",
+            "closed": True,
+            "thinking": "2+2 = 4"
+        }, {
+            "type": "text",
+            "text": "The answer is 4.",
+        }],
+    }]
+
+    conversation_with_thinking, _ = parse_chat_messages(
+        messages,
+        mistral_model_config,
+        mistral_tokenizer,
+        content_format="openai",
+    )
+
+    expected_conversation = [{
+        "role":
+        "system",
+        "content": [{
+            "type": "text",
+            "text": "You are a helpful assistant."
+        }, {
+            "type": "text",
+            "text": "Only return the answer when you are confident."
+        }],
+    }, {
+        "role":
+        "user",
+        "content": [{
+            "type": "text",
+            "text": "What is 2+2?"
+        }],
+    }, {
+        "role":
+        "assistant",
+        "content": [
+            {
+                "type": "text",
+                "text": "Let me think about it."
+            },
+            {
+                "type": "text",
+                "text": "2+2 = 4"
+            },
+            {
+                "type": "text",
+                "text": "The answer is 4."
+            },
+        ]
+    }]
+
+    assert conversation_with_thinking == expected_conversation
+
+
+def test_apply_mistral_chat_template_thinking_chunk():
+    # Moved import here to avoid yapf and isort conflicts
+    from vllm.entrypoints.chat_utils import apply_mistral_chat_template
+    messages = [{
+        "role":
+        "system",
+        "content": [{
+            "type": "text",
+            "text": "You are a helpful assistant."
+        }, {
+            "type":
+            "thinking",
+            "closed":
+            True,
+            "thinking":
+            "Only return the answer when you are confident."
+        }]
+    }, {
+        "role": "user",
+        "content": "What is 2+2?"
+    }, {
+        "role":
+        "assistant",
+        "content": [{
+            "type": "text",
+            "text": "Let me think about it."
+        }, {
+            "type": "thinking",
+            "closed": True,
+            "thinking": "2+2 = 4"
+        }, {
+            "type": "text",
+            "text": "The answer is 4.",
+        }],
+    }, {
+        "role": "user",
+        "content": "Thanks, what is 3+3?"
+    }]
+
+    # TODO(Julien): upon model release change to a tokenizer already configured.
+    # =================================================================
+    mistral_tokenizer = MistralTokenizer.from_pretrained(
+        "mistralai/Devstral-Small-2507")
+    assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
+    # Add think special tokens to the tokenizer
+    mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
+        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value)
+    mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
+        rank=36, is_control=True, token_str=SpecialTokens.end_think.value)
+    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
+        k: v
+        for k, v in
+        mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
+        if v not in {35, 36}
+    }
+    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
+        SpecialTokens.begin_think.value] = 35
+    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
+        SpecialTokens.end_think.value] = 36
+    mistral_tokenizer.instruct.BEGIN_THINK = 35
+    mistral_tokenizer.instruct.END_THINK = 36
+    # =================================================================
+
+    tokens_ids = apply_mistral_chat_template(mistral_tokenizer,
+                                             messages,
+                                             chat_template=None,
+                                             tools=None)
+
+    string_tokens = mistral_tokenizer.mistral.decode(
+        tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP)
+
+    expected_tokens = (
+        r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the"
+        r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]"
+        r"[INST]What is 2+2?[/INST]"
+        r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>"
+        r"[INST]Thanks, what is 3+3?[/INST]")
+
+    assert string_tokens == expected_tokens