# tests/renderers/test_mistral.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import asyncio
import time
from dataclasses import dataclass
from typing import Any
from unittest.mock import Mock

import pytest
from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy

from vllm.renderers import ChatParams
from vllm.renderers.mistral import MistralRenderer, safe_apply_chat_template
from vllm.tokenizers.mistral import MistralTokenizer

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"


@dataclass
class MockHFConfig:
    """Minimal object used as the ``hf_config`` of the mocked model config."""

    # Only attribute defined on this stand-in; defaults to the placeholder "any".
    model_type: str = "any"


@dataclass
class MockModelConfig:
    """Lightweight model-config substitute wrapped in ``MockVllmConfig`` for tests.

    Only the annotated names are dataclass fields and therefore accepted as
    ``__init__`` keyword arguments. The unannotated names (``runner_type``,
    ``tokenizer_revision``, ``tokenizer_mode``, ``hf_config``) are plain class
    attributes shared by every instance.
    """

    # Class attribute (no annotation), not a constructor parameter.
    runner_type = "generate"
    model: str = MODEL_NAME
    tokenizer: str = MODEL_NAME
    trust_remote_code: bool = False
    max_model_len: int = 100
    # Class attributes (no annotation), not constructor parameters.
    tokenizer_revision = None
    tokenizer_mode = "mistral"
    # A single MockHFConfig instance, created once at class-definition time.
    hf_config = MockHFConfig()
    encoder_config: dict[str, Any] | None = None
    enable_prompt_embeds: bool = True
    # Overridden to True by the async test in this file, which supplies its
    # own mocked tokenizer.
    skip_tokenizer_init: bool = False
    is_encoder_decoder: bool = False
    is_multimodal_model: bool = False


@dataclass
class MockVllmConfig:
    """Container passed to ``MistralRenderer`` in tests; holds the model config."""

    # Required field: there is no default, so it must be supplied on construction.
    model_config: MockModelConfig


@pytest.mark.asyncio
async def test_async_mistral_tokenizer_does_not_block_event_loop():
    """Check that async message rendering keeps the event loop responsive.

    The tokenizer's ``apply_chat_template`` is replaced by a function that
    blocks its calling thread for two seconds. While the render is in flight,
    the event loop is probed repeatedly; a probe that takes far longer than a
    bare yield should is counted as evidence that the loop was blocked.
    """
    expected_tokens = [1, 2, 3]

    # Mock the blocking version to sleep
    def mocked_apply_chat_template(*_args, **_kwargs):
        time.sleep(2)
        return expected_tokens

    mock_model_config = MockModelConfig(skip_tokenizer_init=True)
    mock_tokenizer = Mock(spec=MistralTokenizer)
    mock_tokenizer.apply_chat_template = mocked_apply_chat_template
    mock_renderer = MistralRenderer(
        MockVllmConfig(mock_model_config),
        tokenizer=mock_tokenizer,
    )

    # Schedule the render on the running loop immediately. A bare coroutine
    # object does not start executing until it is awaited, in which case the
    # probes below would only ever observe an idle loop and the "not blocked"
    # assertion would pass vacuously. ensure_future accepts a coroutine, an
    # awaitable or an existing future, so it is safe whichever one is returned.
    task = asyncio.ensure_future(
        mock_renderer.render_messages_async([], ChatParams())
    )

    # Ensure the event loop is not blocked
    blocked_count = 0
    for _ in range(20):  # Check over ~2 seconds
        start = time.perf_counter()
        # Yielding lets the scheduled render make progress; if it blocks the
        # loop, control does not come back here until the blocking call ends.
        await asyncio.sleep(0)
        elapsed = time.perf_counter() - start

        # an overly generous elapsed time for slow machines
        if elapsed >= 0.5:
            blocked_count += 1

        await asyncio.sleep(0.1)

    # Ensure task completes
    _, prompt = await task
    assert prompt["prompt_token_ids"] == expected_tokens, (
        "Mocked blocking tokenizer was not called"
    )
    assert blocked_count == 0, "Event loop blocked during tokenization"


def test_apply_mistral_chat_template_thinking_chunk():
    """Tokenize a chat containing thinking chunks and compare the decoded text.

    The system and assistant turns mix ``text`` and ``thinking`` content parts.
    The token ids are decoded with special tokens kept, so the expected string
    spells out the control tokens surrounding each part.
    """
    system_turn = {
        "role": "system",
        "content": [
            {"type": "text", "text": "You are a helpful assistant."},
            {
                "type": "thinking",
                "closed": True,
                "thinking": "Only return the answer when you are confident.",
            },
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": [
            {"type": "text", "text": "Let me think about it."},
            {"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
            {"type": "text", "text": "The answer is 4."},
        ],
    }
    conversation = [
        system_turn,
        {"role": "user", "content": "What is 2+2?"},
        assistant_turn,
        {"role": "user", "content": "Thanks, what is 3+3?"},
    ]

    tokenizer = MistralTokenizer.from_pretrained("mistralai/Magistral-Small-2509")
    token_ids = safe_apply_chat_template(
        tokenizer, conversation, chat_template=None, tools=None
    )
    decoded_text = tokenizer.mistral.decode(
        token_ids, special_token_policy=SpecialTokenPolicy.KEEP
    )

    # One fragment per line, joined without separators.
    expected_text = "".join(
        [
            r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the",
            r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]",
            r"[INST]What is 2+2?[/INST]",
            r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>",
            r"[INST]Thanks, what is 3+3?[/INST]",
        ]
    )

    assert decoded_text == expected_text
[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00			`# SPDX-License-Identifier: Apache-2.0`
			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`

			`import asyncio`
			`import time`
[Refactor] Consolidate sequence normalization and enc-dec parsing (#33928) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-02-06 23:43:47 +08:00			`from dataclasses import dataclass`
			`from typing import Any`
[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00			`from unittest.mock import Mock`

			`import pytest`
			`from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy`

[Frontend] Use new Renderer for Completions and Tokenize API (#32863) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-31 20:51:15 +08:00			`from vllm.renderers import ChatParams`
[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00			`from vllm.renderers.mistral import MistralRenderer, safe_apply_chat_template`
			`from vllm.tokenizers.mistral import MistralTokenizer`

[Refactor] Consolidate sequence normalization and enc-dec parsing (#33928) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-02-06 23:43:47 +08:00			`MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"`


			`@dataclass`
			`class MockHFConfig:`
			`model_type: str = "any"`


			`@dataclass`
			`class MockModelConfig:`
			`runner_type = "generate"`
			`model: str = MODEL_NAME`
			`tokenizer: str = MODEL_NAME`
			`trust_remote_code: bool = False`
			`max_model_len: int = 100`
			`tokenizer_revision = None`
			`tokenizer_mode = "mistral"`
			`hf_config = MockHFConfig()`
			`encoder_config: dict[str, Any] \| None = None`
			`enable_prompt_embeds: bool = True`
			`skip_tokenizer_init: bool = False`
			`is_encoder_decoder: bool = False`
[Renderer] Move InputPreprocessor into Renderer (1/2) (#34510) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com> 2026-02-15 02:14:21 +08:00			`is_multimodal_model: bool = False`
[Refactor] Consolidate sequence normalization and enc-dec parsing (#33928) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-02-06 23:43:47 +08:00
[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00
[Refactor] Pass full VllmConfig to Renderer (#34485) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-02-13 14:48:38 +08:00			`@dataclass`
			`class MockVllmConfig:`
			`model_config: MockModelConfig`


[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00			`@pytest.mark.asyncio`
			`async def test_async_mistral_tokenizer_does_not_block_event_loop():`
			`expected_tokens = [1, 2, 3]`

			`# Mock the blocking version to sleep`
			`def mocked_apply_chat_template(*_args, **_kwargs):`
			`time.sleep(2)`
			`return expected_tokens`

[Refactor] Consolidate sequence normalization and enc-dec parsing (#33928) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-02-06 23:43:47 +08:00			`mock_model_config = MockModelConfig(skip_tokenizer_init=True)`
[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00			`mock_tokenizer = Mock(spec=MistralTokenizer)`
			`mock_tokenizer.apply_chat_template = mocked_apply_chat_template`
[Refactor] Pass full VllmConfig to Renderer (#34485) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-02-13 14:48:38 +08:00			`mock_renderer = MistralRenderer(`
			`MockVllmConfig(mock_model_config),`
[Renderer] Move InputPreprocessor into Renderer (1/2) (#34510) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Cyrus Leung <cyrus.tl.leung@gmail.com> 2026-02-15 02:14:21 +08:00			`tokenizer=mock_tokenizer,`
[Refactor] Pass full VllmConfig to Renderer (#34485) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-02-13 14:48:38 +08:00			`)`
[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00
[Frontend] Use new Renderer for Completions and Tokenize API (#32863) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-31 20:51:15 +08:00			`task = mock_renderer.render_messages_async([], ChatParams())`
[Frontend] Introduce Renderer for processing chat messages (using `ModelConfig`) (#30200) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> 2026-01-22 20:44:22 +08:00
			`# Ensure the event loop is not blocked`
			`blocked_count = 0`
			`for _i in range(20): # Check over ~2 seconds`
			`start = time.perf_counter()`
			`await asyncio.sleep(0)`
			`elapsed = time.perf_counter() - start`

			`# an overly generous elapsed time for slow machines`
			`if elapsed >= 0.5:`
			`blocked_count += 1`

			`await asyncio.sleep(0.1)`

			`# Ensure task completes`
			`_, prompt = await task`
			`assert prompt["prompt_token_ids"] == expected_tokens, (`
			`"Mocked blocking tokenizer was not called"`
			`)`
			`assert blocked_count == 0, "Event loop blocked during tokenization"`


			`def test_apply_mistral_chat_template_thinking_chunk():`
			`messages = [`
			`{`
			`"role": "system",`
			`"content": [`
			`{"type": "text", "text": "You are a helpful assistant."},`
			`{`
			`"type": "thinking",`
			`"closed": True,`
			`"thinking": "Only return the answer when you are confident.",`
			`},`
			`],`
			`},`
			`{"role": "user", "content": "What is 2+2?"},`
			`{`
			`"role": "assistant",`
			`"content": [`
			`{"type": "text", "text": "Let me think about it."},`
			`{"type": "thinking", "closed": True, "thinking": "2+2 = 4"},`
			`{`
			`"type": "text",`
			`"text": "The answer is 4.",`
			`},`
			`],`
			`},`
			`{"role": "user", "content": "Thanks, what is 3+3?"},`
			`]`
			`mistral_tokenizer = MistralTokenizer.from_pretrained(`
			`"mistralai/Magistral-Small-2509"`
			`)`

			`tokens_ids = safe_apply_chat_template(`
			`mistral_tokenizer, messages, chat_template=None, tools=None`
			`)`

			`string_tokens = mistral_tokenizer.mistral.decode(`
			`tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP`
			`)`

			`expected_tokens = (`
			`r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the"`
			`r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]"`
			`r"[INST]What is 2+2?[/INST]"`
			`r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>"`
			`r"[INST]Thanks, what is 3+3?[/INST]"`
			`)`

			`assert string_tokens == expected_tokens`