[Frontend] Introduce Renderer for processing chat messages (using ModelConfig) (#30200)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Cyrus Leung authored on 2026-01-22 20:44:22 +08:00, committed by GitHub
parent 421012b63a
commit d117a4d1a9
48 changed files with 2141 additions and 1585 deletions
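The test changes below all follow one migration: the future-based `parse_chat_messages_futures` (which returned the conversation plus a future that had to be awaited separately for the multimodal data) is replaced by the coroutine `parse_chat_messages_async`, which is awaited once and returns the resolved data directly. A minimal sketch of the call-site change; the argument list is an assumption based on the old signature and the fixtures, not shown in this diff:

    from vllm.entrypoints.chat_utils import parse_chat_messages_async

    async def render(messages, model_config, tokenizer):
        # Old API (removed in this commit): the multimodal data came back
        # as a future and was resolved in a second step:
        #   conversation, mm_future, mm_uuids = parse_chat_messages_futures(
        #       messages, model_config, tokenizer, content_format="string")
        #   mm_data = await mm_future
        # New API: one await yields the conversation, the resolved
        # multimodal data, and the per-item UUIDs together.
        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
            messages, model_config, tokenizer, content_format="string"
        )
        return conversation, mm_data, mm_uuids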


@@ -7,21 +7,14 @@ from typing import Literal
import pytest
import torch
-from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy
from vllm.assets.audio import AudioAsset
from vllm.assets.image import ImageAsset
from vllm.assets.video import VideoAsset
from vllm.config import ModelConfig
from vllm.entrypoints.chat_utils import (
-_try_extract_ast,
-apply_mistral_chat_template,
-load_chat_template,
parse_chat_messages,
-parse_chat_messages_futures,
-resolve_chat_template_content_format,
-resolve_chat_template_kwargs,
-resolve_hf_chat_template,
+parse_chat_messages_async,
)
from vllm.multimodal import MultiModalDataDict, MultiModalUUIDDict
from vllm.multimodal.utils import (
@@ -29,24 +22,11 @@ from vllm.multimodal.utils import (
encode_image_url,
encode_video_url,
)
from vllm.tokenizers import get_tokenizer
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.utils.serial_utils import tensor2base64
from ..models.registry import HF_EXAMPLE_MODELS
from ..utils import VLLM_PATH
EXAMPLES_DIR = VLLM_PATH / "examples"
PHI3V_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"
ULTRAVOX_MODEL_ID = "fixie-ai/ultravox-v0_5-llama-3_2-1b"
QWEN2AUDIO_MODEL_ID = "Qwen/Qwen2-Audio-7B-Instruct"
QWEN2VL_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
QWEN25VL_MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
QWEN25OMNI_MODEL_ID = "Qwen/Qwen2.5-Omni-7B"
QWEN3_MODEL_ID = "Qwen/Qwen3-8B"
LLAMA_GUARD_MODEL_ID = "meta-llama/Llama-Guard-3-1B"
HERMES_MODEL_ID = "NousResearch/Hermes-3-Llama-3.1-8B"
MISTRAL_MODEL_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
@@ -469,7 +449,7 @@ async def test_parse_chat_messages_single_image_with_uuid_async(
image_url,
):
image_uuid = str(hash(image_url))
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -490,7 +470,7 @@ async def test_parse_chat_messages_single_image_with_uuid_async(
assert conversation == [
{"role": "user", "content": "<|image_1|>\nWhat's in the image?"}
]
-_assert_mm_data_is_image_input(await mm_future, 1)
+_assert_mm_data_is_image_input(mm_data, 1)
_assert_mm_uuids(mm_uuids, 1, expected_uuids=[image_uuid])
@@ -500,7 +480,7 @@ async def test_parse_chat_messages_empty_image_with_uuid_async(
image_url,
):
image_uuid = str(hash(image_url))
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -521,7 +501,7 @@ async def test_parse_chat_messages_empty_image_with_uuid_async(
assert conversation == [
{"role": "user", "content": "<|image_1|>\nWhat's in the image?"}
]
-_assert_mm_data_is_image_input(await mm_future, 1, skipped_image_indices=[0])
+_assert_mm_data_is_image_input(mm_data, 1, skipped_image_indices=[0])
_assert_mm_uuids(mm_uuids, 1, expected_uuids=[image_uuid])
@@ -533,7 +513,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_async(
image_uuid1 = "my_uuid_1"
image_uuid2 = "my_uuid_2"
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -562,7 +542,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_async(
"content": "<|image_1|>\n<|image_2|>\nWhat's in these images?",
}
]
-_assert_mm_data_is_image_input(await mm_future, 2)
+_assert_mm_data_is_image_input(mm_data, 2)
_assert_mm_uuids(mm_uuids, 2, expected_uuids=[image_uuid1, image_uuid2])
@@ -574,7 +554,7 @@ async def test_parse_chat_messages_multiple_empty_images_with_uuids_async(
image_uuid1 = "my_uuid_1"
image_uuid2 = "my_uuid_2"
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -603,7 +583,7 @@ async def test_parse_chat_messages_multiple_empty_images_with_uuids_async(
"content": "<|image_1|>\n<|image_2|>\nWhat's in these images?",
}
]
-_assert_mm_data_is_image_input(await mm_future, 2, skipped_image_indices=[0, 1])
+_assert_mm_data_is_image_input(mm_data, 2, skipped_image_indices=[0, 1])
_assert_mm_uuids(mm_uuids, 2, expected_uuids=[image_uuid1, image_uuid2])
@@ -614,7 +594,7 @@ async def test_parse_chat_messages_multiple_images_with_partial_uuids_async(
):
image_uuid2 = "my_uuid_2"
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -642,7 +622,7 @@ async def test_parse_chat_messages_multiple_images_with_partial_uuids_async(
"content": "<|image_1|>\n<|image_2|>\nWhat's in these images?",
}
]
-_assert_mm_data_is_image_input(await mm_future, 2)
+_assert_mm_data_is_image_input(mm_data, 2)
_assert_mm_uuids(mm_uuids, 2, expected_uuids=[None, image_uuid2])
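The UUID tests above all exercise one contract: the request supplies a caller-side ID per multimodal item (the message bodies are elided in these hunks), and the parser must echo those IDs back in `mm_uuids`, with `None` for items that were not tagged. A schematic restatement of what `_assert_mm_uuids` checks, assuming `MultiModalUUIDDict` maps each modality to an ordered per-item list (hypothetical helper; the real one is defined earlier in this file):

    def check_mm_uuids(mm_uuids, item_count, expected_uuids, modality="image"):
        # One entry per parsed item of the given modality, in order;
        # items the caller did not tag come back as None.
        items = mm_uuids[modality]
        assert len(items) == item_count
        assert items == expected_uuids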
@@ -689,7 +669,7 @@ async def test_parse_chat_messages_single_image_async(
phi3v_model_config,
image_url,
):
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -706,7 +686,7 @@ async def test_parse_chat_messages_single_image_async(
assert conversation == [
{"role": "user", "content": "<|image_1|>\nWhat's in the image?"}
]
-_assert_mm_data_is_image_input(await mm_future, 1)
+_assert_mm_data_is_image_input(mm_data, 1)
_assert_mm_uuids(mm_uuids, 1, expected_uuids=[None])
@@ -890,7 +870,7 @@ async def test_parse_chat_messages_audio_embeds_async(
# Encode it as base64
base64_audio_embedding = tensor2base64(audio_embedding)
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -908,7 +888,6 @@ async def test_parse_chat_messages_audio_embeds_async(
)
# Should have audio embedding in mm_data (single tensor, not a list)
-mm_data = await mm_future
assert mm_data is not None
assert "audio" in mm_data
assert isinstance(mm_data["audio"], torch.Tensor)
@@ -1050,7 +1029,7 @@ async def test_parse_chat_messages_multiple_image_embeds_async(
base64_image_embedding_1 = tensor2base64(image_embedding_1)
base64_image_embedding_2 = tensor2base64(image_embedding_2)
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -1080,7 +1059,6 @@ async def test_parse_chat_messages_multiple_image_embeds_async(
]
# Await the future and verify mm_data
-mm_data = await mm_future
assert mm_data is not None
assert "image" in mm_data
assert isinstance(mm_data["image"], list)
@@ -1101,7 +1079,7 @@ async def test_parse_chat_messages_empty_image_embeds_with_uuid_async(
phi3v_model_config_image_embeds,
):
uuid = "abcd"
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -1121,7 +1099,6 @@ async def test_parse_chat_messages_empty_image_embeds_with_uuid_async(
"content": "<|image_1|>\nWhat's in this image?",
}
]
-mm_data = await mm_future
assert mm_data is not None
assert "image" in mm_data
assert isinstance(mm_data["image"], list)
@@ -1228,7 +1205,7 @@ async def test_parse_chat_messages_multiple_images_async(
phi3v_model_config,
image_url,
):
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -1252,7 +1229,7 @@ async def test_parse_chat_messages_multiple_images_async(
"content": "<|image_1|>\n<|image_2|>\nWhat's in these images?",
}
]
-_assert_mm_data_is_image_input(await mm_future, 2)
+_assert_mm_data_is_image_input(mm_data, 2)
_assert_mm_uuids(mm_uuids, 2, expected_uuids=[None, None])
@@ -1582,7 +1559,7 @@ async def test_parse_chat_messages_multiple_images_interleave_async(
phi3v_model_config_mm_interleaved,
image_url,
):
-conversation, mm_data, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -1609,7 +1586,7 @@ async def test_parse_chat_messages_multiple_images_interleave_async(
"Do they have differences?",
}
]
-_assert_mm_data_is_image_input(await mm_data, 2)
+_assert_mm_data_is_image_input(mm_data, 2)
_assert_mm_uuids(mm_uuids, 2, expected_uuids=[None, None])
@@ -1619,7 +1596,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_interleave_async(
image_url,
):
image_uuid = str(hash(image_url))
-conversation, mm_data, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -1654,7 +1631,7 @@ async def test_parse_chat_messages_multiple_images_with_uuids_interleave_async(
"Do they have differences?",
}
]
-_assert_mm_data_is_image_input(await mm_data, 2)
+_assert_mm_data_is_image_input(mm_data, 2)
_assert_mm_uuids(mm_uuids, 2, expected_uuids=[image_uuid, image_uuid])
@@ -2030,377 +2007,6 @@ def test_parse_chat_messages_multiple_images_interleave_with_placeholders(
)
@pytest.mark.parametrize(
"model",
[
QWEN2VL_MODEL_ID, # tokenizer.chat_template is of type str
HERMES_MODEL_ID, # tokenizer.chat_template is of type dict
],
)
@pytest.mark.parametrize("use_tools", [True, False])
def test_resolve_hf_chat_template(sample_json_schema, model, use_tools):
"""checks that chat_template is a dict type for HF models."""
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
model_info.check_available_online(on_fail="skip")
model_config = ModelConfig(
model,
tokenizer=model_info.tokenizer or model,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype,
)
# Build the tokenizer
tokenizer = get_tokenizer(
model,
trust_remote_code=model_config.trust_remote_code,
)
tools = (
[
{
"type": "function",
"function": {
"name": "dummy_function_name",
"description": "This is a dummy function",
"parameters": sample_json_schema,
},
}
]
if use_tools
else None
)
# Test detecting the tokenizer's chat_template
chat_template = resolve_hf_chat_template(
tokenizer,
chat_template=None,
tools=tools,
model_config=model_config,
)
assert isinstance(chat_template, str)
@pytest.mark.parametrize(
"model, expected_kwargs",
[
(
QWEN2VL_MODEL_ID,
{
"add_vision_id",
"add_generation_prompt",
"continue_final_message",
"tools",
},
),
(
QWEN3_MODEL_ID,
{
"enable_thinking",
"add_generation_prompt",
"continue_final_message",
"tools",
},
),
],
)
def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwargs):
"""checks that chat_template is a dict type for HF models."""
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
model_info.check_available_online(on_fail="skip")
tools = [
{
"type": "function",
"function": {
"name": "dummy_function_name",
"description": "This is a dummy function",
"parameters": sample_json_schema,
},
}
]
chat_template_kwargs = {
# both unused
"unsed_kwargs_1": 123,
"unsed_kwargs_2": "abc",
# should not appear
"chat_template": "{% Hello world! %}",
"tokenize": True,
# used by tokenizer
"continue_final_message": True,
"tools": tools,
# both used by Qwen2-VL and Qwen3
"add_generation_prompt": True,
# only used by Qwen2-VL
"add_vision_id": True,
# only used by Qwen3
"enable_thinking": True,
}
model_config = ModelConfig(
model,
tokenizer=model_info.tokenizer or model,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype,
)
# Build the tokenizer
tokenizer = get_tokenizer(
model,
trust_remote_code=model_config.trust_remote_code,
)
# Test detecting the tokenizer's chat_template
chat_template = resolve_hf_chat_template(
tokenizer,
chat_template=None,
tools=tools,
model_config=model_config,
)
with pytest.raises(
ValueError, match="Found unexpected chat template kwargs from request"
):
# should raise error if `chat_template_kwargs` contains
# `chat_template` or `tokenize`
resolve_chat_template_kwargs(
tokenizer,
chat_template=chat_template,
chat_template_kwargs=chat_template_kwargs,
)
resolved_chat_template_kwargs = resolve_chat_template_kwargs(
tokenizer,
chat_template=chat_template,
chat_template_kwargs=chat_template_kwargs,
raise_on_unexpected=False,
)
assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs
# Additional test: Verify HF base parameters work with **kwargs tokenizers
# This validates the fix for tokenizers like Kimi K2 that use **kwargs
# to receive standard HuggingFace parameters instead of declaring them explicitly
from vllm.entrypoints.chat_utils import _get_hf_base_chat_template_params
hf_base_params = _get_hf_base_chat_template_params()
# Verify common HF parameters are in the base class
assert {"add_generation_prompt", "tools", "continue_final_message"}.issubset(
hf_base_params
), f"Expected HF base params not found in {hf_base_params}"
# Test with a mock tokenizer that uses **kwargs (like Kimi K2)
class MockTokenizerWithKwargs:
def apply_chat_template(self, conversation, **kwargs):
return "mocked_output"
mock_tokenizer = MockTokenizerWithKwargs()
mock_kwargs = {
"add_generation_prompt": True,
"tools": tools,
"continue_final_message": False,
"unknown_param": "should_be_filtered",
}
resolved_mock = resolve_chat_template_kwargs(
mock_tokenizer, chat_template, mock_kwargs, raise_on_unexpected=False
)
# HF base params should pass through even with **kwargs tokenizer
assert "add_generation_prompt" in resolved_mock
assert "tools" in resolved_mock
assert "continue_final_message" in resolved_mock
# Unknown params should be filtered out
assert "unknown_param" not in resolved_mock
# NOTE: Qwen2-Audio's default chat template is defined inside the
# processor class instead of in `tokenizer_config.json`
@pytest.mark.parametrize(
("model", "expected_format"),
[
(PHI3V_MODEL_ID, "string"),
(QWEN2VL_MODEL_ID, "openai"),
(QWEN25VL_MODEL_ID, "openai"),
(ULTRAVOX_MODEL_ID, "string"),
(QWEN2AUDIO_MODEL_ID, "openai"),
(LLAMA_GUARD_MODEL_ID, "openai"),
],
)
def test_resolve_content_format_hf_defined(model, expected_format):
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
model_info.check_available_online(on_fail="skip")
model_config = ModelConfig(
model,
tokenizer=model_info.tokenizer or model,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype,
)
tokenizer = get_tokenizer(
model,
trust_remote_code=model_config.trust_remote_code,
)
# Test detecting the tokenizer's chat_template
chat_template = resolve_hf_chat_template(
tokenizer,
chat_template=None,
tools=None,
model_config=model_config,
)
assert isinstance(chat_template, str)
print("[TEXT]")
print(chat_template)
print("[AST]")
print(_try_extract_ast(chat_template))
resolved_format = resolve_chat_template_content_format(
None, # Test detecting the tokenizer's chat_template
None,
"auto",
tokenizer,
model_config=model_config,
)
assert resolved_format == expected_format
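The "string" vs "openai" distinction describes the shape of message["content"] the resolved template consumes: roughly, the resolver inspects the template's Jinja AST (via `_try_extract_ast` above) and reports "openai" when the template iterates over typed content parts, falling back to "string" otherwise. The two shapes, illustrated with the same prompt used throughout these tests:

    # "string": content is a single string; multimodal placeholders are
    # rendered into the text.
    string_style = {"role": "user", "content": "<|image_1|>\nWhat's in the image?"}

    # "openai": content remains a list of typed parts, as in the OpenAI API.
    openai_style = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What's in the image?"},
        ],
    }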
@pytest.mark.parametrize(
("model", "expected_format"),
[
("Salesforce/blip2-opt-2.7b", "string"),
("facebook/chameleon-7b", "string"),
("deepseek-ai/deepseek-vl2-tiny", "string"),
("adept/fuyu-8b", "string"),
("google/paligemma-3b-mix-224", "string"),
("Qwen/Qwen-VL", "string"),
("Qwen/Qwen-VL-Chat", "string"),
],
)
def test_resolve_content_format_fallbacks(model, expected_format):
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
model_info.check_available_online(on_fail="skip")
model_config = ModelConfig(
model,
tokenizer=model_info.tokenizer or model,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype,
)
tokenizer = get_tokenizer(
model_config.tokenizer,
trust_remote_code=model_config.trust_remote_code,
)
# Test detecting the tokenizer's chat_template
chat_template = resolve_hf_chat_template(
tokenizer,
chat_template=None,
tools=None,
model_config=model_config,
)
assert isinstance(chat_template, str)
print("[TEXT]")
print(chat_template)
print("[AST]")
print(_try_extract_ast(chat_template))
resolved_format = resolve_chat_template_content_format(
None, # Test detecting the tokenizer's chat_template
None,
"auto",
tokenizer,
model_config=model_config,
)
assert resolved_format == expected_format
@pytest.mark.parametrize(
("template_path", "expected_format"),
[
("template_alpaca.jinja", "string"),
("template_baichuan.jinja", "string"),
("template_chatglm.jinja", "string"),
("template_chatglm2.jinja", "string"),
("template_chatml.jinja", "string"),
("template_dse_qwen2_vl.jinja", "openai"),
("template_falcon_180b.jinja", "string"),
("template_falcon.jinja", "string"),
("template_inkbot.jinja", "string"),
("template_teleflm.jinja", "string"),
("template_vlm2vec_phi3v.jinja", "openai"),
("template_vlm2vec_qwen2vl.jinja", "openai"),
("tool_chat_template_granite_20b_fc.jinja", "string"),
("tool_chat_template_hermes.jinja", "string"),
("tool_chat_template_internlm2_tool.jinja", "string"),
("tool_chat_template_llama3.1_json.jinja", "openai"),
("tool_chat_template_llama3.2_json.jinja", "openai"),
("tool_chat_template_mistral_parallel.jinja", "string"),
("tool_chat_template_mistral.jinja", "string"),
],
)
def test_resolve_content_format_examples(template_path, expected_format):
model_config = ModelConfig(
PHI3V_MODEL_ID, # Dummy
tokenizer=PHI3V_MODEL_ID, # Dummy
trust_remote_code=True,
)
dummy_tokenizer = get_tokenizer(
PHI3V_MODEL_ID, # Dummy
trust_remote_code=model_config.trust_remote_code,
)
dummy_tokenizer.chat_template = None
chat_template = load_chat_template(EXAMPLES_DIR / template_path)
assert isinstance(chat_template, str)
print("[TEXT]")
print(chat_template)
print("[AST]")
print(_try_extract_ast(chat_template))
resolved_format = resolve_chat_template_content_format(
chat_template,
None,
"auto",
dummy_tokenizer,
model_config=model_config,
)
assert resolved_format == expected_format
def test_parse_chat_messages_include_thinking_chunk(mistral_model_config):
messages = [
{
@@ -2462,56 +2068,6 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config):
assert conversation_with_thinking == expected_conversation
def test_apply_mistral_chat_template_thinking_chunk():
messages = [
{
"role": "system",
"content": [
{"type": "text", "text": "You are a helpful assistant."},
{
"type": "thinking",
"closed": True,
"thinking": "Only return the answer when you are confident.",
},
],
},
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Let me think about it."},
{"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
{
"type": "text",
"text": "The answer is 4.",
},
],
},
{"role": "user", "content": "Thanks, what is 3+3?"},
]
mistral_tokenizer = MistralTokenizer.from_pretrained(
"mistralai/Magistral-Small-2509"
)
tokens_ids = apply_mistral_chat_template(
mistral_tokenizer, messages, chat_template=None, tools=None
)
string_tokens = mistral_tokenizer.mistral.decode(
tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP
)
expected_tokens = (
r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the"
r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]"
r"[INST]What is 2+2?[/INST]"
r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>"
r"[INST]Thanks, what is 3+3?[/INST]"
)
assert string_tokens == expected_tokens
def test_parse_chat_messages_single_empty_audio_with_uuid(
qwen2_audio_model_config,
):
@@ -2550,7 +2106,7 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
qwen2_audio_model_config,
):
audio_uuid = "abcd"
-conversation, mm_future, mm_uuids = parse_chat_messages_futures(
+conversation, mm_data, mm_uuids = await parse_chat_messages_async(
[
{
"role": "user",
@@ -2575,5 +2131,5 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
"audio say?",
}
]
_assert_mm_data_inputs(await mm_future, {"audio": 1})
_assert_mm_data_inputs(mm_data, {"audio": 1})
_assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])