Remove all references to yapf as it's no longer used (#26251)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
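
For context on the hunks below: yapf honours `# yapf: disable` / `# yapf: enable` comment pragmas that fence a region off from reformatting, and these tests used them to protect hand-aligned `pytest.mark.parametrize` tables. A minimal sketch of the pattern, using illustrative names rather than the real test data:

    # yapf: disable
    # Everything inside the fence keeps the author's hand-written layout.
    CASES = [("model-a", "string"),
             ("model-b", "openai")]
    # yapf: enable

With yapf removed from the toolchain these pragmas are inert comments, so the diff deletes them and reflows the tables into the current formatter's one-element-per-line style.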
@@ -15,6 +15,7 @@ from vllm.assets.video import VideoAsset
 from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import (
     _try_extract_ast,
+    apply_mistral_chat_template,
     load_chat_template,
     parse_chat_messages,
     parse_chat_messages_futures,
@@ -1855,17 +1856,17 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
 
 # NOTE: Qwen2-Audio default chat template is specially defined inside
 # processor class instead of using `tokenizer_config.json`
-# yapf: disable
 @pytest.mark.parametrize(
     ("model", "expected_format"),
-    [(PHI3V_MODEL_ID, "string"),
-     (QWEN2VL_MODEL_ID, "openai"),
-     (QWEN25VL_MODEL_ID, "openai"),
-     (ULTRAVOX_MODEL_ID, "string"),
-     (QWEN2AUDIO_MODEL_ID, "openai"),
-     (LLAMA_GUARD_MODEL_ID, "openai")],
+    [
+        (PHI3V_MODEL_ID, "string"),
+        (QWEN2VL_MODEL_ID, "openai"),
+        (QWEN25VL_MODEL_ID, "openai"),
+        (ULTRAVOX_MODEL_ID, "string"),
+        (QWEN2AUDIO_MODEL_ID, "openai"),
+        (LLAMA_GUARD_MODEL_ID, "openai"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_hf_defined(model, expected_format):
     model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
     model_info.check_available_online(on_fail="skip")
@@ -1879,7 +1880,8 @@ def test_resolve_content_format_hf_defined(model, expected_format):
         hf_overrides=model_info.hf_overrides,
         skip_tokenizer_init=model_info.skip_tokenizer_init,
         enforce_eager=model_info.enforce_eager,
-        dtype=model_info.dtype)
+        dtype=model_info.dtype,
+    )
 
     tokenizer = get_tokenizer(
         model,
@@ -1911,18 +1913,18 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     assert resolved_format == expected_format
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("model", "expected_format"),
-    [("Salesforce/blip2-opt-2.7b", "string"),
-     ("facebook/chameleon-7b", "string"),
-     ("deepseek-ai/deepseek-vl2-tiny", "string"),
-     ("adept/fuyu-8b", "string"),
-     ("google/paligemma-3b-mix-224", "string"),
-     ("Qwen/Qwen-VL", "string"),
-     ("Qwen/Qwen-VL-Chat", "string")],
+    [
+        ("Salesforce/blip2-opt-2.7b", "string"),
+        ("facebook/chameleon-7b", "string"),
+        ("deepseek-ai/deepseek-vl2-tiny", "string"),
+        ("adept/fuyu-8b", "string"),
+        ("google/paligemma-3b-mix-224", "string"),
+        ("Qwen/Qwen-VL", "string"),
+        ("Qwen/Qwen-VL-Chat", "string"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_fallbacks(model, expected_format):
     model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
     model_info.check_available_online(on_fail="skip")
@@ -1936,7 +1938,8 @@ def test_resolve_content_format_fallbacks(model, expected_format):
         hf_overrides=model_info.hf_overrides,
         skip_tokenizer_init=model_info.skip_tokenizer_init,
         enforce_eager=model_info.enforce_eager,
-        dtype=model_info.dtype)
+        dtype=model_info.dtype,
+    )
 
     tokenizer = get_tokenizer(
         model_config.tokenizer,
@@ -1968,30 +1971,30 @@ def test_resolve_content_format_fallbacks(model, expected_format):
     assert resolved_format == expected_format
 
 
-# yapf: disable
 @pytest.mark.parametrize(
     ("template_path", "expected_format"),
-    [("template_alpaca.jinja", "string"),
-     ("template_baichuan.jinja", "string"),
-     ("template_chatglm.jinja", "string"),
-     ("template_chatglm2.jinja", "string"),
-     ("template_chatml.jinja", "string"),
-     ("template_dse_qwen2_vl.jinja", "openai"),
-     ("template_falcon_180b.jinja", "string"),
-     ("template_falcon.jinja", "string"),
-     ("template_inkbot.jinja", "string"),
-     ("template_teleflm.jinja", "string"),
-     ("template_vlm2vec_phi3v.jinja", "openai"),
-     ("template_vlm2vec_qwen2vl.jinja", "openai"),
-     ("tool_chat_template_granite_20b_fc.jinja", "string"),
-     ("tool_chat_template_hermes.jinja", "string"),
-     ("tool_chat_template_internlm2_tool.jinja", "string"),
-     ("tool_chat_template_llama3.1_json.jinja", "openai"),
-     ("tool_chat_template_llama3.2_json.jinja", "openai"),
-     ("tool_chat_template_mistral_parallel.jinja", "string"),
-     ("tool_chat_template_mistral.jinja", "string")],
+    [
+        ("template_alpaca.jinja", "string"),
+        ("template_baichuan.jinja", "string"),
+        ("template_chatglm.jinja", "string"),
+        ("template_chatglm2.jinja", "string"),
+        ("template_chatml.jinja", "string"),
+        ("template_dse_qwen2_vl.jinja", "openai"),
+        ("template_falcon_180b.jinja", "string"),
+        ("template_falcon.jinja", "string"),
+        ("template_inkbot.jinja", "string"),
+        ("template_teleflm.jinja", "string"),
+        ("template_vlm2vec_phi3v.jinja", "openai"),
+        ("template_vlm2vec_qwen2vl.jinja", "openai"),
+        ("tool_chat_template_granite_20b_fc.jinja", "string"),
+        ("tool_chat_template_hermes.jinja", "string"),
+        ("tool_chat_template_internlm2_tool.jinja", "string"),
+        ("tool_chat_template_llama3.1_json.jinja", "openai"),
+        ("tool_chat_template_llama3.2_json.jinja", "openai"),
+        ("tool_chat_template_mistral_parallel.jinja", "string"),
+        ("tool_chat_template_mistral.jinja", "string"),
+    ],
 )
-# yapf: enable
 def test_resolve_content_format_examples(template_path, expected_format):
     model_config = ModelConfig(
         PHI3V_MODEL_ID,  # Dummy
@@ -2024,40 +2027,34 @@ def test_resolve_content_format_examples(template_path, expected_format):
     assert resolved_format == expected_format
 
 
-def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
-                                                    mistral_tokenizer):
-    messages = [{
-        "role":
-        "system",
-        "content": [{
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type":
-            "thinking",
-            "closed":
-            True,
-            "thinking":
-            "Only return the answer when you are confident."
-        }]
-    }, {
-        "role": "user",
-        "content": "What is 2+2?"
-    }, {
-        "role":
-        "assistant",
-        "content": [{
-            "type": "text",
-            "text": "Let me think about it."
-        }, {
-            "type": "thinking",
-            "closed": True,
-            "thinking": "2+2 = 4"
-        }, {
-            "type": "text",
-            "text": "The answer is 4.",
-        }],
-    }]
+def test_parse_chat_messages_include_thinking_chunk(
+    mistral_model_config, mistral_tokenizer
+):
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {
+                    "type": "thinking",
+                    "closed": True,
+                    "thinking": "Only return the answer when you are confident.",
+                },
+            ],
+        },
+        {"role": "user", "content": "What is 2+2?"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
+                {
+                    "type": "text",
+                    "text": "The answer is 4.",
+                },
+            ],
+        },
+    ]
 
     conversation_with_thinking, _, _ = parse_chat_messages(
         messages,
@@ -2066,122 +2063,105 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
         content_format="openai",
     )
 
-    expected_conversation = [{
-        "role":
-        "system",
-        "content": [{
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type": "text",
-            "text": "Only return the answer when you are confident."
-        }],
-    }, {
-        "role":
-        "user",
-        "content": [{
-            "type": "text",
-            "text": "What is 2+2?"
-        }],
-    }, {
-        "role":
-        "assistant",
-        "content": [
-            {
-                "type": "text",
-                "text": "Let me think about it."
-            },
-            {
-                "type": "text",
-                "text": "2+2 = 4"
-            },
-            {
-                "type": "text",
-                "text": "The answer is 4."
-            },
-        ]
-    }]
+    expected_conversation = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {
+                    "type": "text",
+                    "text": "Only return the answer when you are confident.",
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "What is 2+2?"}],
+        },
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "text", "text": "2+2 = 4"},
+                {"type": "text", "text": "The answer is 4."},
+            ],
+        },
+    ]
 
     assert conversation_with_thinking == expected_conversation
 
 
 def test_apply_mistral_chat_template_thinking_chunk():
-    # Moved import here to avoid yapf and isort conflicts
-    from vllm.entrypoints.chat_utils import apply_mistral_chat_template
-    messages = [{
-        "role":
-        "system",
-        "content": [{
-            "type": "text",
-            "text": "You are a helpful assistant."
-        }, {
-            "type":
-            "thinking",
-            "closed":
-            True,
-            "thinking":
-            "Only return the answer when you are confident."
-        }]
-    }, {
-        "role": "user",
-        "content": "What is 2+2?"
-    }, {
-        "role":
-        "assistant",
-        "content": [{
-            "type": "text",
-            "text": "Let me think about it."
-        }, {
-            "type": "thinking",
-            "closed": True,
-            "thinking": "2+2 = 4"
-        }, {
-            "type": "text",
-            "text": "The answer is 4.",
-        }],
-    }, {
-        "role": "user",
-        "content": "Thanks, what is 3+3?"
-    }]
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {"type": "text", "text": "You are a helpful assistant."},
+                {
+                    "type": "thinking",
+                    "closed": True,
+                    "thinking": "Only return the answer when you are confident.",
+                },
+            ],
+        },
+        {"role": "user", "content": "What is 2+2?"},
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": "Let me think about it."},
+                {"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
+                {
+                    "type": "text",
+                    "text": "The answer is 4.",
+                },
+            ],
+        },
+        {"role": "user", "content": "Thanks, what is 3+3?"},
+    ]
 
     # TODO(Julien): upon model release change to a tokenizer already configured.
     # =================================================================
     mistral_tokenizer = MistralTokenizer.from_pretrained(
-        "mistralai/Devstral-Small-2507")
+        "mistralai/Devstral-Small-2507"
+    )
     assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
     # Add think special tokens to the tokenizer
     mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
-        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value)
+        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
+    )
     mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
-        rank=36, is_control=True, token_str=SpecialTokens.end_think.value)
+        rank=36, is_control=True, token_str=SpecialTokens.end_think.value
+    )
     mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
         k: v
-        for k, v in
-        mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
+        for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
         if v not in {35, 36}
     }
     mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.begin_think.value] = 35
+        SpecialTokens.begin_think.value
+    ] = 35
     mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.end_think.value] = 36
+        SpecialTokens.end_think.value
+    ] = 36
     mistral_tokenizer.instruct.BEGIN_THINK = 35
     mistral_tokenizer.instruct.END_THINK = 36
     # =================================================================
 
-    tokens_ids = apply_mistral_chat_template(mistral_tokenizer,
-                                             messages,
-                                             chat_template=None,
-                                             tools=None)
+    tokens_ids = apply_mistral_chat_template(
+        mistral_tokenizer, messages, chat_template=None, tools=None
+    )
 
     string_tokens = mistral_tokenizer.mistral.decode(
-        tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP)
+        tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP
+    )
 
     expected_tokens = (
         r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the"
         r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]"
         r"[INST]What is 2+2?[/INST]"
         r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>"
-        r"[INST]Thanks, what is 3+3?[/INST]")
+        r"[INST]Thanks, what is 3+3?[/INST]"
+    )
 
     assert string_tokens == expected_tokens
 
@@ -2192,37 +2172,32 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
 ):
     audio_uuid = "abcd"
     conversation, mm_data, mm_uuids = parse_chat_messages(
-        [{
-            "role":
-            "user",
-            "content": [
-                {
-                    "type": "input_audio",
-                    "input_audio": {},
-                    "uuid": audio_uuid,
-                },
-                {
-                    "type": "text",
-                    "text": "What does the audio say?"
-                },
-            ],
-        }],
+        [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "input_audio",
+                        "input_audio": {},
+                        "uuid": audio_uuid,
+                    },
+                    {"type": "text", "text": "What does the audio say?"},
+                ],
+            }
+        ],
         qwen2_audio_model_config,
         qwen2_audio_tokenizer,
         content_format="string",
     )
 
-    assert conversation == [{
-        "role":
-        "user",
-        "content":
-        "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?"
-    }]
+    assert conversation == [
+        {
+            "role": "user",
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
+        }
+    ]
     _assert_mm_data_inputs(mm_data, {"audio": 1})
-    _assert_mm_uuids(mm_uuids,
-                     1,
-                     modality="audio",
-                     expected_uuids=[audio_uuid])
+    _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])
 
 
 @pytest.mark.asyncio
@@ -2232,34 +2207,29 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
 ):
     audio_uuid = "abcd"
     conversation, mm_future, mm_uuids = parse_chat_messages_futures(
-        [{
-            "role":
-            "user",
-            "content": [
-                {
-                    "type": "input_audio",
-                    "input_audio": {},
-                    "uuid": audio_uuid,
-                },
-                {
-                    "type": "text",
-                    "text": "What does the audio say?"
-                },
-            ],
-        }],
+        [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "input_audio",
+                        "input_audio": {},
+                        "uuid": audio_uuid,
+                    },
+                    {"type": "text", "text": "What does the audio say?"},
+                ],
+            }
+        ],
         qwen2_audio_model_config,
        qwen2_audio_tokenizer,
         content_format="string",
     )
 
-    assert conversation == [{
-        "role":
-        "user",
-        "content":
-        "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?"
-    }]
+    assert conversation == [
+        {
+            "role": "user",
+            "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
+        }
+    ]
     _assert_mm_data_inputs(await mm_future, {"audio": 1})
-    _assert_mm_uuids(mm_uuids,
-                     1,
-                     modality="audio",
-                     expected_uuids=[audio_uuid])
+    _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])