2025-02-02 14:58:18 -05:00
# SPDX-License-Identifier: Apache-2.0
2025-06-03 11:20:17 -07:00
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
2025-02-02 14:58:18 -05:00
2023-11-30 19:43:13 -05:00
import pytest
2025-05-08 14:05:54 +08:00
from vllm . config import ModelConfig
2024-09-11 00:49:11 +08:00
from vllm . entrypoints . chat_utils import apply_hf_chat_template , load_chat_template
2024-01-17 05:33:14 +00:00
from vllm . entrypoints . openai . protocol import ChatCompletionRequest
2024-03-25 23:59:47 +09:00
from vllm . transformers_utils . tokenizer import get_tokenizer
2023-11-30 19:43:13 -05:00
2025-05-08 14:05:54 +08:00
from . . . models . registry import HF_EXAMPLE_MODELS
2024-09-18 09:56:58 -04:00
from . . . utils import VLLM_PATH
2024-08-07 17:12:05 +08:00
# ChatML Jinja template shipped in the repository's examples directory;
# fail fast at import time if it has gone missing.
chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
assert chatml_jinja_path.exists()

# Define models, templates, and their corresponding expected outputs.
# Each tuple is (model, template_path, add_generation_prompt,
# continue_final_message, expected_rendered_prompt).
MODEL_TEMPLATE_GENERATION_OUTPUT = [
    # add_generation_prompt=True: prompt ends with an open assistant turn.
    (
        "facebook/opt-125m",
        chatml_jinja_path,
        True,
        False,
        """<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of<|im_end|>
<|im_start|>assistant
""",
    ),
    # add_generation_prompt=False: last user turn is left unterminated.
    (
        "facebook/opt-125m",
        chatml_jinja_path,
        False,
        False,
        """<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of""",
    ),
    # continue_final_message=True: the trailing assistant message is kept
    # open so generation continues it in place.
    (
        "facebook/opt-125m",
        chatml_jinja_path,
        False,
        True,
        """<|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of<|im_end|>
<|im_start|>assistant
The capital of""",
    ),
]

# Conversation replayed by every parametrized case below.
TEST_MESSAGES = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there!"},
    {"role": "user", "content": "What is the capital of"},
]
# Partial assistant turn appended when continue_final_message is exercised.
ASSISTANT_MESSAGE_TO_CONTINUE = {"role": "assistant", "content": "The capital of"}
2023-11-30 19:43:13 -05:00
2024-05-09 13:48:33 +08:00
def test_load_chat_template():
    """Loading the ChatML template file yields its exact Jinja source."""
    # Testing chatml template
    loaded = load_chat_template(chat_template=chatml_jinja_path)

    assert loaded is not None
    # Hard coded value for template_chatml.jinja
    expected = """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{'<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{'<|im_start|>assistant\\n'}}{% endif %}"""  # noqa: E501
    assert loaded == expected
2023-11-30 19:43:13 -05:00
2024-05-09 13:48:33 +08:00
def test_no_load_chat_template_filelike():
    """A nonexistent path-like template must raise instead of being read."""
    # Testing chatml template
    missing_path = "../../examples/does_not_exist"

    with pytest.raises(ValueError, match="looks like a file path"):
        load_chat_template(chat_template=missing_path)
2024-04-24 02:19:03 +08:00
2024-05-09 13:48:33 +08:00
def test_no_load_chat_template_literallike():
    """A literal Jinja string is returned unchanged, not treated as a path."""
    # Testing chatml template
    literal_template = "{{ messages }}"

    loaded = load_chat_template(chat_template=literal_template)
    assert loaded == literal_template
2023-11-30 19:43:13 -05:00
@pytest.mark.parametrize(
    "model,template,add_generation_prompt,continue_final_message,expected_output",
    MODEL_TEMPLATE_GENERATION_OUTPUT,
)
def test_get_gen_prompt(
    model, template, add_generation_prompt, continue_final_message, expected_output
):
    """Rendering TEST_MESSAGES through the HF chat template matches the
    expected prompt for each (model, template, flags) combination."""
    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
    # Skip (rather than fail) when the model can't be fetched in this env.
    model_info.check_available_online(on_fail="skip")

    model_config = ModelConfig(
        model,
        tokenizer=model_info.tokenizer or model,
        tokenizer_mode=model_info.tokenizer_mode,
        trust_remote_code=model_info.trust_remote_code,
        revision=model_info.revision,
        hf_overrides=model_info.hf_overrides,
        skip_tokenizer_init=model_info.skip_tokenizer_init,
        enforce_eager=model_info.enforce_eager,
        dtype=model_info.dtype,
    )

    # Initialize the tokenizer
    tokenizer = get_tokenizer(
        tokenizer_name=model_config.tokenizer,
        trust_remote_code=model_config.trust_remote_code,
    )
    template_content = load_chat_template(chat_template=template)

    # When continuing the final message, the conversation ends with a partial
    # assistant turn for the template to extend rather than close.
    conversation = list(TEST_MESSAGES)
    if continue_final_message:
        conversation.append(ASSISTANT_MESSAGE_TO_CONTINUE)

    # Create a mock request object using keyword arguments
    mock_request = ChatCompletionRequest(
        model=model,
        messages=conversation,
        add_generation_prompt=add_generation_prompt,
        continue_final_message=continue_final_message,
    )

    # Call the function and get the result
    result = apply_hf_chat_template(
        tokenizer=tokenizer,
        conversation=mock_request.messages,
        chat_template=mock_request.chat_template or template_content,
        model_config=model_config,
        tools=None,
        add_generation_prompt=mock_request.add_generation_prompt,
        continue_final_message=mock_request.continue_final_message,
    )

    # Test assertion
    assert result == expected_output, (
        f"The generated prompt does not match the expected output for "
        f"model {model} and template {template}"
    )