fix: resolve chat template names before kwargs detection (#36937)

Co-authored-by: giulio-leone <giulio.leone@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Giulio Leone
2026-03-14 01:20:16 +01:00
committed by GitHub
parent 367cf5cd3e
commit b41aa264f9
2 changed files with 59 additions and 1 deletions

View File

@@ -299,6 +299,62 @@ def test_resolve_chat_template_kwargs(sample_json_schema, model, expected_kwargs
assert "unknown_param" not in resolved_mock
def test_resolve_chat_template_resolves_name():
"""When chat_template is a name, resolve_chat_template should return
the actual Jinja content so that kwargs detection works correctly."""
from unittest.mock import MagicMock
jinja_content = "{{ messages }}{% if tools %}{{ tools }}{% endif %}"
tokenizer = MagicMock()
tokenizer.get_chat_template.return_value = jinja_content
model_config = MagicMock()
result = resolve_chat_template(
tokenizer,
chat_template="tool_use",
tools=None,
model_config=model_config,
)
assert result == jinja_content
tokenizer.get_chat_template.assert_called_once_with("tool_use", tools=None)
def test_resolve_chat_template_kwargs_with_template_name():
"""Ensures template kwargs are not silently dropped when chat_template
was originally a template name that has been resolved to Jinja content."""
from unittest.mock import MagicMock
jinja_content = (
"{% for m in messages %}{{ m }}{% endfor %}"
"{% if tools %}{{ tools }}{% endif %}"
"{% if documents %}{{ documents }}{% endif %}"
)
tokenizer = MagicMock()
tokenizer.apply_chat_template = MagicMock()
kwargs = {
"tools": [{"type": "function", "function": {"name": "f"}}],
"documents": [{"title": "doc"}],
"unknown_param": "should be dropped",
}
resolved = resolve_chat_template_kwargs(
tokenizer,
chat_template=jinja_content,
chat_template_kwargs=kwargs,
raise_on_unexpected=False,
)
# template vars "tools" and "documents" should be preserved
assert "tools" in resolved
assert "documents" in resolved
# unknown param should be filtered
assert "unknown_param" not in resolved
# NOTE: Qwen2-Audio default chat template is specially defined inside
# processor class instead of using `tokenizer_config.json`
@pytest.mark.parametrize(