[Frontend] Added chat templates for LLaMa4 pythonic tool calling (#16463)

Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
Co-authored-by: Kai Wu <kaiwu@meta.com>
This commit was authored by Ye (Charlotte) Qi on 2025-04-11 15:26:17 -07:00 and committed via GitHub.
parent cd77382ac1
commit 16eda8c43a
5 changed files with 182 additions and 2 deletions

View File

@@ -10,10 +10,33 @@ from vllm.platforms import current_platform
from .utils import ARGS, CONFIGS, ServerConfig
# select models to test based on command line arguments
def pytest_addoption(parser):
    """Register CLI flags that control which model configs the tool-call tests run."""
    # (flag, keyword arguments) pairs, registered in order.
    option_specs = [
        ("--models", {
            "nargs": "+",
            "help": "Specify one or more models to test",
        }),
        ("--extended", {
            "action": "store_true",
            "default": False,
            "help": "invoke extended tests requiring large GPUs",
        }),
    ]
    for flag, kwargs in option_specs:
        parser.addoption(flag, **kwargs)
# for each server config, download the model and return the config
@pytest.fixture(scope="session", params=CONFIGS.keys())
def server_config(request):
config = CONFIGS[request.param]
extended = request.config.getoption("--extended")
models = request.config.getoption("--models")
config_keys_to_test = [
key for key in CONFIGS if (models is None or key in models) and (
extended or not CONFIGS[key].get("extended", False))
]
config_key = request.param
if config_key not in config_keys_to_test:
pytest.skip(f"Skipping config '{config_key}'")
config = CONFIGS[config_key]
if current_platform.is_rocm() and not config.get("supports_rocm", True):
pytest.skip("The {} model can't be tested on the ROCm platform".format(

View File

@@ -16,6 +16,7 @@ class ServerConfig(TypedDict, total=False):
system_prompt: Optional[str]
supports_parallel: Optional[bool]
supports_rocm: Optional[bool]
extended: Optional[bool] # tests do not run in CI automatically
def patch_system_prompt(messages: list[dict[str, Any]],
@@ -82,6 +83,21 @@ CONFIGS: dict[str, ServerConfig] = {
"supports_parallel":
False,
},
"llama4": {
"model":
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
"arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "pythonic", "--chat-template",
str(VLLM_PATH /
"examples/tool_chat_template_llama4_pythonic.jinja"), "-tp",
"4"
],
"supports_parallel":
False,
"extended":
True
},
"mistral": {
"model":
"mistralai/Mistral-7B-Instruct-v0.3",