smollm3-3b-vllm/gen_template.py

#!/usr/bin/env python3
"""Generate the PRODUCTION fixed chat_template.jinja for SmolLM3-3B.

v2: Fixed thinking mode direction - /think now opens the think tag (THINK_S)
    in the generation prompt so the model actually generates reasoning.
"""
from transformers import AutoTokenizer
# Tokenizer whose vocabulary supplies the literal text of the special markers.
tok = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")

# Decode each marker from its vocabulary id so the marker text never has to be
# spelled out in this script.  Order: think start/end, tool-response start/end,
# tool-call start/end.
THINK_S, THINK_E, RESP_S, RESP_E, TC_S, TC_E = (
    tok.decode([token_id])
    for token_id in (128002, 128003, 128013, 128014, 128015, 128016)
)

# Template fragments in emission order.  Later sections of this script append
# more fragments, and the pieces are finally concatenated verbatim.
T = [
    # ─── defaults & system header ───
    # Raw string: backslash escapes such as \n are left for Jinja to interpret.
    # NOTE(review): the `reasoning_mode == "/think"` branch and the fallback
    # branch of the default instructions emit identical text.
    r"""{# ───── defaults ───── #}
{%- if enable_thinking is not defined -%}
{%- set enable_thinking = true -%}
{%- endif -%}

{# ───── reasoning mode ───── #}
{%- if enable_thinking -%}
  {%- set reasoning_mode = "/think" -%}
{%- else -%}
  {%- set reasoning_mode = "/no_think" -%}
{%- endif -%}

{# ───── header (system message) ───── #}
{{- "<|im_start|>system\n" -}}

{%- if messages[0].role == "system" -%}
  {%- set system_message = messages[0].content -%}
  {%- if "/no_think" in system_message -%}
    {%- set reasoning_mode = "/no_think" -%}
  {%- elif "/think" in system_message -%}
    {%- set reasoning_mode = "/think" -%}
  {%- endif -%}
  {%- set custom_instructions = system_message.replace("/no_think", "").replace("/think", "").rstrip() -%}
{%- endif -%}

{%- if "/system_override" in system_message -%}
  {{- custom_instructions.replace("/system_override", "").rstrip() -}}
{%- else -%}
  {{- "## Metadata\n\n" -}}
  {{- "Knowledge Cutoff Date: June 2025\n" -}}
  {%- set today = strftime_now("%d %B %Y") -%}
  {{- "Today Date: " ~ today ~ "\n" -}}
  {{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}

  {{- "## Custom Instructions\n\n" -}}
  {%- if custom_instructions -%}
    {{- custom_instructions + "\n\n" -}}
  {%- elif reasoning_mode == "/think" -%}
    {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
  {%- else -%}
    {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
  {%- endif -%}

  {%- if xml_tools or python_tools or tools -%}
    {{- "### Tools\n\n" -}}
    {%- if xml_tools or tools -%}
      {%- if tools -%}
        {%- set xml_tools = tools -%}
      {%- endif -%}
      {%- set ns = namespace(xml_tool_string="You may call one or more functions to assist with the user query.\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n") -%}
      {%- for tool in xml_tools[:] -%}
        {%- set ns.xml_tool_string = ns.xml_tool_string ~ (tool | tojson) ~ "\n" -%}
      {%- endfor -%}""",
    # Tool-calling instructions, with the decoded tool-call markers spliced
    # into the Jinja string literal.
    # NOTE(review): the sentence names only the opening marker (TC_S) before
    # "XML tags" — confirm the closing marker was not meant to be listed too.
    (
        '\n      {%- set xml_tool_string = ns.xml_tool_string + "</tools>\\n\\n'
        'For each function call, return a json object with function name and '
        'arguments within ' + TC_S + ' XML tags:\\n'
        + TC_S + '\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n'
        + TC_E + '" -%}\n'
    ),
    # Emit the XML tool text, then the optional python-tools listing, and
    # close the system turn.
    r"""      {{- xml_tool_string -}}
    {%- endif -%}
    {%- if python_tools -%}
      {%- set ns = namespace(python_tool_string="You may call one or more functions as python tools.\n<tools>\n") -%}
      {%- for tool in python_tools[:] -%}
        {%- set ns.python_tool_string = ns.python_tool_string ~ (tool | string) ~ "\n" -%}
      {%- endfor -%}
      {%- set python_tool_string = ns.python_tool_string + "</tools>\n\nThe state persists between code executions." -%}
      {{- python_tool_string -}}
    {%- endif -%}
    {{- "\n\n" -}}
  {%- endif -%}
{%- endif -%}
{{- "<|im_end|>\n" -}}""",
]

# ─── Main loop ───
# Renders every user / assistant / tool message in order.  Messages with any
# other role (including a leading system message) match no branch here and
# produce no output from the loop.
T.append(r"""

{# ───── main loop ───── #}
{%- for message in messages -%}
    {%- if message.role == "user" -%}
        {{ "<|im_start|>user\n" + message.content + "<|im_end|>\n" }}
    {%- elif message.role == "assistant" -%}
        {% generation %}
        {%- if message.tool_calls -%}""")

# FIX: Render tool calls with TC_S/TC_E tokens.
# The arguments may reach the template either as a JSON string or as a
# mapping.  Jinja's `~` stringifies its operands, so a mapping would be
# written as a Python repr (single quotes) instead of JSON; strings are passed
# through unchanged and anything else is serialised with `tojson`.
T.append(
    '\n            {%- set ns = namespace(tc_text="") -%}\n'
    '            {%- for tc in message.tool_calls -%}\n'
    '                {%- set tc_args = tc.function.arguments if tc.function.arguments is string else (tc.function.arguments | tojson) -%}\n'
    '                {%- set ns.tc_text = ns.tc_text ~ "' + TC_S + '\\n{\\"name\\": \\"" ~ tc.function.name ~ "\\", \\"arguments\\": " ~ tc_args ~ "}\\n' + TC_E + '" -%}\n'
    '            {%- endfor -%}\n'
    '            {{ "<|im_start|>assistant\\n" ~ (message.content if message.content is string else "") ~ ns.tc_text ~ "<|im_end|>\\n" }}\n'
)

T.append(r"""        {%- else -%}""")

# FIX v2: /think = think tags, /no_think = plain text (CORRECT direction now)
# NOTE(review): in /think mode the entire prior assistant message is rendered
# between THINK_S and THINK_E — confirm this is intended for history turns.
T.append(
    '\n            {%- if reasoning_mode == "/think" -%}\n'
    '                {{ "<|im_start|>assistant\\n' + THINK_S + '\\n" ~ (message.content if message.content is string else "") ~ "\\n' + THINK_E + '<|im_end|>\\n" }}\n'
    '            {%- else -%}\n'
    '                {{ "<|im_start|>assistant\\n" ~ (message.content if message.content is string else "") ~ "<|im_end|>\\n" }}\n'
    '            {%- endif -%}\n'
)

T.append(r"""        {%- endif -%}
        {% endgeneration %}""")

# FIX: Tool role with RESP_S/RESP_E tokens.
# Tool results are emitted as a user turn whose body is wrapped in the
# tool-response markers; non-string content is rendered as an empty body.
T.append(
    '\n    {%- elif message.role == "tool" -%}\n'
    '        {{ "<|im_start|>user\\n' + RESP_S + '\\n" ~ (message.content if message.content is string else "") ~ "\\n' + RESP_E + '<|im_end|>\\n" }}\n'
)

T.append(r"""    {%- endif -%}
{%- endfor -%}""")

# ─── Generation prompt ───
# FIX v2: in /think mode the assistant header is followed by an unclosed
# THINK_S marker so the model continues with its reasoning; in /no_think mode
# only the bare assistant header is emitted.
T.append(
    '\n\n{# ───── generation prompt ───── #}\n'
    '{%- if add_generation_prompt -%}\n'
    '    {%- if reasoning_mode == "/think" -%}\n'
    '        {{ "<|im_start|>assistant\\n' + THINK_S + '\\n" }}\n'
    '    {%- else -%}\n'
    '        {{ "<|im_start|>assistant\\n" }}\n'
    '    {%- endif -%}\n'
    '{%- endif -%}\n'
)

# Concatenate the fragments, in order, into the final template text.
template = ''.join(T)

# Written as UTF-8: the template's Jinja comments contain non-ASCII
# box-drawing characters.
with open('/root/chat_template.jinja', 'w', encoding='utf-8') as f:
    f.write(template)

print("Production template v2 written to /root/chat_template.jinja")
# Report the encoded size: len(template) counts characters, which is smaller
# than the byte count because of the multi-byte characters in the template.
print(f"Length: {len(template.encode('utf-8'))} bytes")