Fix: correct source dataset tag names for tool_call/tool_response

This commit is contained in:
Jinx
2026-04-10 06:37:43 +00:00
parent 99481ca127
commit eb0850bca6
2 changed files with 5 additions and 5 deletions

Binary file not shown.

View File

@@ -21,11 +21,11 @@ from datasets import load_dataset
# Module-level configuration constants for the dataset-preparation script.
# NOTE(review): the code that consumes these values is outside this hunk;
# the descriptions below are limited to what the assignments themselves show.

# Presumably the share of examples held out for validation (5%) — TODO confirm
# against the split code.
VAL_FRACTION = 0.05
# Fixed seed value; presumably used to make shuffling/splitting reproducible —
# TODO confirm against the caller.
SEED = 42
# NOTE(review): this view renders a diff hunk without +/- markers. The next five
# lines look like the PRE-change version (tool-call tag spelled "tool"); the
# five lines after them look like the replacement (tag spelled "tool_call").
# If both groups were executed as shown, the later assignments would override
# the earlier ones.
# chr(60) is "<" and chr(62) is ">", so each constant is an angle-bracketed tag.
# Hermes-style tags (used in the source datasets)
TC_OPEN = chr(60) + "tool" + chr(62) # <tool>
TC_CLOSE = chr(60) + "/tool" + chr(62) # </tool>
TR_OPEN = chr(60) + "tool_response" + chr(62) # <tool_response>
TR_CLOSE = chr(60) + "/tool_response" + chr(62) # </tool_response>
# Tags used in the source datasets
# TC_* delimit a tool call: the name "tool_call" wrapped in angle brackets,
# with a leading slash in the closing tag.
TC_OPEN = chr(60) + "tool_call" + chr(62)
TC_CLOSE = chr(60) + "/tool_call" + chr(62)
# TR_* delimit a tool response; these two values are the same in both groups.
TR_OPEN = chr(60) + "tool_response" + chr(62)
TR_CLOSE = chr(60) + "/tool_response" + chr(62)
# SmolLM3 native tokens
# Token string for the start of a tool call; presumably the target format the
# source tags above are converted to — TODO confirm against the conversion code.
SMOL_TC_START = "<|tool_call_start|>"