diff --git a/__pycache__/prepare_data.cpython-312.pyc b/__pycache__/prepare_data.cpython-312.pyc new file mode 100644 index 0000000..33a28f5 Binary files /dev/null and b/__pycache__/prepare_data.cpython-312.pyc differ diff --git a/prepare_data.py b/prepare_data.py index 8bd93a9..f088939 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -21,11 +21,11 @@ from datasets import load_dataset VAL_FRACTION = 0.05 SEED = 42 -# Hermes-style tags (used in the source datasets) -TC_OPEN = chr(60) + "tool" + chr(62) # -TC_CLOSE = chr(60) + "/tool" + chr(62) # -TR_OPEN = chr(60) + "tool_response" + chr(62) # -TR_CLOSE = chr(60) + "/tool_response" + chr(62) # +# Tags used in the source datasets +TC_OPEN = chr(60) + "tool_call" + chr(62) +TC_CLOSE = chr(60) + "/tool_call" + chr(62) +TR_OPEN = chr(60) + "tool_response" + chr(62) +TR_CLOSE = chr(60) + "/tool_response" + chr(62) # SmolLM3 native tokens SMOL_TC_START = "<|tool_call_start|>"