Fix: correct source dataset tool-call tag names (<tool> → <tool_call>); tool_response tags unchanged
This commit is contained in:
BIN
__pycache__/prepare_data.cpython-312.pyc
Normal file
BIN
__pycache__/prepare_data.cpython-312.pyc
Normal file
Binary file not shown.
@@ -21,11 +21,11 @@ from datasets import load_dataset
|
||||
VAL_FRACTION = 0.05
|
||||
SEED = 42
|
||||
|
||||
# Hermes-style tags (used in the source datasets)
|
||||
TC_OPEN = chr(60) + "tool" + chr(62) # <tool>
|
||||
TC_CLOSE = chr(60) + "/tool" + chr(62) # </tool>
|
||||
TR_OPEN = chr(60) + "tool_response" + chr(62) # <tool_response>
|
||||
TR_CLOSE = chr(60) + "/tool_response" + chr(62) # </tool_response>
|
||||
# Tags used in the source datasets
|
||||
TC_OPEN = chr(60) + "tool_call" + chr(62)
|
||||
TC_CLOSE = chr(60) + "/tool_call" + chr(62)
|
||||
TR_OPEN = chr(60) + "tool_response" + chr(62)
|
||||
TR_CLOSE = chr(60) + "/tool_response" + chr(62)
|
||||
|
||||
# SmolLM3 native tokens
|
||||
SMOL_TC_START = "<|tool_call_start|>"
|
||||
|
||||
Reference in New Issue
Block a user