Fix prompt format: use DeepSeek V4 chat tokens

The model was trained with DeepSeek-specific chat tokens:
  <|User|> (128803), <|Assistant|> (128804), <|EOT|> (128805)
  Thinking delimiters: fi (128821) opens the thinking span, fl (128822) closes it

The previous manual-assembly fallback simply concatenated the raw system and
user text without these tokens, so the model could not recognize
user/assistant boundaries.

Format: <BOS><|User|>system prompt\n\nuser prompt<|Assistant|>
This commit is contained in:
2026-05-31 10:33:41 +00:00
parent f86742ef8e
commit d891ae7e96

View File

@@ -783,29 +783,27 @@ def main():
# ==== Phase 3: Inference ====
print(f"\n{'='*70}\nPhase 3: Inference\n{'='*70}")
# Apply chat template with system prompt
messages = [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": PROMPT},
]
if hasattr(tokenizer, 'apply_chat_template') and tokenizer.chat_template is not None:
input_ids = tokenizer.apply_chat_template(
messages, return_tensors="pt", add_generation_prompt=True
).cuda()
# Find where the user prompt starts for display
user_only_ids = tokenizer.encode(PROMPT, return_tensors="pt")
print(f"Chat template applied. Input: {input_ids.shape[1]} tokens")
else:
# Fallback: prepend system prompt manually
sys_ids = tokenizer.encode(SYSTEM_PROMPT, return_tensors="pt")[0]
user_ids = tokenizer.encode(PROMPT, return_tensors="pt")[0]
# Add BOS + system + newline + user
all_ids = [tokenizer.bos_token_id] if tokenizer.bos_token_id else []
all_ids += sys_ids.tolist() + user_ids.tolist()
input_ids = torch.tensor([all_ids], dtype=torch.long).cuda()
print(f"Manual prompt assembly. Input: {input_ids.shape[1]} tokens")
print(f"Prompt: '{PROMPT}'{input_ids.tolist()[:20]}...")
print(f"Decoded: '{tokenizer.decode(input_ids[0][:50])}'")
# DeepSeek V4 chat format: <begin▁of▁sentence><|User|>prompt<|Assistant|>
# For reasoning models: <|User|>prompt<|Assistant|> fi thinking... fl answer
# Special token IDs: <|User|>=128803, <|Assistant|>=128804, <|EOT|>=128805
# Thinking tokens: fi=128821 (start of thinking), fl=128822 (end of thinking)
USER_TOKEN = 128803
ASSISTANT_TOKEN = 128804
EOT_TOKEN = 128805
THINK_START = 128821 # fi
THINK_END = 128822 # fl
# Build input with proper DeepSeek chat format
bos_id = tokenizer.bos_token_id or 0
# <BOS> <User> System prompt \n\n User prompt <Assistant>
input_ids_list = [bos_id, USER_TOKEN]
input_ids_list += tokenizer.encode(SYSTEM_PROMPT, add_special_tokens=False)
input_ids_list += tokenizer.encode('\n\n' + PROMPT, add_special_tokens=False)
input_ids_list.append(ASSISTANT_TOKEN)
input_ids = torch.tensor([input_ids_list], dtype=torch.long).cuda()
print(f"DeepSeek chat format. Input: {input_ids.shape[1]} tokens", flush=True)
print(f"Decoded start: '{tokenizer.decode(input_ids[0][:20])}...'", flush=True)
print(f"Decoded end: '...{tokenizer.decode(input_ids[0][-5:])}'", flush=True)
generated = input_ids[0].tolist()