Fix prompt format: use DeepSeek V4 chat tokens
The model was trained with DeepSeek-specific chat tokens: <|User|> (128803), <|Assistant|> (128804), and <|EOT|> (128805), plus the thinking tokens fi (128821) and fl (128822). The previous manual assembly just concatenated raw text without these tokens, so the model could not recognize user/assistant boundaries. New format: <BOS><|User|>system prompt\n\nuser prompt<|Assistant|>
This commit is contained in:
@@ -783,29 +783,27 @@ def main():
|
||||
|
||||
# ==== Phase 3: Inference ====
|
||||
print(f"\n{'='*70}\nPhase 3: Inference\n{'='*70}")
|
||||
# Apply chat template with system prompt
|
||||
messages = [
|
||||
{"role": "system", "content": SYSTEM_PROMPT},
|
||||
{"role": "user", "content": PROMPT},
|
||||
]
|
||||
if hasattr(tokenizer, 'apply_chat_template') and tokenizer.chat_template is not None:
|
||||
input_ids = tokenizer.apply_chat_template(
|
||||
messages, return_tensors="pt", add_generation_prompt=True
|
||||
).cuda()
|
||||
# Find where the user prompt starts for display
|
||||
user_only_ids = tokenizer.encode(PROMPT, return_tensors="pt")
|
||||
print(f"Chat template applied. Input: {input_ids.shape[1]} tokens")
|
||||
else:
|
||||
# Fallback: prepend system prompt manually
|
||||
sys_ids = tokenizer.encode(SYSTEM_PROMPT, return_tensors="pt")[0]
|
||||
user_ids = tokenizer.encode(PROMPT, return_tensors="pt")[0]
|
||||
# Add BOS + system + newline + user
|
||||
all_ids = [tokenizer.bos_token_id] if tokenizer.bos_token_id else []
|
||||
all_ids += sys_ids.tolist() + user_ids.tolist()
|
||||
input_ids = torch.tensor([all_ids], dtype=torch.long).cuda()
|
||||
print(f"Manual prompt assembly. Input: {input_ids.shape[1]} tokens")
|
||||
print(f"Prompt: '{PROMPT}' → {input_ids.tolist()[:20]}...")
|
||||
print(f"Decoded: '{tokenizer.decode(input_ids[0][:50])}'")
|
||||
# DeepSeek V4 chat format: <|begin▁of▁sentence|><|User|>prompt<|Assistant|>
|
||||
# For reasoning models: <|User|>prompt<|Assistant|>fithinking...flanswer
|
||||
# Special token IDs: <|User|>=128803, <|Assistant|>=128804, <|EOT|>=128805
|
||||
# Thinking tokens: fi=128821, fl=128822
|
||||
USER_TOKEN = 128803
|
||||
ASSISTANT_TOKEN = 128804
|
||||
EOT_TOKEN = 128805
|
||||
THINK_START = 128821 # fi
|
||||
THINK_END = 128822 # fl
|
||||
|
||||
# Build input with proper DeepSeek chat format
|
||||
bos_id = tokenizer.bos_token_id or 0
|
||||
# <BOS> <|User|> System prompt \n\n User prompt <|Assistant|>
|
||||
input_ids_list = [bos_id, USER_TOKEN]
|
||||
input_ids_list += tokenizer.encode(SYSTEM_PROMPT, add_special_tokens=False)
|
||||
input_ids_list += tokenizer.encode('\n\n' + PROMPT, add_special_tokens=False)
|
||||
input_ids_list.append(ASSISTANT_TOKEN)
|
||||
input_ids = torch.tensor([input_ids_list], dtype=torch.long).cuda()
|
||||
print(f"DeepSeek chat format. Input: {input_ids.shape[1]} tokens", flush=True)
|
||||
print(f"Decoded start: '{tokenizer.decode(input_ids[0][:20])}...'", flush=True)
|
||||
print(f"Decoded end: '...{tokenizer.decode(input_ids[0][-5:])}'", flush=True)
|
||||
|
||||
generated = input_ids[0].tolist()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user