diff --git a/single_shot_inference.py b/single_shot_inference.py index 62d4786f..18d6bcde 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -1361,6 +1361,13 @@ def main(): input_ids = [bos, USER_TOKEN] input_ids += tokenizer.encode('\n\n' + PROMPT, add_special_tokens=False) input_ids.append(ASSISTANT_TOKEN) + # DSV4 reasoning model: must prime with ◇ (think_start) after Assistant token. + # Without this, the model is out-of-distribution — it expects to be inside a + # thinking block but never received the think-start sentinel. + # Symptom: degenerate output from step 0 (e.g. "France" instead of "Paris", + # looping on newlines/repeated tokens). With ◇, the model generates thinking + # content, emits ◇ (think_end), then produces the actual answer. + input_ids.append(THINK_START) generated = input_ids all_tokens = generated.copy() print(f"Input: {len(generated)} tokens")