From ca661d32e8e94de838d550da52e934dd7f5c6312 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 31 May 2026 19:03:55 +0000 Subject: [PATCH] Empty system prompt for testing (the previous prompt was causing the model to regurgitate AI assistant tokens); replace --skip-mhc flag with --no-thinking --- single_shot_inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/single_shot_inference.py b/single_shot_inference.py index 9f18dc9f..cbb76cde 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -48,7 +48,7 @@ def parse_args(): p = argparse.ArgumentParser(description='DSV4 Single-Shot Inference') p.add_argument('--no-inverse-rope', action='store_true', help='Skip inverse RoPE on attention output') p.add_argument('--skip-moe', action='store_true', help='Only use shared expert (skip routed)') - p.add_argument('--skip-mhc', action='store_true', help='Bypass mHC, use simple residual (diagnostic)') + p.add_argument('--no-thinking', action='store_true', help='Force model to skip thinking (use <|EOT|> instead of thinking tokens)') p.add_argument('--max-tokens', type=int, default=512, help='Max new tokens to generate') p.add_argument('--prompt', type=str, default=None, help='Override prompt') return p.parse_args() @@ -57,7 +57,7 @@ _args = parse_args() CHECKPOINT_DIR = "/root/nvidia-meeting/DeepSeek-V4-Pro-NVFP4" MAX_NEW_TOKENS = _args.max_tokens -SYSTEM_PROMPT = "You are a helpful, harmless, and honest AI assistant. Answer the user's questions accurately and concisely. If you're unsure about something, say so rather than guessing. Follow the user's instructions carefully and ask for clarification when needed. Always respond in the same language the user is writing in." +SYSTEM_PROMPT = "" # Empty system prompt for testing PROMPT = _args.prompt or "The capital of France is" NUM_GPUS = 8 SKIP_ROUTED_MOE = _args.skip_moe # If True, only use shared expert (debug)