diff --git a/single_shot_inference.py b/single_shot_inference.py
index 9bf945ca..9d10121e 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -9,6 +9,7 @@ NO PyTorch SDPA fallback. NO dequant+matmul for production projections.
 This is the ground truth for vLLM / SGLang integration.
 """
 import os, sys, time, json, math, argparse, logging
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Debug aid: set before `import torch` so async CUDA errors are reported immediately. NOTE(review): overrides any value already in the environment and presumably serializes kernel launches (slower runs) -- confirm this is meant to stay enabled.
 import torch
 import torch.nn.functional as F
 from pathlib import Path