diff --git a/single_shot_inference.py b/single_shot_inference.py
index d7236163..6be8ba92 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -17,6 +17,7 @@ This is the ground truth for vLLM / SGLang integration.
 """
 import os, sys, time, json, math, argparse, logging
 import torch
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Debug aid: catch CUDA errors synchronously. NOTE(review): set unconditionally and after `import torch` -- confirm it precedes CUDA init and is intended outside debugging (likely slows runs)
 import torch.nn.functional as F
 from pathlib import Path