From dfbffa1df1e9f6d915daeec1dcd8a1c7866254df Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Sun, 31 May 2026 23:18:35 +0000
Subject: [PATCH] single_shot: CUDA_LAUNCH_BLOCKING for debugging

---
 single_shot_inference.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/single_shot_inference.py b/single_shot_inference.py
index d7236163..6be8ba92 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -17,6 +17,7 @@ This is the ground truth for vLLM / SGLang integration.
 """
 import os, sys, time, json, math, argparse, logging
 import torch
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Debug only: makes kernel launches synchronous so CUDA errors surface at the failing call (slows execution)
 import torch.nn.functional as F
 from pathlib import Path