From dfbffa1df1e9f6d915daeec1dcd8a1c7866254df Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 31 May 2026 23:18:35 +0000 Subject: [PATCH] single_shot: CUDA_LAUNCH_BLOCKING for debugging --- single_shot_inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/single_shot_inference.py b/single_shot_inference.py index d7236163..6be8ba92 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -17,6 +17,7 @@ This is the ground truth for vLLM / SGLang integration. """ import os, sys, time, json, math, argparse, logging import torch +os.environ['CUDA_LAUNCH_BLOCKING'] = '1' # Catch CUDA errors synchronously import torch.nn.functional as F from pathlib import Path