Add CUDA_LAUNCH_BLOCKING=1 to catch async errors

2026-06-03 14:48:51 +00:00
parent f3bb0ca08c
commit 1121cd7b47
1 changed file with 1 addition and 0 deletions
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -9,6 +9,7 @@ NO PyTorch SDPA fallback. NO dequant+matmul for production projections.
 This is the ground truth for vLLM / SGLang integration.
 """
 import os, sys, time, json, math, argparse, logging
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Catch async CUDA errors immediately
 import torch
 import torch.nn.functional as F
 from pathlib import Path