Add CUDA_LAUNCH_BLOCKING=1 to catch async errors

This commit is contained in:
2026-06-03 14:48:51 +00:00
parent f3bb0ca08c
commit 1121cd7b47

View File

@@ -9,6 +9,7 @@ NO PyTorch SDPA fallback. NO dequant+matmul for production projections.
This is the ground truth for vLLM / SGLang integration.
"""
import os, sys, time, json, math, argparse, logging
# Catch async CUDA errors immediately.
# This assignment unconditionally overwrites any CUDA_LAUNCH_BLOCKING value
# inherited from the caller's environment, and it is deliberately placed
# before `import torch` below -- do not let import sorting move it.
# NOTE(review): presumably the variable must be set before CUDA is
# initialized to take effect, and it likely slows GPU execution by making
# kernel launches synchronous -- confirm it is meant to stay enabled outside
# of debugging runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import torch
import torch.nn.functional as F
from pathlib import Path