Add CUDA_LAUNCH_BLOCKING=1 to catch async errors
This commit is contained in:
@@ -9,6 +9,7 @@ NO PyTorch SDPA fallback. NO dequant+matmul for production projections.
|
||||
This is the ground truth for vLLM / SGLang integration.
|
||||
"""
|
||||
import os, sys, time, json, math, argparse, logging
|
||||
# Force synchronous CUDA kernel launches so that a failing kernel is reported
# at its own call site instead of at some later, unrelated API call.
# Deliberately placed ahead of the torch import below so the variable is
# already in the environment before any CUDA work can start.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from pathlib import Path
|
||||
|
||||
Reference in New Issue
Block a user