diff --git a/single_shot_inference.py b/single_shot_inference.py
index d7236163..6be8ba92 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -17,6 +17,9 @@
 This is the ground truth for vLLM / SGLang integration.
 """
 import os, sys, time, json, math, argparse, logging
+# Debug aid: make CUDA kernel launches synchronous so errors surface at the
+# failing call. Set before importing torch; an existing env value wins.
+os.environ.setdefault('CUDA_LAUNCH_BLOCKING', '1')
 import torch
 import torch.nn.functional as F
 from pathlib import Path