DEBUG: fix VERBOSE reference in shared_expert, always print L2 gsa debug
This commit is contained in:
@@ -368,8 +368,7 @@ class Nvfp4SharedExpert:
|
||||
x_fp4, x_sf, gsa_l2_gpu = quantize_nvfp4_gpu_fused(intermediate)
|
||||
# DEBUG: verify no CUDA errors from quantize kernel
|
||||
torch.cuda.synchronize() # DEBUG: catch async errors
|
||||
- if VERBOSE >= 1:
|
||||
- print(f" SE L2 gsa: gsa_gpu shape={tuple(gsa_l2_gpu.shape)} dtype={gsa_l2_gpu.dtype} dev={gsa_l2_gpu.device} _l2_gsa_buf shape={tuple(self._l2_gsa_buf.shape)} dev={self._l2_gsa_buf.device}", flush=True)
|
||||
+ print(f" SE L2 gsa: gsa_gpu shape={tuple(gsa_l2_gpu.shape)} dtype={gsa_l2_gpu.dtype} dev={gsa_l2_gpu.device} _l2_gsa_buf shape={tuple(self._l2_gsa_buf.shape)} dev={self._l2_gsa_buf.device}", flush=True)
|
||||
self._l2_gsa_buf.copy_(gsa_l2_gpu[:1].reshape(1)) # GPU → GPU, no sync
|
||||
else:
|
||||
x_fp4, x_sf = quantize_activation_nvfp4(
|
||||
|
||||
Reference in New Issue
Block a user