Fix NameError: add rows/cols variables to MoE swizzle

This commit is contained in:
2026-06-04 03:14:27 +00:00
parent a434545d12
commit 5487a58df4

View File

@@ -456,6 +456,8 @@ class Nvfp4MoE:
# Phase 2: Full-buffer swizzle (no CPU sync, no Python loops)
# During graph capture, Python view ops (reshape, transpose) are not allowed.
# Use CUDA swizzle kernel instead.
rows = padded_x_sf.shape[0]
cols = padded_x_sf.shape[1]
if torch.cuda.is_current_stream_capturing():
from dsv4.kernels.cuda.loader import get_cuda_module
mod = get_cuda_module("blackwell_swizzle", ["blackwell_swizzle.cu"])