Fix NameError: add rows/cols variables to MoE swizzle
This commit is contained in:
@@ -456,6 +456,8 @@ class Nvfp4MoE:
|
||||
# Phase 2: Full-buffer swizzle (no CPU sync, no Python loops)
|
||||
# During CUDA graph capture, Python view ops (reshape, transpose) are not allowed.
|
||||
# Use the CUDA swizzle kernel instead.
|
||||
rows = padded_x_sf.shape[0]
|
||||
cols = padded_x_sf.shape[1]
|
||||
if torch.cuda.is_current_stream_capturing():
|
||||
from dsv4.kernels.cuda.loader import get_cuda_module
|
||||
mod = get_cuda_module("blackwell_swizzle", ["blackwell_swizzle.cu"])
|
||||
|
||||
Reference in New Issue
Block a user