From 483e759d5380a0d8455a7846426933a066732ef4 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 1 Jun 2026 09:16:33 +0000 Subject: [PATCH] Fix: use tensor.mark_layout_dynamic() method (not cute.mark_layout_dynamic) --- dsv4/kernels/router/nvfp4_fused_router_kernel.py | 10 +++++----- tests/unit/test_fused_router.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dsv4/kernels/router/nvfp4_fused_router_kernel.py b/dsv4/kernels/router/nvfp4_fused_router_kernel.py index 6ab2871a..576df42d 100644 --- a/dsv4/kernels/router/nvfp4_fused_router_kernel.py +++ b/dsv4/kernels/router/nvfp4_fused_router_kernel.py @@ -1056,14 +1056,14 @@ def run_nvfp4_fused_router( # A tensor: [K_packed, M, L] where K_packed = K/2 (2 elements per byte for FP4) K_packed = K // 2 mat_a = cutlass_torch.from_dlpack(act_nvfp4) - mat_a = cute.mark_layout_dynamic(mat_a) + mat_a = mat_a.mark_layout_dynamic() # SFA tensor: [K_sf, M, L] scale_a = cutlass_torch.from_dlpack(act_sf) - scale_a = cute.mark_layout_dynamic(scale_a) + scale_a = scale_a.mark_layout_dynamic() # e_bias must be a CuTe tensor e_bias_cute = cutlass_torch.from_dlpack(e_bias) - e_bias_cute = cute.mark_layout_dynamic(e_bias_cute) + e_bias_cute = e_bias_cute.mark_layout_dynamic() # Number of experts from e_bias E = e_bias.shape[0] @@ -1072,9 +1072,9 @@ def run_nvfp4_fused_router( out_weights = torch.zeros(M, top_k, dtype=torch.float32, device=device) out_ids = torch.zeros(M, top_k, dtype=torch.int32, device=device) out_w_cute = cutlass_torch.from_dlpack(out_weights) - out_w_cute = cute.mark_layout_dynamic(out_w_cute) + out_w_cute = out_w_cute.mark_layout_dynamic() out_id_cute = cutlass_torch.from_dlpack(out_ids) - out_id_cute = cute.mark_layout_dynamic(out_id_cute) + out_id_cute = out_id_cute.mark_layout_dynamic() # MMA tiler: (128, 128, 64) for decode mma_tiler_mnk = (128, 128, 64) diff --git a/tests/unit/test_fused_router.py b/tests/unit/test_fused_router.py index 974c2e7c..efd3b722 100644 --- a/tests/unit/test_fused_router.py +++ b/tests/unit/test_fused_router.py @@ -102,9 +102,9 @@ def test_fused_router(): # CuTe tensors for A (activation) mat_a = cutlass_torch.from_dlpack(act_nvfp4) - mat_a = cute.mark_layout_dynamic(mat_a) + mat_a = mat_a.mark_layout_dynamic() scale_a = cutlass_torch.from_dlpack(act_sf) - scale_a = cute.mark_layout_dynamic(scale_a) + scale_a = scale_a.mark_layout_dynamic() # CuTe tensors for B (weight) — from gate_lin mat_b = gate_lin._mat_b @@ -112,15 +112,15 @@ def test_fused_router(): # e_bias CuTe tensor e_bias_cute = cutlass_torch.from_dlpack(e_bias) - e_bias_cute = cute.mark_layout_dynamic(e_bias_cute) + e_bias_cute = e_bias_cute.mark_layout_dynamic() # Output buffers out_weights = torch.zeros(M, top_k, dtype=torch.float32, device=device) out_ids = torch.zeros(M, top_k, dtype=torch.int32, device=device) out_w_cute = cutlass_torch.from_dlpack(out_weights) - out_w_cute = cute.mark_layout_dynamic(out_w_cute) + out_w_cute = out_w_cute.mark_layout_dynamic() out_id_cute = cutlass_torch.from_dlpack(out_ids) - out_id_cute = cute.mark_layout_dynamic(out_id_cute) + out_id_cute = out_id_cute.mark_layout_dynamic() kernel = Nvfp4FusedRouterKernel( sf_vec_size=sf_vec_size,