debug: print tensor dtypes, shapes, and contiguity at the C++ call boundary in fp8_nvfp4_mega_moe
This commit is contained in:
@@ -317,6 +317,16 @@ def fp8_nvfp4_mega_moe(y: torch.Tensor,
|
||||
Activation format: E2M1 packed uint8 + UE4M3 scales (computed by staging kernel)
|
||||
Recipe: (1, 1, 16) — kGranK=16 for NVFP4 group_size=16.
|
||||
"""
|
||||
for name, t in [("l1_w", l1_weights), ("l1_w_sf", l1_weights_sf),
|
||||
("l2_w", l2_weights), ("l2_w_sf", l2_weights_sf)]:
|
||||
print(f"[debug] {name}: dtype={t.dtype} shape={tuple(t.shape)} contig={t.is_contiguous()}", flush=True)
|
||||
|
||||
# Also check the sym_buffer views
|
||||
for name, t in [("sym_x", sym_buffer.x), ("sym_x_sf", sym_buffer.x_sf),
|
||||
("sym_l1_acts", sym_buffer.l1_acts), ("sym_l1_acts_sf", sym_buffer.l1_acts_sf),
|
||||
("sym_l2_acts", sym_buffer.l2_acts), ("sym_l2_acts_sf", sym_buffer.l2_acts_sf)]:
|
||||
print(f"[debug] {name}: dtype={t.dtype} shape={tuple(t.shape)} contig={t.is_contiguous()}", flush=True)
|
||||
|
||||
_C.fp8_nvfp4_mega_moe(
|
||||
y,
|
||||
l1_weights, l2_weights,
|
||||
|
||||
Reference in New Issue
Block a user