fix: use randint for float4 dummy weights in cudagraph test
This commit is contained in:
@@ -106,15 +106,18 @@ def make_dummy_runner(num_experts=32, hidden_size=7168, intermediate_size=3072,
|
||||
runner = CuTeDSLMoERunner(num_experts, hidden_size, intermediate_size, device=device)
|
||||
|
||||
# Create minimal dummy weights
|
||||
l1_fp4 = [torch.randn(3584, intermediate_size * 2 // 2, dtype=torch.float4_e2m1fn_x2, device=device)
|
||||
for _ in range(num_experts)]
|
||||
l1_sf = [torch.randn(3584 // 16, intermediate_size * 2, dtype=torch.float8_e4m3fn, device=device)
|
||||
for _ in range(num_experts)]
|
||||
# Create minimal dummy weights (uint8 → view as float4)
|
||||
def rand_fp4(*shape, device="cuda"):
|
||||
return torch.randint(0, 256, shape, dtype=torch.uint8, device=device).view(torch.float4_e2m1fn_x2)
|
||||
|
||||
def rand_sf(*shape, device="cuda"):
|
||||
return torch.rand(shape, dtype=torch.float8_e4m3fn, device=device)
|
||||
|
||||
l1_fp4 = [rand_fp4(3584, intermediate_size, device=device) for _ in range(num_experts)]
|
||||
l1_sf = [rand_sf(3584 // 16, intermediate_size * 2, device=device) for _ in range(num_experts)]
|
||||
l1_gs = [0.1] * num_experts
|
||||
l2_fp4 = [torch.randn(1536, hidden_size // 2, dtype=torch.float4_e2m1fn_x2, device=device)
|
||||
for _ in range(num_experts)]
|
||||
l2_sf = [torch.randn(1536 // 16, hidden_size, dtype=torch.float8_e4m3fn, device=device)
|
||||
for _ in range(num_experts)]
|
||||
l2_fp4 = [rand_fp4(1536, hidden_size // 2, device=device) for _ in range(num_experts)]
|
||||
l2_sf = [rand_sf(1536 // 16, hidden_size, device=device) for _ in range(num_experts)]
|
||||
l2_gs = [0.1] * num_experts
|
||||
|
||||
runner.prepare_weights_direct(l1_fp4, l1_sf, l1_gs, l2_fp4, l2_sf, l2_gs)
|
||||
|
||||
Reference in New Issue
Block a user