[CI] Initial tests for SM100 Blackwell runner (#21877)

Signed-off-by: mgoin <mgoin64@gmail.com>
2025-08-01 19:18:38 -04:00
parent 881e1af43a
commit 88faa466d7
3 changed files with 30 additions and 14 deletions
--- a/tests/kernels/quantization/test_cutlass_scaled_mm.py
+++ b/tests/kernels/quantization/test_cutlass_scaled_mm.py
@@ -559,8 +559,6 @@ def test_cutlass_fp8_group_gemm(num_experts: int, per_act_token: bool,
        m_a_scales = m_g if per_act_token else 1
        n_b_scales = n_g if per_out_ch else 1

-        print("shape:", m_g, n_g, k_g)
-
        # Create group-specific A and B (FP8) and output (FP16/FP32)
        a_g = to_fp8(torch.randn((m_g, k_g), device=device))
        b_g = to_fp8(torch.randn((n_g, k_g), device=device).t())
@@ -639,7 +637,4 @@ def test_cutlass_fp8_group_gemm(num_experts: int, per_act_token: bool,
    for g in range(num_experts):
        baseline = baseline_tensors[g]
        c = out_tensors_stacked[expert_offsets[g]:expert_offsets[g + 1]]
-        print(baseline)
-        print(c)
-        print("*")
        torch.testing.assert_close(c, baseline, rtol=1e-2, atol=5e-4)