[Kernel] Update Cutlass fp8 configs (#5144)

Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com> Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
2024-06-01 14:16:07 +05:30
parent 260d119e86
commit f081c3ce4b
4 changed files with 480 additions and 15 deletions
--- a/tests/kernels/test_cutlass.py
+++ b/tests/kernels/test_cutlass.py
@@ -82,7 +82,7 @@ def cutlass_int8_gemm_helper(m: int,
    assert torch.allclose(out, baseline, rtol=1e-1, atol=1e0)


-@pytest.mark.parametrize("m", [512, 222, 33, 1])
+@pytest.mark.parametrize("m", [512, 222, 100, 33, 1])
@pytest.mark.parametrize("n", [2048, 256, 1024])
@pytest.mark.parametrize("k", [128, 496, 1024])
@pytest.mark.parametrize("per_act_token", [True, False])