[Kernel] Update Cutlass fp8 configs (#5144)

Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
This commit is contained in:
Varun Sundar Rabindranath
2024-06-01 14:16:07 +05:30
committed by GitHub
parent 260d119e86
commit f081c3ce4b
4 changed files with 480 additions and 15 deletions

View File

@@ -82,7 +82,7 @@ def cutlass_int8_gemm_helper(m: int,
assert torch.allclose(out, baseline, rtol=1e-1, atol=1e0)
@pytest.mark.parametrize("m", [512, 222, 33, 1])
@pytest.mark.parametrize("m", [512, 222, 100, 33, 1])
@pytest.mark.parametrize("n", [2048, 256, 1024])
@pytest.mark.parametrize("k", [128, 496, 1024])
@pytest.mark.parametrize("per_act_token", [True, False])