[Kernel] Tuned int8 Cutlass Kernels for SM75 (T4) (#6996)
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
commit 35e9c12bfa
parent 93548eb37e
@@ -112,13 +112,20 @@ def bench_int8(dtype: torch.dtype, m: int, k: int, n: int, label: str,
     scale_b = torch.tensor(1.0, device="cuda", dtype=torch.float32)
 
     timers = []
-    # pytorch impl
+    # pytorch impl - bfloat16
     timers.append(
         bench_fn(a.to(dtype=torch.bfloat16, device="cuda"),
                  b.to(dtype=torch.bfloat16, device="cuda"), scale_a, scale_b,
                  torch.bfloat16, label, sub_label, pytorch_mm_impl,
                  "pytorch_bf16_bf16_bf16_matmul-no-scales"))
 
+    # pytorch impl - float16
+    timers.append(
+        bench_fn(a.to(dtype=torch.float16, device="cuda"),
+                 b.to(dtype=torch.float16, device="cuda"), scale_a, scale_b,
+                 torch.float16, label, sub_label, pytorch_mm_impl,
+                 "pytorch_fp16_fp16_fp16_matmul-no-scales"))
+
     # cutlass impl
     timers.append(
         bench_fn(a, b, scale_a, scale_b, torch.bfloat16, label, sub_label,
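For context, here is a minimal, self-contained sketch of what the new float16 baseline measures, assuming `pytorch_mm_impl` is a plain `torch.mm` wrapper and that `bench_fn` times a callable with `torch.utils.benchmark.Timer`. The `bench_one` helper and the shapes below are illustrative stand-ins, not the benchmark's actual internals:

```python
# Hypothetical sketch of the fp16 matmul baseline added in this hunk.
# The real bench_fn also takes scale_a/scale_b; the "no-scales" pytorch
# baseline ignores them, so they are omitted here for brevity.
import torch
import torch.utils.benchmark as TBenchmark


def pytorch_mm_impl(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # Plain matmul baseline: no quantization scales are applied.
    return torch.mm(a, b)


def bench_one(a: torch.Tensor, b: torch.Tensor, label: str,
              sub_label: str, description: str) -> TBenchmark.Measurement:
    # Illustrative stand-in for bench_fn: time fn(a, b) with
    # torch.utils.benchmark, which handles CUDA synchronization.
    return TBenchmark.Timer(
        stmt="fn(a, b)",
        globals={"a": a, "b": b, "fn": pytorch_mm_impl},
        label=label,
        sub_label=sub_label,
        description=description,
    ).blocked_autorange(min_run_time=1)


if __name__ == "__main__":
    m, k, n = 512, 4096, 4096  # example GEMM shape, not from the PR
    a = torch.randn((m, k), device="cuda", dtype=torch.float16)
    b = torch.randn((k, n), device="cuda", dtype=torch.float16)
    print(bench_one(a, b, "int8-gemm", f"m={m} k={k} n={n}",
                    "pytorch_fp16_fp16_fp16_matmul-no-scales"))
```

This mirrors the existing bfloat16 baseline with the dtype swapped to float16, giving a second reference point for the int8 cutlass kernels on SM75.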