Add more GPU architectures support (#112)
* Add more GPU architectures support * Update layout.py * Optimize performance, Add SM90 support, Add 1D2D SM100 support * Add fmtlib submodule at commit 553ec11 --------- Co-authored-by: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
This commit is contained in:
19
deep_gemm/testing/numeric.py
Normal file
19
deep_gemm/testing/numeric.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import torch
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
def calc_diff(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Return a normalized difference between two tensors.

    Both inputs are promoted to float64 and the result is computed as
    ``1 - 2 * sum(x * y) / sum(x * x + y * y)``. Identical inputs give 0;
    the value grows as the tensors diverge.

    Args:
        x: First tensor.
        y: Second tensor; must be broadcast-compatible with ``x``.

    Returns:
        A 0-dim float64 tensor holding the difference.
    """
    x, y = x.double(), y.double()
    denominator = (x * x + y * y).sum()
    # A zero denominator means every element of both tensors is zero, so the
    # inputs are identical. Return 0 rather than the NaN that 0 / 0 produces.
    if denominator == 0:
        return torch.zeros_like(denominator)
    sim = 2 * (x * y).sum() / denominator
    return 1 - sim
|
||||
|
||||
|
||||
def count_bytes(*tensors):
    """Return the combined storage size, in bytes, of the given tensors.

    Each argument may be a tensor-like object exposing ``numel()`` and
    ``element_size()``, ``None`` (which contributes nothing), or a tuple/list
    of such arguments, which is counted recursively.
    """

    def _nbytes(item):
        # Size contribution of one argument.
        if isinstance(item, (tuple, list)):
            return count_bytes(*item)
        if item is None:
            return 0
        return item.numel() * item.element_size()

    return sum(_nbytes(item) for item in tensors)
|
||||
Reference in New Issue
Block a user