* Add more GPU architectures support * Update layout.py * Optimize performance, Add SM90 support, Add 1D2D SM100 support * Add fmtlib submodule at commit 553ec11 --------- Co-authored-by: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
20 lines
465 B
Python
20 lines
465 B
Python
import torch
|
|
from typing import Iterable
|
|
|
|
|
|
def calc_diff(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Return a normalized dissimilarity between two tensors.

    Both inputs are promoted to float64 and the result is computed as
    ``1 - 2 * sum(x * y) / sum(x * x + y * y)``. The value is 0 when the
    tensors are identical, 1 when they are orthogonal, and 2 when
    ``y == -x``.

    Args:
        x: First tensor.
        y: Second tensor; must be elementwise-compatible with ``x``.

    Returns:
        A 0-dim float64 tensor holding the dissimilarity.
    """
    x, y = x.double(), y.double()
    denominator = (x * x + y * y).sum()
    # A zero denominator means every element of both tensors is zero.
    # The inputs are then identical, so report zero difference instead of
    # letting 0 / 0 produce NaN (which would fail any `diff < tol` check).
    if denominator == 0:
        return torch.zeros_like(denominator)
    sim = 2 * (x * y).sum() / denominator
    return 1 - sim
|
|
|
|
|
|
def count_bytes(*tensors):
    """Sum the storage size, in bytes, of every tensor passed in.

    Arguments may be tensors, ``None`` (contributes nothing), or tuples/lists
    of the same, nested to any depth. Each tensor contributes
    ``numel() * element_size()`` bytes.
    """

    def _nbytes(item):
        # Size contribution of one argument, descending into containers.
        if item is None:
            return 0
        if isinstance(item, (tuple, list)):
            return count_bytes(*item)
        return item.numel() * item.element_size()

    return sum(_nbytes(item) for item in tensors)
|