[Model][Quantization] HQQ support through Marlin kernel expansion (#9766)

Signed-off-by: ElizaWszola <eliza@neuralmagic.com>
This commit is contained in:
ElizaWszola
2024-11-19 22:31:12 +01:00
committed by GitHub
parent efa9084628
commit b00b33d77e
11 changed files with 632 additions and 89 deletions

View File

@@ -210,7 +210,8 @@ def marlin_create_bench_fn(bt: BenchmarkTensors) -> Callable:
size_m=bt.a.shape[0],
size_n=bt.w_ref.shape[1],
size_k=bt.w_ref.shape[0],
-is_k_full=True)
+is_k_full=True,
+is_zp_float=False)
else:
assert bt.a.dtype == torch.int8
assert bt.wtype == scalar_types.uint4b8