update directory for cutlass w8a8

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
yewentao256
2025-08-27 21:05:41 +00:00
parent c643e63f98
commit 57f2f26a05
42 changed files with 22 additions and 22 deletions

View File

@@ -0,0 +1,9 @@
#pragma once
#include <torch/all.h>
// 8-bit per-token-group quantization helper used by both FP8 and INT8.
//
// Declaration only: the definition is not part of this header.
//
// Parameters (as visible from the signature):
//   input       - source tensor, taken by const reference.
//   output_q    - tensor taken by non-const reference.
//   output_s    - tensor taken by non-const reference.
//   group_size  - 64-bit integer group size.
//   eps         - double-precision epsilon.
//   min_8bit    - double-precision lower limit for the 8-bit target type.
//   max_8bit    - double-precision upper limit for the 8-bit target type.
//   scale_ue8m0 - optional flag, defaults to false.
//
// NOTE(review): the following is inferred from the parameter names only and
// should be confirmed against the implementation:
//   - output_q presumably receives the quantized 8-bit values and output_s
//     the per-group scales;
//   - group_size is presumably the number of elements sharing one scale;
//   - eps presumably guards against a zero/near-zero scale;
//   - min_8bit/max_8bit presumably give the clamp range (FP8 vs INT8);
//   - scale_ue8m0 presumably selects UE8M0 (power-of-two) scale encoding.
void per_token_group_quant_8bit(const torch::Tensor& input,
torch::Tensor& output_q,
torch::Tensor& output_s, int64_t group_size,
double eps, double min_8bit, double max_8bit,
bool scale_ue8m0 = false);