[2/n] Migrate per_token_group_quant to torch stable ABI (#36058)
Signed-off-by: Mikayla Gawarecki <mikaylagawarecki@gmail.com>
This commit is contained in:
@@ -1,12 +0,0 @@
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <torch/all.h>
|
||||
|
||||
#include "quantization/w8a8/per_token_group_quant_8bit.h"
|
||||
|
||||
void per_token_group_quant_int8(const torch::Tensor& input,
|
||||
torch::Tensor& output_q,
|
||||
torch::Tensor& output_s, int64_t group_size,
|
||||
double eps, double int8_min, double int8_max) {
|
||||
per_token_group_quant_8bit(input, output_q, output_s, group_size, eps,
|
||||
int8_min, int8_max);
|
||||
}
|
||||
@@ -5,7 +5,7 @@
|
||||
#include <cmath>
|
||||
|
||||
#include "dispatch_utils.h"
|
||||
#include "quantization/vectorization_utils.cuh"
|
||||
#include "libtorch_stable/quantization/vectorization_utils.cuh"
|
||||
#include "cub_helpers.h"
|
||||
|
||||
static inline __device__ int8_t float_to_int8_rn(float x) {
|
||||
|
||||
Reference in New Issue
Block a user