[2/n] Migrate per_token_group_quant to torch stable ABI (#36058)

Signed-off-by: Mikayla Gawarecki <mikaylagawarecki@gmail.com>
2026-03-25 13:15:13 -04:00
parent 1ac2ef2e53
commit bf4cc9ed2d
22 changed files with 207 additions and 133 deletions
--- a/csrc/ops.h
+++ b/csrc/ops.h
@@ -306,25 +306,6 @@ void silu_and_mul_scaled_fp4_experts_quant(
    torch::Tensor const& input_offset_by_experts,
    torch::Tensor const& output_scale_offset_by_experts);

-void per_token_group_quant_fp8(const torch::Tensor& input,
-                               torch::Tensor& output_q, torch::Tensor& output_s,
-                               int64_t group_size, double eps, double fp8_min,
-                               double fp8_max, bool scale_ue8m0,
-                               bool dummy_is_scale_transposed,
-                               bool dummy_is_tma_aligned);
-
-void per_token_group_quant_int8(const torch::Tensor& input,
-                                torch::Tensor& output_q,
-                                torch::Tensor& output_s, int64_t group_size,
-                                double eps, double int8_min, double int8_max);
-
-// Fused activation quantisation + DeepGEMM-compatible UE8M0-packed scales.
-void per_token_group_quant_8bit_packed(const torch::Tensor& input,
-                                       torch::Tensor& output_q,
-                                       torch::Tensor& output_s_packed,
-                                       int64_t group_size, double eps,
-                                       double min_8bit, double max_8bit);
-
 #endif

 void static_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,