[Kernel] Enable fp8 support for pplx and BatchedTritonExperts. (#18864)
Signed-off-by: Bill Nell <bnell@redhat.com>
@@ -277,6 +277,24 @@ def dequant(
     return t.to(out_dtype)
 
 
+def batched_dequant(
+    t: torch.Tensor,
+    scale: Optional[torch.Tensor],
+    block_shape: Optional[list[int]],
+    per_act_token_quant: bool,
+    out_dtype: Optional[torch.dtype] = torch.float32,
+) -> torch.Tensor:
+    if scale is not None:
+        assert t.shape[0] == scale.shape[0]
+        out = torch.empty_like(t, dtype=out_dtype)
+        for e in range(t.shape[0]):
+            out[e] = dequant(t[e], scale[e], block_shape, per_act_token_quant,
+                             out_dtype)
+        return out
+
+    return t.to(out_dtype)
+
+
 def native_batched_masked_quant_matmul(
     A: torch.Tensor,
     B: torch.Tensor,
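
A minimal usage sketch (not part of the commit) of the new batched_dequant helper: it quantizes a batch of per-expert tensors to fp8 with one per-tensor scale each, then dequantizes them back to float32. The shapes, the per-tensor scale layout of [num_experts, 1, 1], and the assumption that dequant simply broadcasts the scale over the tensor are illustrative assumptions, not taken from the commit.

    # Illustrative sketch only: exercises batched_dequant from the diff above.
    # Assumes per-tensor scales of shape [num_experts, 1, 1] broadcast correctly
    # inside dequant; the sizes below are arbitrary.
    import torch

    num_experts, m, k = 4, 16, 128
    fp8_max = torch.finfo(torch.float8_e4m3fn).max

    ref = torch.randn(num_experts, m, k)
    scale = ref.abs().amax(dim=(1, 2), keepdim=True) / fp8_max
    t_fp8 = (ref / scale).to(torch.float8_e4m3fn)

    # Per-tensor quantization: no block shape, no per-token scales.
    out = batched_dequant(t_fp8,
                          scale,
                          block_shape=None,
                          per_act_token_quant=False,
                          out_dtype=torch.float32)
    assert out.shape == ref.shape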