[torch.compile] Enable attention and allreduce fusion without custom ops enabled (#24604)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
2025-10-17 10:10:23 -04:00
parent be429d0cfd
commit bd7157a071
28 changed files with 1519 additions and 721 deletions
--- a/tests/kernels/quant_utils.py
+++ b/tests/kernels/quant_utils.py
@@ -103,7 +103,7 @@ def ref_dynamic_per_tensor_fp8_quant(
        .clamp(fp8_traits_min, fp8_traits_max)
        .to(FP8_DTYPE)
    )
-    return ref_out, ref_scale.view((1,))
+    return ref_out, ref_scale.view((1, 1))


 def native_w8a8_block_matmul(