[CPU][Perf] Accelerate Attention head for s390x using vector intrinsics (#34434)

Signed-off-by: Rehan Khan <Rehan.Khan7@ibm.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
2026-02-24 20:55:39 +05:30
parent 0de5333989
commit 34ce0ffd1f
6 changed files with 424 additions and 8 deletions
--- a/csrc/cpu/cpu_attn.cpp
+++ b/csrc/cpu/cpu_attn.cpp
@@ -16,6 +16,8 @@ torch::Tensor get_scheduler_metadata(
    isa = cpu_attention::ISA::VEC16;
  } else if (isa_hint == "neon") {
    isa = cpu_attention::ISA::NEON;
+  } else if (isa_hint == "vxe") {
+    isa = cpu_attention::ISA::VXE;
  } else {
    TORCH_CHECK(false, "Unsupported CPU attention ISA hint: " + isa_hint);
  }
@@ -100,6 +102,8 @@ void cpu_attn_reshape_and_cache(
      return cpu_attention::ISA::VEC16;
    } else if (isa == "neon") {
      return cpu_attention::ISA::NEON;
+    } else if (isa == "vxe") {
+      return cpu_attention::ISA::VXE;
    } else {
      TORCH_CHECK(false, "Invalid ISA type: " + isa);
    }