[CPU][Perf] Accelerate Attention head for s390x using vector intrinsics (#34434)

Signed-off-by: Rehan Khan <Rehan.Khan7@ibm.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
This commit is contained in:
R3hankhan
2026-02-24 20:55:39 +05:30
committed by GitHub
parent 0de5333989
commit 34ce0ffd1f
6 changed files with 424 additions and 8 deletions

View File

@@ -16,6 +16,8 @@ torch::Tensor get_scheduler_metadata(
isa = cpu_attention::ISA::VEC16;
} else if (isa_hint == "neon") {
isa = cpu_attention::ISA::NEON;
} else if (isa_hint == "vxe") {
isa = cpu_attention::ISA::VXE;
} else {
TORCH_CHECK(false, "Unsupported CPU attention ISA hint: " + isa_hint);
}
@@ -100,6 +102,8 @@ void cpu_attn_reshape_and_cache(
return cpu_attention::ISA::VEC16;
} else if (isa == "neon") {
return cpu_attention::ISA::NEON;
} else if (isa == "vxe") {
return cpu_attention::ISA::VXE;
} else {
TORCH_CHECK(false, "Invalid ISA type: " + isa);
}