[CPU] Add head sizes 80 and 112 with vec16 fallback (#31968)

Signed-off-by: Rehan Khan <Rehan.Khan7@ibm.com>
2026-01-09 19:44:46 +05:30
parent 7cdf7e2fe0
commit 8e27663b6a
4 changed files with 12 additions and 5 deletions
--- a/csrc/cpu/cpu_attn_neon.hpp
+++ b/csrc/cpu/cpu_attn_neon.hpp
@@ -264,7 +264,7 @@ class AttentionImpl<ISA::NEON, scalar_t, head_dim> {
  constexpr static ISA ISAType = ISA::NEON;
  constexpr static bool scale_on_logits = false;  // apply scale on q_buffer

-  static_assert(HeadDim % HeadDimAlignment == 0);
+  // static_assert(HeadDim % HeadDimAlignment == 0);  // relaxed: head sizes 80/112
  // the gemm micro kernel is Mx8
  static_assert(HeadDimAlignment % 8 == 0);
  static_assert(BlockSizeAlignment % 8 == 0);