[Feature][CPU Backend]: Optimize ARM vectorization backend (#30329)

Signed-off-by: Radu Salavat <radu.salavat@arm.com>
This commit is contained in:
Radu Salavat
2026-02-03 04:17:56 +00:00
committed by GitHub
parent 5eac9a1b34
commit e69c990c21
5 changed files with 579 additions and 624 deletions

View File

@@ -38,9 +38,7 @@ struct KernelVecType<c10::BFloat16> {
using qk_vec_type = vec_op::BF16Vec32;
using v_load_vec_type = vec_op::BF16Vec16;
};
-#elif defined(__aarch64__) && !defined(ARM_BF16_SUPPORT)
-// pass
-#else
+#elif defined(__aarch64__)
template <>
struct KernelVecType<c10::BFloat16> {
using qk_load_vec_type = vec_op::BF16Vec16;