[perf][cpu] Accelerate paged attention GEMMs (QK, PV) on Arm CPUs with NEON (#29193)
Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
This commit is contained in:
@@ -14,7 +14,7 @@
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace cpu_attention {
|
||||
enum class ISA { AMX, VEC, VEC16 };
|
||||
enum class ISA { AMX, VEC, VEC16, NEON };
|
||||
|
||||
template <ISA isa, typename scalar_t, int64_t head_dim>
|
||||
class AttentionImpl {};
|
||||
@@ -143,6 +143,12 @@ struct AttentionMetadata {
|
||||
case ISA::VEC:
|
||||
ss << "VEC, ";
|
||||
break;
|
||||
case ISA::VEC16:
|
||||
ss << "VEC16, ";
|
||||
break;
|
||||
case ISA::NEON:
|
||||
ss << "NEON, ";
|
||||
break;
|
||||
}
|
||||
ss << "workitem_group_num: " << workitem_group_num
|
||||
<< ", reduction_item_num: " << reduction_item_num
|
||||
|
||||
Reference in New Issue
Block a user