[CPU] Refactor CPU fused MOE (#30531)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
@@ -13,6 +13,9 @@ namespace cpu_micro_gemm {
|
||||
// Expands to a fixed, comma-separated list of identifiers. The names are not
// declared here, so they must already exist in whatever scope expands the
// macro (presumably it is used to forward a micro-GEMM argument list; confirm
// against the call sites).
#define CPU_MICRO_GEMM_PARAMS \
|
||||
a_ptr, b_ptr, c_ptr, m, k, lda, b_n_group_stride, ldc, accum_c
|
||||
|
||||
// Note: weights for MicroGemm should be packed as (output_size / 16) contiguous
|
||||
// blocks, i.e. the logical shape of each block is [16, input_size]. The actual
|
||||
// layout of blocks can be ISA-specific.
|
||||
template <cpu_utils::ISA isa, typename scalar_t>
|
||||
class MicroGemm {
|
||||
public:
|
||||
@@ -86,6 +89,41 @@ FORCE_INLINE void bias_epilogue(float* __restrict__ c_ptr,
|
||||
curr_d += ldd;
|
||||
}
|
||||
}
|
||||
|
||||
template <int32_t n_size, typename scalar_t>
|
||||
FORCE_INLINE void add_bias_epilogue(float* c_ptr, float* d_ptr,
|
||||
scalar_t* __restrict__ bias_ptr,
|
||||
const int32_t m, const int64_t ldc,
|
||||
const int64_t ldd) {
|
||||
using scalar_vec_t = typename cpu_utils::VecTypeTrait<scalar_t>::vec_t;
|
||||
static_assert(n_size % 16 == 0);
|
||||
constexpr int32_t n_group_num = n_size / 16;
|
||||
static_assert(n_group_num <= 16);
|
||||
|
||||
vec_op::FP32Vec16 bias_vecs[n_group_num];
|
||||
scalar_t* __restrict__ curr_bias = bias_ptr;
|
||||
vec_op::unroll_loop<int32_t, n_group_num>([&](int32_t i) {
|
||||
scalar_vec_t vec(curr_bias);
|
||||
bias_vecs[i] = vec_op::FP32Vec16(vec);
|
||||
curr_bias += 16;
|
||||
});
|
||||
|
||||
float* curr_c = c_ptr;
|
||||
float* curr_d = d_ptr;
|
||||
for (int32_t i = 0; i < m; ++i) {
|
||||
float* curr_c_iter = curr_c;
|
||||
float* curr_d_iter = curr_d;
|
||||
vec_op::unroll_loop<int32_t, n_group_num>([&](int32_t n_g_idx) {
|
||||
vec_op::FP32Vec16 c_vec_fp32(curr_c_iter);
|
||||
c_vec_fp32 = c_vec_fp32 + bias_vecs[n_g_idx];
|
||||
c_vec_fp32.save(curr_d_iter);
|
||||
curr_c_iter += 16;
|
||||
curr_d_iter += 16;
|
||||
});
|
||||
curr_c += ldc;
|
||||
curr_d += ldd;
|
||||
}
|
||||
}
|
||||
} // namespace cpu_micro_gemm
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user