[CPU] Refactor CPU fused MOE (#30531)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
Li, Jiang
2025-12-18 14:36:49 +08:00
committed by GitHub
parent fc2ae6d617
commit e3ab93c896
23 changed files with 1388 additions and 200 deletions

View File

@@ -109,6 +109,25 @@ class MicroGemm<cpu_utils::ISA::VEC, scalar_t> {
// Micro-GEMM entry point of this specialization. The parameter list is
// declared by DEFINE_CPU_MICRO_GEMM_PARAMS and forwarded unchanged (via
// CPU_MICRO_GEMM_PARAMS) to TileGemm82<scalar_t>::gemm, which does the work.
void gemm(DEFINE_CPU_MICRO_GEMM_PARAMS) {
TileGemm82<scalar_t>::gemm(CPU_MICRO_GEMM_PARAMS);
}
// Repack a contiguous row-major weight [output_size, input_size] into the
// contiguous blocked layout [output_size / 16, input_size, 16].
//
// Element weight[o][i] is written to
//   packed_weight[(o / 16) * (16 * input_size) + i * 16 + (o % 16)],
// i.e. every group of 16 consecutive output rows is interleaved so that the
// 16 values sharing one input index become adjacent in memory.
//
// weight:        source buffer of output_size * input_size elements.
// packed_weight: destination buffer of output_size * input_size elements;
//                declared __restrict__, so it must not alias `weight`.
// output_size:   number of output rows; must be a multiple of 16 (checked).
// input_size:    number of elements in each output row.
//
// All element offsets are computed in 64-bit arithmetic: the products
// o_idx * input_size and (o_idx / 16) * (16 * input_size) can exceed
// INT32_MAX for large weights, and signed 32-bit overflow is undefined
// behaviour that would produce wrong addresses.
static void pack_weight(const scalar_t* __restrict__ weight,
                        scalar_t* __restrict__ packed_weight,
                        const int32_t output_size, const int32_t input_size) {
  TORCH_CHECK_EQ(output_size % 16, 0);
  // Number of output rows interleaved into one packed block.
  constexpr int64_t kBlock = 16;
  // Elements per source row and per packed block, widened to 64 bits.
  const int64_t row_stride = input_size;
  const int64_t block_stride = kBlock * row_stride;
  for (int32_t o_idx = 0; o_idx < output_size; ++o_idx) {
    const scalar_t* __restrict__ curr_weight = weight + o_idx * row_stride;
    // Start of this row's lane (o_idx % 16) inside its 16-row block.
    scalar_t* __restrict__ curr_packed_weight =
        packed_weight + (o_idx / kBlock) * block_stride + o_idx % kBlock;
    for (int32_t i_idx = 0; i_idx < input_size; ++i_idx) {
      *curr_packed_weight = *curr_weight;
      // Consecutive input elements of one row sit 16 slots apart.
      curr_packed_weight += kBlock;
      ++curr_weight;
    }
  }
}
};
} // namespace cpu_micro_gemm