[MoE Refactor] Oracle Select FP8+NVFP4 Kernels In Priority (#32414)
This commit is contained in:
@@ -875,6 +875,7 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
|
||||
return BatchedMarlinExperts(
|
||||
max_num_tokens=max_num_tokens_per_rank,
|
||||
num_dispatchers=prepare_finalize.num_dispatchers(),
|
||||
moe_config=self.moe,
|
||||
quant_config=self.moe_quant_config,
|
||||
w13_g_idx=w13_g_idx,
|
||||
w2_g_idx=w2_g_idx,
|
||||
@@ -885,6 +886,7 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
|
||||
else:
|
||||
# Standard Marlin experts for GPTQ
|
||||
return MarlinExperts(
|
||||
moe_config=self.moe,
|
||||
quant_config=self.moe_quant_config,
|
||||
w13_g_idx=w13_g_idx,
|
||||
w2_g_idx=w2_g_idx,
|
||||
|
||||
Reference in New Issue
Block a user