[Kernel] Enable fp8 support for pplx and BatchedTritonExperts. (#18864)
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
@@ -148,8 +148,7 @@ def make_ll_modular_kernel(pg: ProcessGroup, pgi: ProcessGroupInfo,
|
||||
|
||||
fused_experts = BatchedDeepGemmExperts(
|
||||
max_num_tokens=max_tokens_per_rank,
|
||||
world_size=pgi.world_size,
|
||||
dp_size=dp_size,
|
||||
num_dispatchers=pgi.world_size // dp_size,
|
||||
block_shape=test_config.block_size,
|
||||
per_act_token_quant=test_config.per_act_token_quant)
|
||||
mk = FusedMoEModularKernel(prepare_finalize=a2a,
|
||||
|
||||
Reference in New Issue
Block a user