[1/N] Elastic EP Milestone 2 (#34861)
Signed-off-by: Yongji Wu <wuyongji317@gmail.com>
Signed-off-by: Itay Alroy <ialroy@nvidia.com>
Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com>
Signed-off-by: Ron Tourgeman <rtourgeman@nvidia.com>
Co-authored-by: Yongji Wu <wuyongji317@gmail.com>
Co-authored-by: Tyler Michael Smith <tlrmchlsmth@gmail.com>
Co-authored-by: Ron Tourgeman <rtourgeman@nvidia.com>
This commit is contained in:
@@ -627,6 +627,7 @@ class FusedMoE(CustomOp):
|
||||
moe_quant_params["intermediate_size_full"] = intermediate_size
|
||||
|
||||
self.quant_method.create_weights(layer=self, **moe_quant_params)
|
||||
+self.base_quant_method = self.quant_method
|
||||
|
||||
# Disable shared expert overlap if:
|
||||
# - we are using eplb with non-default backend, because of correctness issues
|
||||
@@ -683,7 +684,7 @@ class FusedMoE(CustomOp):
|
||||
# routing_tables only needed for round-robin expert placement with
|
||||
# DeepEP all2all backend.
|
||||
routing_tables = self._maybe_init_expert_routing_tables()
|
||||
-prepare_finalize = self.quant_method.maybe_make_prepare_finalize(
|
||||
+prepare_finalize = self.base_quant_method.maybe_make_prepare_finalize(
|
||||
routing_tables=routing_tables
|
||||
)
|
||||
if prepare_finalize is not None:
|
||||
@@ -693,7 +694,7 @@ class FusedMoE(CustomOp):
|
||||
self._replace_quant_method(
|
||||
FusedMoEModularMethod.make(
|
||||
self,
|
||||
-self.quant_method,
|
||||
+self.base_quant_method,
|
||||
prepare_finalize,
|
||||
self.shared_experts,
|
||||
inplace=not self.moe_config.disable_inplace,
|
||||
|
||||
Reference in New Issue
Block a user