[Docs] Improve code formatting and comments to eliminate griffe build warnings. (#25010)
Signed-off-by: samzong <samzong.lu@gmail.com>
This commit is contained in:
@@ -139,7 +139,7 @@ async def get_request(
|
|||||||
A lower burstiness value (0 < burstiness < 1) results
|
A lower burstiness value (0 < burstiness < 1) results
|
||||||
in more bursty requests, while a higher burstiness value
|
in more bursty requests, while a higher burstiness value
|
||||||
(burstiness > 1) results in a more uniform arrival of requests.
|
(burstiness > 1) results in a more uniform arrival of requests.
|
||||||
ramp_up_strategy (optional):
|
ramp_up_strategy (optional):
|
||||||
The ramp-up strategy. Can be "linear" or "exponential".
|
The ramp-up strategy. Can be "linear" or "exponential".
|
||||||
If None, uses constant request rate (specified by request_rate).
|
If None, uses constant request rate (specified by request_rate).
|
||||||
ramp_up_start_rps (optional):
|
ramp_up_start_rps (optional):
|
||||||
|
|||||||
@@ -337,11 +337,12 @@ class EplbState:
|
|||||||
Args:
|
Args:
|
||||||
model (MixtureOfExperts): The MoE model.
|
model (MixtureOfExperts): The MoE model.
|
||||||
is_dummy (bool): If `True`, this is a dummy step and the load
|
is_dummy (bool): If `True`, this is a dummy step and the load
|
||||||
metrics recorded in this forward pass will not count. Defaults
|
metrics recorded in this forward pass will not count.
|
||||||
to `False`.
|
Defaults to `False`.
|
||||||
is_profile (bool): If `True`, perform a dummy rearrangement
|
is_profile (bool): If `True`, perform a dummy rearrangement
|
||||||
with maximum communication cost. This is used in `profile_run`
|
with maximum communication cost. This is used in
|
||||||
to reserve enough memory for the communication buffer.
|
`profile_run` to reserve enough memory
|
||||||
|
for the communication buffer.
|
||||||
log_stats (bool): If `True`, log the expert load metrics.
|
log_stats (bool): If `True`, log the expert load metrics.
|
||||||
|
|
||||||
# Stats
|
# Stats
|
||||||
|
|||||||
@@ -109,13 +109,16 @@ def rebalance_experts_hierarchical(
|
|||||||
num_physical_experts: number of physical experts after replication
|
num_physical_experts: number of physical experts after replication
|
||||||
num_groups: number of expert groups
|
num_groups: number of expert groups
|
||||||
num_nodes: number of server nodes, where the intra-node network
|
num_nodes: number of server nodes, where the intra-node network
|
||||||
(e.g, NVLink) is faster
|
(e.g., NVLink) is faster
|
||||||
num_gpus: number of GPUs, must be a multiple of `num_nodes`
|
num_gpus: number of GPUs, must be a multiple of `num_nodes`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
physical_to_logical_map: [num_moe_layers, num_physical_experts]
|
physical_to_logical_map (torch.Tensor):
|
||||||
logical_to_physical_map: [num_moe_layers, num_logical_experts, X]
|
[num_moe_layers, num_physical_experts]
|
||||||
logical_count: [num_moe_layers, num_logical_experts]
|
logical_to_physical_map (torch.Tensor):
|
||||||
|
[num_moe_layers, num_logical_experts, X]
|
||||||
|
logical_count (torch.Tensor):
|
||||||
|
[num_moe_layers, num_logical_experts]
|
||||||
"""
|
"""
|
||||||
num_layers, num_logical_experts = weight.shape
|
num_layers, num_logical_experts = weight.shape
|
||||||
assert num_logical_experts % num_groups == 0
|
assert num_logical_experts % num_groups == 0
|
||||||
@@ -197,11 +200,13 @@ def rebalance_experts(
|
|||||||
num_gpus: number of GPUs, must be a multiple of `num_nodes`
|
num_gpus: number of GPUs, must be a multiple of `num_nodes`
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
physical_to_logical_map: [layers, num_replicas], the expert index of
|
physical_to_logical_map:
|
||||||
each replica
|
[layers, num_replicas], the expert index of each replica
|
||||||
logical_to_physical_map: [layers, num_logical_experts, X], the replica
|
logical_to_physical_map:
|
||||||
indices for each expert
|
[layers, num_logical_experts, X], the replica indices for each
|
||||||
expert_count: [layers, num_logical_experts], number of physical
|
expert
|
||||||
|
expert_count:
|
||||||
|
[layers, num_logical_experts], number of physical
|
||||||
replicas for each logical expert
|
replicas for each logical expert
|
||||||
"""
|
"""
|
||||||
num_layers, num_logical_experts = weight.shape
|
num_layers, num_logical_experts = weight.shape
|
||||||
|
|||||||
Reference in New Issue
Block a user