[core][distributed] support n layers % pp size != 0 (#6115)
This commit is contained in:
@@ -50,8 +50,15 @@ def split_tensor_along_last_dim(
|
||||
|
||||
def get_pp_indices(num_hidden_layers: int, pp_rank: int,
                   pp_size: int) -> Tuple[int, int]:
    """Try to evenly distribute layers across partitions.

    Each partition is assigned ``num_hidden_layers // pp_size`` consecutive
    layers. If the number of layers is not divisible by the number of
    partitions, the last partition will have the remaining layers.

    Args:
        num_hidden_layers: Total number of layers to split.
        pp_rank: Index of the partition whose range is requested.
        pp_size: Total number of partitions.

    Returns:
        A ``(start_layer, end_layer)`` tuple. The ``end_layer`` of one rank
        equals the ``start_layer`` of the next, and the last rank's
        ``end_layer`` is ``num_hidden_layers``.

    Raises:
        ZeroDivisionError: If ``pp_size`` is 0.
    """
    # Floor division (rather than an exact-divisibility helper) so that
    # layer counts that are not a multiple of pp_size are accepted.
    layers_per_partition = num_hidden_layers // pp_size
    start_layer = pp_rank * layers_per_partition
    end_layer = start_layer + layers_per_partition

    # The last partition absorbs whatever the floor division left over.
    if pp_rank == pp_size - 1:
        end_layer = num_hidden_layers

    return (start_layer, end_layer)
|
||||
|
||||
Reference in New Issue
Block a user