[core][distributed] support n layers % pp size != 0 (#6115)

This commit is contained in:
youkaichao
2024-07-03 16:40:31 -07:00
committed by GitHub
parent 966fe72141
commit 3de6e6a30e
7 changed files with 19 additions and 10 deletions

View File

@@ -50,8 +50,15 @@ def split_tensor_along_last_dim(
def get_pp_indices(num_hidden_layers: int, pp_rank: int,
                   pp_size: int) -> Tuple[int, int]:
    """Return the [start, end) layer index range owned by a pipeline rank.

    Layers are distributed as evenly as integer division allows. If
    ``num_hidden_layers`` is not divisible by ``pp_size``, the last
    pipeline rank absorbs the remaining layers, so no layer is dropped.

    Args:
        num_hidden_layers: Total number of transformer layers in the model.
        pp_rank: This worker's pipeline-parallel rank (0-based).
        pp_size: Total number of pipeline-parallel partitions.

    Returns:
        A ``(start_layer, end_layer)`` tuple; the rank owns layers
        ``start_layer`` (inclusive) through ``end_layer`` (exclusive).
    """
    # Floor division instead of an exact-divide helper: a remainder is
    # now allowed and is handed to the last rank below.
    layers_per_partition = num_hidden_layers // pp_size
    start_layer = pp_rank * layers_per_partition
    end_layer = start_layer + layers_per_partition
    # Last rank takes the leftover layers when the split is uneven.
    if pp_rank == pp_size - 1:
        end_layer = num_hidden_layers
    return (start_layer, end_layer)