[Bugfix] torchrun compatibility (#14899)

Signed-off-by: hiyouga <hiyouga@buaa.edu.cn>
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
hoshi-hiyouga
2025-03-18 20:49:27 +08:00
committed by GitHub
parent db7c8ca910
commit 414919138b
2 changed files with 23 additions and 2 deletions

View File

@@ -904,7 +904,9 @@ class ModelConfig:
else:
total_num_hidden_layers = getattr(self.hf_text_config,
"num_hidden_layers", 0)
-pp_rank = parallel_config.rank // parallel_config.tensor_parallel_size
+# the layout order is: DP x PP x TP
+pp_rank = (parallel_config.rank // parallel_config.tensor_parallel_size
+) % parallel_config.pipeline_parallel_size
pp_size = parallel_config.pipeline_parallel_size
start, end = get_pp_indices(total_num_hidden_layers, pp_rank, pp_size)
return start, end