[Bugfix] Fix torchrun PP broadcast deadlock with async scheduling (#33701)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -32,9 +32,6 @@ llm = LLM(
|
||||
gpu_memory_utilization=random.uniform(0.7, 0.9),
|
||||
swap_space=random.randint(1, 4),
|
||||
seed=0,
|
||||
# FIXME(Isotr0py): async scheduling causes deadlock
|
||||
# on torchrun with PP, need to investigate further.
|
||||
async_scheduling=False,
|
||||
)
|
||||
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
|
||||
@@ -39,9 +39,6 @@ llm = LLM(
|
||||
gpu_memory_utilization=random.uniform(0.7, 0.9),
|
||||
swap_space=random.randint(1, 4),
|
||||
seed=0,
|
||||
# FIXME(Isotr0py): async scheduling causes deadlock
|
||||
# on torchrun with PP, need to investigate further.
|
||||
async_scheduling=False,
|
||||
)
|
||||
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
|
||||
@@ -3666,7 +3666,10 @@ class GPUModelRunner(
|
||||
)
|
||||
if self.use_async_scheduling:
|
||||
pp = get_pp_group()
|
||||
if pp.world_size > 1 and pp.is_last_rank:
|
||||
# For torchrun external_launcher PP mode with broadcast_pp_output=True,
|
||||
# PP outputs have been broadcast to all ranks at logits computation.
|
||||
# Therefore, there is no need to send sampled token ids again in this case.
|
||||
if not self.broadcast_pp_output and pp.world_size > 1 and pp.is_last_rank:
|
||||
self._pp_broadcast_prev_sampled_token_ids(
|
||||
sampler_output.sampled_token_ids
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user