[BUG] Fix async rlhf tests (#35811)
Signed-off-by: ahao-anyscale <ahao@anyscale.com>
@@ -1006,6 +1006,10 @@ class Worker(WorkerBase):
             load_weights=load_weights_direct,
         )
 
+        # NCCL broadcast/packed paths are asynchronous.
+        # Sync here so the next step uses the new weights.
+        torch.accelerator.synchronize()
+
     def shutdown(self) -> None:
         # has_kv_transfer_group can be None during interpreter shutdown.
         if ensure_kv_transfer_shutdown is not None:
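For context, a minimal sketch of the synchronization pattern this diff applies. The `update_weights` helper and the per-parameter broadcast loop below are hypothetical illustrations, not the actual Worker implementation; only `torch.accelerator.synchronize()` comes from the commit itself.

import torch
import torch.distributed as dist

def update_weights(model: torch.nn.Module, src_rank: int = 0) -> None:
    """Receive new weights via NCCL broadcast, then sync (illustrative sketch)."""
    # NCCL collectives only enqueue kernels on the device stream;
    # the host returns before the copies have actually finished.
    for param in model.parameters():
        dist.broadcast(param.data, src=src_rank)

    # Block the host until the enqueued broadcasts complete, so the
    # next step is guaranteed to run against the new weights.
    torch.accelerator.synchronize()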