[Core] Eliminate parallel worker per-step task scheduling overhead (#4894)

This commit is contained in:
Nick Hill
2024-05-22 14:17:27 -07:00
committed by GitHub
parent 97b030005c
commit eb6d3c264d
12 changed files with 350 additions and 211 deletions

View File

@@ -692,6 +692,14 @@ class LLMEngine:
# Log stats.
self.do_log_stats(scheduler_outputs, output)
if not request_outputs:
# Stop the execute model loop in parallel workers until there are
# more requests to process. This avoids waiting indefinitely in
# torch.distributed ops which may otherwise time out, and unblocks
# the RPC thread in the workers so that they can process any other
# queued control plane messages, such as add/remove LoRA adapters.
self.model_executor.stop_remote_worker_execution_loop()
return request_outputs
def do_log_stats(