[V1] AsyncLLM data parallel (#13923)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -28,6 +28,7 @@ Multi-node:
|
||||
--master-port=13345
|
||||
"""
|
||||
import os
|
||||
from time import sleep
|
||||
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.utils import get_open_port
|
||||
@@ -36,14 +37,13 @@ from vllm.utils import get_open_port
|
||||
def main(model, dp_size, local_dp_rank, global_dp_rank, dp_master_ip,
|
||||
dp_master_port, GPUs_per_dp_rank):
|
||||
os.environ["VLLM_DP_RANK"] = str(global_dp_rank)
|
||||
os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank)
|
||||
os.environ["VLLM_DP_SIZE"] = str(dp_size)
|
||||
os.environ["VLLM_DP_MASTER_IP"] = dp_master_ip
|
||||
os.environ["VLLM_DP_MASTER_PORT"] = str(dp_master_port)
|
||||
# set devices for each dp_rank
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
|
||||
str(i)
|
||||
for i in range(local_dp_rank * GPUs_per_dp_rank, (local_dp_rank + 1) *
|
||||
GPUs_per_dp_rank))
|
||||
|
||||
# CUDA_VISIBLE_DEVICES for each DP rank is set automatically inside the
|
||||
# engine processes.
|
||||
|
||||
# Sample prompts.
|
||||
prompts = [
|
||||
@@ -90,6 +90,9 @@ def main(model, dp_size, local_dp_rank, global_dp_rank, dp_master_ip,
|
||||
print(f"DP rank {global_dp_rank}, Prompt: {prompt!r}, "
|
||||
f"Generated text: {generated_text!r}")
|
||||
|
||||
# Give engines time to pause their processing loops before exiting.
|
||||
sleep(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
@@ -152,8 +155,13 @@ if __name__ == "__main__":
|
||||
procs.append(proc)
|
||||
exit_code = 0
|
||||
for proc in procs:
|
||||
proc.join()
|
||||
if proc.exitcode:
|
||||
proc.join(timeout=300)
|
||||
if proc.exitcode is None:
|
||||
print(f"Killing process {proc.pid} that "
|
||||
f"didn't stop within 5 minutes.")
|
||||
proc.kill()
|
||||
exit_code = 1
|
||||
elif proc.exitcode:
|
||||
exit_code = proc.exitcode
|
||||
|
||||
exit(exit_code)
|
||||
|
||||
Reference in New Issue
Block a user