[V1] Support DP with Ray (#18779)

This commit is contained in:
Rui Qiao
2025-06-02 21:15:13 -07:00
committed by GitHub
parent 9e6f61e8c3
commit bdce64f236
10 changed files with 551 additions and 120 deletions

View File

@@ -59,14 +59,22 @@ async def generate(engine: AsyncLLM,
@pytest.mark.parametrize(
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
"output_kind",
[
RequestOutputKind.DELTA,
RequestOutputKind.FINAL_ONLY,
],
)
@pytest.mark.parametrize("data_parallel_backend", ["mp", "ray"])
@pytest.mark.asyncio
async def test_load(output_kind: RequestOutputKind):
async def test_load(output_kind: RequestOutputKind,
data_parallel_backend: str):
with ExitStack() as after:
prompt = "This is a test of data parallel"
engine_args.data_parallel_backend = data_parallel_backend
engine = AsyncLLM.from_engine_args(engine_args)
after.callback(engine.shutdown)
@@ -82,7 +90,6 @@ async def test_load(output_kind: RequestOutputKind):
asyncio.create_task(
generate(engine, request_id, prompt, output_kind,
NUM_EXPECTED_TOKENS)))
# Confirm that we got all the EXPECTED tokens from the requests.
done, pending = await asyncio.wait(tasks,
return_when=asyncio.FIRST_EXCEPTION)