[Core] Pipeline Parallel Support (#4412)

Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
This commit is contained in:
Murali Andoorveedu
2024-07-02 10:58:08 -07:00
committed by GitHub
parent 15aba081f3
commit c5832d2ae9
82 changed files with 1096 additions and 400 deletions

View File

@@ -5,6 +5,7 @@ import pytest
import torch
from vllm import SamplingParams
from vllm.config import ParallelConfig
from vllm.engine.async_llm_engine import AsyncEngineArgs, AsyncLLMEngine
from ..utils import wait_for_gpu_memory_to_clear
@@ -23,8 +24,11 @@ class MockEngine:
self.add_request_calls = 0
self.abort_request_calls = 0
self.request_id = None
# Ugly, remove dependency when possible
self.parallel_config = ParallelConfig(1, 1, False)
async def step_async(self):
async def step_async(self, virtual_engine):
# PP size is 1, ignore virtual engine
self.step_calls += 1
return [RequestOutput(
request_id=self.request_id)] if self.request_id else []
@@ -32,6 +36,9 @@ class MockEngine:
async def process_model_inputs_async(self, *args, **kwargs):
pass
async def stop_remote_worker_execution_loop_async(self):
pass
def generate(self, request_id):
self.request_id = request_id
@@ -41,6 +48,7 @@ class MockEngine:
def add_request(self, **kwargs):
del kwargs # Unused
self.add_request_calls += 1
print(f'Request calls: {self.add_request_calls}')
async def add_request_async(self, **kwargs):
self.add_request_calls += 1
@@ -53,6 +61,9 @@ class MockEngine:
def has_unfinished_requests(self):
return self.request_id is not None
def has_unfinished_requests_for_virtual_engine(self, virtual_engine):
return self.request_id is not None
class MockAsyncLLMEngine(AsyncLLMEngine):
@@ -76,6 +87,7 @@ async def test_new_requests_event():
engine.engine.generate("2")
await asyncio.sleep(0)
await asyncio.sleep(0)
await asyncio.sleep(0)
assert engine.engine.add_request_calls == 2
assert engine.engine.step_calls >= 2
await asyncio.sleep(0.001)