[Core] Support async scheduling with uniproc executor (#24219)

Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
Co-authored-by: Ronald1995 <ronaldautomobile@163.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
This commit is contained in:
Nick Hill
2025-09-12 16:34:28 -07:00
committed by GitHub
parent 8226dd56bf
commit 4fdd6f5cbf
9 changed files with 103 additions and 55 deletions

View File

@@ -159,6 +159,9 @@ class EngineCore:
self.request_block_hasher = get_request_block_hasher(
block_size, caching_hash_fn)
+self.step_fn = (self.step if self.batch_queue is None else
+self.step_with_batch_queue)
def _initialize_kv_caches(
self, vllm_config: VllmConfig) -> tuple[int, int, KVCacheConfig]:
start = time.time()
@@ -331,7 +334,8 @@ class EngineCore:
model_executed = False
if self.scheduler.has_requests():
scheduler_output = self.scheduler.schedule()
-future = self.model_executor.execute_model(scheduler_output)
+future = self.model_executor.execute_model(scheduler_output,
+non_block=True)
batch_queue.appendleft(
(future, scheduler_output)) # type: ignore[arg-type]
@@ -534,9 +538,6 @@ class EngineCoreProc(EngineCore):
assert addresses.coordinator_input is not None
logger.info("Waiting for READY message from DP Coordinator...")
-self.step_fn = (self.step if self.batch_queue is None else
-self.step_with_batch_queue)
# Mark the startup heap as static so that it's ignored by GC.
# Reduces pause times of oldest generation collections.
gc.collect()