[Core] Support async scheduling with uniproc executor (#24219)
Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
Co-authored-by: Ronald1995 <ronaldautomobile@163.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
This commit is contained in:
@@ -159,6 +159,9 @@ class EngineCore:
|
||||
self.request_block_hasher = get_request_block_hasher(
|
||||
block_size, caching_hash_fn)
|
||||
|
||||
self.step_fn = (self.step if self.batch_queue is None else
|
||||
self.step_with_batch_queue)
|
||||
|
||||
def _initialize_kv_caches(
|
||||
self, vllm_config: VllmConfig) -> tuple[int, int, KVCacheConfig]:
|
||||
start = time.time()
|
||||
@@ -331,7 +334,8 @@ class EngineCore:
|
||||
model_executed = False
|
||||
if self.scheduler.has_requests():
|
||||
scheduler_output = self.scheduler.schedule()
|
||||
future = self.model_executor.execute_model(scheduler_output)
|
||||
future = self.model_executor.execute_model(scheduler_output,
|
||||
non_block=True)
|
||||
batch_queue.appendleft(
|
||||
(future, scheduler_output)) # type: ignore[arg-type]
|
||||
|
||||
@@ -534,9 +538,6 @@ class EngineCoreProc(EngineCore):
|
||||
assert addresses.coordinator_input is not None
|
||||
logger.info("Waiting for READY message from DP Coordinator...")
|
||||
|
||||
self.step_fn = (self.step if self.batch_queue is None else
|
||||
self.step_with_batch_queue)
|
||||
|
||||
# Mark the startup heap as static so that it's ignored by GC.
|
||||
# Reduces pause times of oldest generation collections.
|
||||
gc.collect()
|
||||
|
||||
Reference in New Issue
Block a user