[V1] [P/D] Refactor KV Connector Path (#21980)
Signed-off-by: David Ben-David <davidb@pliops.com> Co-authored-by: David Ben-David <davidb@pliops.com>
This commit is contained in:
@@ -30,7 +30,7 @@ from vllm.v1.engine import (EngineCoreEventType, EngineCoreOutput,
|
||||
EngineCoreOutputs)
|
||||
from vllm.v1.kv_cache_interface import KVCacheConfig
|
||||
from vllm.v1.metrics.stats import SchedulerStats
|
||||
from vllm.v1.outputs import ModelRunnerOutput
|
||||
from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
|
||||
from vllm.v1.request import Request, RequestStatus
|
||||
from vllm.v1.spec_decode.metrics import SpecDecodingStats
|
||||
from vllm.v1.structured_output import StructuredOutputManager
|
||||
@@ -884,7 +884,9 @@ class Scheduler(SchedulerInterface):
|
||||
self.waiting.remove_requests(stopped_preempted_reqs)
|
||||
|
||||
# KV Connector: update state for finished KV Transfers.
|
||||
self._update_from_kv_xfer_finished(model_runner_output)
|
||||
if model_runner_output.kv_connector_output:
|
||||
self._update_from_kv_xfer_finished(
|
||||
model_runner_output.kv_connector_output)
|
||||
|
||||
# Create EngineCoreOutputs for all clients that have requests with
|
||||
# outputs in this step.
|
||||
@@ -1128,7 +1130,7 @@ class Scheduler(SchedulerInterface):
|
||||
return True
|
||||
|
||||
def _update_from_kv_xfer_finished(self,
|
||||
model_runner_output: ModelRunnerOutput):
|
||||
kv_connector_output: KVConnectorOutput):
|
||||
"""
|
||||
KV Connector: update the scheduler state based on the output.
|
||||
|
||||
@@ -1139,9 +1141,9 @@ class Scheduler(SchedulerInterface):
|
||||
schedule the request during the next step.
|
||||
"""
|
||||
# KV Connector: update recv and send status from last step.
|
||||
for req_id in (model_runner_output.finished_recving or ()):
|
||||
for req_id in (kv_connector_output.finished_recving or ()):
|
||||
logger.debug("Finished recving KV transfer for request %s", req_id)
|
||||
self.finished_recving_kv_req_ids.add(req_id)
|
||||
for req_id in (model_runner_output.finished_sending or ()):
|
||||
for req_id in (kv_connector_output.finished_sending or ()):
|
||||
logger.debug("Finished sending KV transfer for request %s", req_id)
|
||||
self._free_blocks(self.requests[req_id])
|
||||
|
||||
Reference in New Issue
Block a user