[V1] [P/D] Refactor KV Connector Path (#21980)

Signed-off-by: David Ben-David <davidb@pliops.com>
Co-authored-by: David Ben-David <davidb@pliops.com>
This commit is contained in:
David Ben-David
2025-08-03 14:03:40 +03:00
committed by GitHub
parent 24d1dffbeb
commit aefeea0fde
12 changed files with 142 additions and 80 deletions

View File

@@ -30,7 +30,7 @@ from vllm.v1.engine import (EngineCoreEventType, EngineCoreOutput,
EngineCoreOutputs)
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.v1.metrics.stats import SchedulerStats
from vllm.v1.outputs import ModelRunnerOutput
from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
from vllm.v1.request import Request, RequestStatus
from vllm.v1.spec_decode.metrics import SpecDecodingStats
from vllm.v1.structured_output import StructuredOutputManager
@@ -884,7 +884,9 @@ class Scheduler(SchedulerInterface):
self.waiting.remove_requests(stopped_preempted_reqs)
# KV Connector: update state for finished KV Transfers.
self._update_from_kv_xfer_finished(model_runner_output)
if model_runner_output.kv_connector_output:
self._update_from_kv_xfer_finished(
model_runner_output.kv_connector_output)
# Create EngineCoreOutputs for all clients that have requests with
# outputs in this step.
@@ -1128,7 +1130,7 @@ class Scheduler(SchedulerInterface):
return True
def _update_from_kv_xfer_finished(self,
model_runner_output: ModelRunnerOutput):
kv_connector_output: KVConnectorOutput):
"""
KV Connector: update the scheduler state based on the output.
@@ -1139,9 +1141,9 @@ class Scheduler(SchedulerInterface):
schedule the request during the next step.
"""
# KV Connector: update recv and send status from last step.
for req_id in (model_runner_output.finished_recving or ()):
for req_id in (kv_connector_output.finished_recving or ()):
logger.debug("Finished recving KV transfer for request %s", req_id)
self.finished_recving_kv_req_ids.add(req_id)
for req_id in (model_runner_output.finished_sending or ()):
for req_id in (kv_connector_output.finished_sending or ()):
logger.debug("Finished sending KV transfer for request %s", req_id)
self._free_blocks(self.requests[req_id])