[KVConnector] Support worker -> scheduler metadata (#31964)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
This commit is contained in:
Or Ozeri
2026-03-11 19:36:37 +02:00
committed by GitHub
parent 741f4e046b
commit a1a3523a56
6 changed files with 283 additions and 29 deletions

View File

@@ -14,9 +14,13 @@ from vllm.v1.core.sched.output import SchedulerOutput
if TYPE_CHECKING:
from vllm.distributed.kv_events import KVConnectorKVEvents
from vllm.distributed.kv_transfer.kv_connector.v1.base import (
KVConnectorWorkerMetadata,
)
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorStats
else:
KVConnectorStats = object
KVConnectorWorkerMetadata = object
KVConnectorKVEvents = object
@@ -142,6 +146,7 @@ class KVConnectorOutput:
finished_recving: set[str] | None = None
kv_connector_stats: KVConnectorStats | None = None
kv_cache_events: KVConnectorKVEvents | None = None
kv_connector_worker_meta: KVConnectorWorkerMetadata | None = None
# IDs of externally computed KV blocks that failed to load.
# Requests referencing these blocks should be rescheduled so that the blocks are recomputed.
invalid_block_ids: set[int] = field(default_factory=set)
@@ -159,6 +164,7 @@ class KVConnectorOutput:
and not self.kv_connector_stats
and not self.kv_cache_events
and not self.invalid_block_ids
and not self.kv_connector_worker_meta
)
@classmethod

View File

@@ -123,6 +123,7 @@ class KVConnectorModelRunnerMixin:
output.kv_connector_stats = kv_connector.get_kv_connector_stats()
output.kv_cache_events = kv_connector.get_kv_connector_kv_cache_events()
output.kv_connector_worker_meta = kv_connector.build_connector_worker_meta()
if not defer_finalize:
kv_connector.clear_connector_metadata()