[V1][Metrics] add support for kv event publishing (#16750)

Signed-off-by: alec-flowers <aflowers@nvidia.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Co-authored-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Alec
2025-04-30 16:44:45 +02:00
committed by GitHub
parent 77073c77bc
commit 0be6d05b5e
15 changed files with 1185 additions and 53 deletions

View File

@@ -1958,6 +1958,8 @@ class SchedulerConfig:
some image tokens can be scheduled (like TTTTIIIII, leaving IIIII),
it will be scheduled as TTTT in one step and IIIIIIIIII in the next."""
# scheduler class or path. "vllm.core.scheduler.Scheduler" (default)
# or "mod.custom_class".
scheduler_cls: Union[str, type[object]] = "vllm.core.scheduler.Scheduler"
"""The scheduler class to use. "vllm.core.scheduler.Scheduler" is the
default scheduler. Can be a class directly or the path to a class of form
@@ -3417,6 +3419,51 @@ class KVTransferConfig(BaseModel):
return self.kv_connector_extra_config.get(key, default)
class KVEventsConfig(BaseModel):
"""Configuration for KV event publishing."""
enable_kv_cache_events: bool = False
"""If True, enable KV cache events for tracking block storage and removal.
Events can be published externally by zmq using the event publisher config.
"""
publisher: str = "null"
"""The publisher to use for publishing kv events. Can be "null", "zmq".
"""
endpoint: str = "tcp://*:5557"
"""The zmq endpoint to use for publishing kv events.
"""
replay_endpoint: Optional[str] = None
"""The zmq endpoint to use for replaying kv events.
"""
buffer_steps: int = 10_000
"""The number of steps to cache for replay endpoint. Will only save
events from the last N steps for the replay endpoint.
"""
hwm: int = 100_000
"""The zmq high water mark for the event publisher. After queueing N events,
events will start dropping if the consumer is not keeping up.
"""
max_queue_size: int = 100_000
"""The maximum number of events to queue while waiting for publishing.
"""
topic: str = ""
"""The topic to use for the event publisher. Consumers can subscribe to
this topic to receive events.
"""
@classmethod
def from_cli(cls, cli_value: str) -> "KVEventsConfig":
"""Parse the CLI value for the event publisher config."""
return KVEventsConfig.model_validate_json(cli_value)
class CompilationLevel:
# constants for the levels of the compilation process
NO_COMPILATION = 0
@@ -3779,6 +3826,7 @@ class VllmConfig:
init=True) # type: ignore
kv_transfer_config: KVTransferConfig = field(default=None,
init=True) # type: ignore
kv_events_config: Optional[KVEventsConfig] = None
# some opaque config, only used to provide additional information
# for the hash computation, mainly used for testing, debugging or out of
# tree config registration.
@@ -4038,6 +4086,18 @@ class VllmConfig:
if self.cache_config is not None:
self.cache_config.enable_prefix_caching = False
if (self.kv_events_config
and self.kv_events_config.enable_kv_cache_events
and not self.cache_config.enable_prefix_caching):
logger.warning(
"KV cache events are on, but prefix caching is not enabled."
"Use --enable-prefix-caching to enable.")
if (self.kv_events_config and self.kv_events_config.publisher != "null"
and not self.kv_events_config.enable_kv_cache_events):
logger.warning("KV cache events are disabled,"
"but the scheduler is configured to publish them."
"Modify KVEventsConfig.enable_kv_cache_events"
"to True to enable.")
current_platform.check_and_update_config(self)
if not self.instance_id: