[V1][Metrics] add support for kv event publishing (#16750)
Signed-off-by: alec-flowers <aflowers@nvidia.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Co-authored-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
@@ -1958,6 +1958,8 @@ class SchedulerConfig:
|
||||
some image tokens can be scheduled (like TTTTIIIII, leaving IIIII),
|
||||
it will be scheduled as TTTT in one step and IIIIIIIIII in the next."""
|
||||
|
||||
# scheduler class or path. "vllm.core.scheduler.Scheduler" (default)
|
||||
# or "mod.custom_class".
|
||||
scheduler_cls: Union[str, type[object]] = "vllm.core.scheduler.Scheduler"
|
||||
"""The scheduler class to use. "vllm.core.scheduler.Scheduler" is the
|
||||
default scheduler. Can be a class directly or the path to a class of form
|
||||
@@ -3417,6 +3419,51 @@ class KVTransferConfig(BaseModel):
|
||||
return self.kv_connector_extra_config.get(key, default)
|
||||
|
||||
|
||||
class KVEventsConfig(BaseModel):
    """Configuration for KV event publishing."""

    enable_kv_cache_events: bool = False
    """If True, enable KV cache events for tracking block storage and removal.
    Events can be published externally by zmq using the event publisher config.
    """

    publisher: str = "null"
    """The publisher to use for publishing kv events. Can be "null" or "zmq".
    """

    endpoint: str = "tcp://*:5557"
    """The zmq endpoint to use for publishing kv events.
    """

    replay_endpoint: Optional[str] = None
    """The zmq endpoint to use for replaying kv events.
    """

    buffer_steps: int = 10_000
    """The number of steps to cache for the replay endpoint. Only events from
    the last N steps are saved for the replay endpoint.
    """

    hwm: int = 100_000
    """The zmq high water mark for the event publisher. After queueing N
    events, events will start dropping if the consumer is not keeping up.
    """

    max_queue_size: int = 100_000
    """The maximum number of events to queue while waiting for publishing.
    """

    topic: str = ""
    """The topic to use for the event publisher. Consumers can subscribe to
    this topic to receive events.
    """

    @classmethod
    def from_cli(cls, cli_value: str) -> "KVEventsConfig":
        """Parse the CLI value for the event publisher config.

        Args:
            cli_value: JSON string whose keys are the fields of this config.

        Returns:
            A validated instance of ``cls``.

        Raises:
            pydantic.ValidationError: If ``cli_value`` is not valid JSON or
                a field fails validation.
        """
        # Validate through `cls` rather than the hard-coded class name so
        # that subclasses calling `from_cli` get an instance of their own
        # type instead of the base `KVEventsConfig`.
        return cls.model_validate_json(cli_value)
|
||||
|
||||
|
||||
class CompilationLevel:
|
||||
# constants for the levels of the compilation process
|
||||
NO_COMPILATION = 0
|
||||
@@ -3779,6 +3826,7 @@ class VllmConfig:
|
||||
init=True) # type: ignore
|
||||
kv_transfer_config: KVTransferConfig = field(default=None,
|
||||
init=True) # type: ignore
|
||||
kv_events_config: Optional[KVEventsConfig] = None
|
||||
# some opaque config, only used to provide additional information
|
||||
# for the hash computation, mainly used for testing, debugging or out of
|
||||
# tree config registration.
|
||||
@@ -4038,6 +4086,18 @@ class VllmConfig:
|
||||
if self.cache_config is not None:
|
||||
self.cache_config.enable_prefix_caching = False
|
||||
|
||||
# Sanity-check the KV event settings and warn (never fail) when they are
# inconsistent with each other or with the prefix-caching setting.
# `cache_config` may be None (the code just above guards for that), so a
# missing cache config is treated as "prefix caching not enabled".
if (self.kv_events_config
        and self.kv_events_config.enable_kv_cache_events
        and not (self.cache_config is not None
                 and self.cache_config.enable_prefix_caching)):
    # Adjacent string literals are concatenated verbatim, so each piece
    # must carry its own trailing space.
    logger.warning(
        "KV cache events are on, but prefix caching is not enabled. "
        "Use --enable-prefix-caching to enable.")
if (self.kv_events_config and self.kv_events_config.publisher != "null"
        and not self.kv_events_config.enable_kv_cache_events):
    # A non-"null" publisher is configured but event generation is off.
    logger.warning("KV cache events are disabled, "
                   "but the scheduler is configured to publish them. "
                   "Modify KVEventsConfig.enable_kv_cache_events "
                   "to True to enable.")
|
||||
current_platform.check_and_update_config(self)
|
||||
|
||||
if not self.instance_id:
|
||||
|
||||
Reference in New Issue
Block a user