Monitor tokens-per-step metrics using cudagraph batch sizes (#11031)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2024-12-09 22:35:36 -08:00
committed by GitHub
parent 28b3a1c7e5
commit ebf778061d
4 changed files with 23 additions and 13 deletions

View File

@@ -232,6 +232,7 @@ class LLMEngine:
use_cached_outputs: bool = False,
) -> None:
self.vllm_config = vllm_config
self.model_config = vllm_config.model_config
self.cache_config = vllm_config.cache_config
self.lora_config = vllm_config.lora_config
@@ -385,13 +386,14 @@ class LLMEngine:
self.stat_loggers = {
"logging":
LoggingStatLogger(
local_interval=_LOCAL_LOGGING_INTERVAL_SEC),
local_interval=_LOCAL_LOGGING_INTERVAL_SEC,
vllm_config=vllm_config),
"prometheus":
PrometheusStatLogger(
local_interval=_LOCAL_LOGGING_INTERVAL_SEC,
labels=dict(
model_name=self.model_config.served_model_name),
max_model_len=self.model_config.max_model_len),
vllm_config=vllm_config),
}
self.stat_loggers["prometheus"].info("cache_config",
self.cache_config)