[Metrics] Log multi-modal cache stats and fix reset (#26285)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-10-10 16:45:55 +08:00
committed by GitHub
parent 6f0f570c43
commit ad430a67ca
25 changed files with 586 additions and 235 deletions

View File

@@ -17,7 +17,7 @@ from vllm.sampling_params import RequestOutputKind
from vllm.v1.engine.async_llm import AsyncLLM
from vllm.v1.engine.core_client import DPAsyncMPClient
from vllm.v1.metrics.loggers import StatLoggerBase
-from vllm.v1.metrics.stats import IterationStats, SchedulerStats
+from vllm.v1.metrics.stats import IterationStats, MultiModalCacheStats, SchedulerStats
DP_SIZE = int(os.getenv("DP_SIZE", 2))
@@ -93,6 +93,7 @@ async def test_load(
self,
scheduler_stats: Optional[SchedulerStats],
iteration_stats: Optional[IterationStats],
mm_cache_stats: Optional[MultiModalCacheStats] = None,
engine_idx: int = 0,
):
if iteration_stats: