Add logging for cudagraph-related info (#29825)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
This commit is contained in:
Yong Hoon Shin
2025-12-02 23:01:48 -10:00
committed by GitHub
parent 3a7751485b
commit 69520bc695
9 changed files with 161 additions and 6 deletions

View File

@@ -7,6 +7,7 @@ from collections.abc import Iterable
from typing import Any
from vllm import envs
from vllm.compilation.cuda_graph import CUDAGraphStat
from vllm.config import VllmConfig
from vllm.distributed.ec_transfer.ec_connector.base import (
ECConnectorMetadata,
@@ -1037,6 +1038,7 @@ class Scheduler(SchedulerInterface):
pooler_outputs = model_runner_output.pooler_output
num_nans_in_logits = model_runner_output.num_nans_in_logits
kv_connector_output = model_runner_output.kv_connector_output
cudagraph_stats = model_runner_output.cudagraph_stats
outputs: dict[int, list[EngineCoreOutput]] = defaultdict(list)
spec_decoding_stats: SpecDecodingStats | None = None
@@ -1219,7 +1221,9 @@ class Scheduler(SchedulerInterface):
finished_req_ids.clear()
if (
stats := self.make_stats(spec_decoding_stats, kv_connector_stats)
stats := self.make_stats(
spec_decoding_stats, kv_connector_stats, cudagraph_stats
)
) is not None:
# Return stats to only one of the front-ends.
if (eco := next(iter(engine_core_outputs.values()), None)) is None:
@@ -1420,6 +1424,7 @@ class Scheduler(SchedulerInterface):
self,
spec_decoding_stats: SpecDecodingStats | None = None,
kv_connector_stats: KVConnectorStats | None = None,
cudagraph_stats: CUDAGraphStat | None = None,
) -> SchedulerStats | None:
if not self.log_stats:
return None
@@ -1444,6 +1449,7 @@ class Scheduler(SchedulerInterface):
kv_cache_eviction_events=eviction_events,
spec_decoding_stats=spec_stats,
kv_connector_stats=connector_stats_payload,
cudagraph_stats=cudagraph_stats,
)
def make_spec_decoding_stats(