Add logging for cudagraph related info (#29825)
Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import dataclasses
|
||||
from collections import Counter
|
||||
from collections.abc import Callable
|
||||
from contextlib import ExitStack
|
||||
from typing import Any
|
||||
@@ -22,6 +23,99 @@ from vllm.utils.torch_utils import weak_ref_tensors
|
||||
# Module-level logger; CUDAGraphLogging.log uses logger.info as its default sink.
logger = init_logger(__name__)
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class CUDAGraphStat:
    """Immutable record describing a single cudagraph observation.

    The dataclass is frozen (and therefore hashable), so equal records
    collapse onto one key when they are tallied with ``collections.Counter``.
    """

    # Token count before padding (per the field name; producer not shown here).
    num_unpadded_tokens: int
    # Token count after padding (per the field name; producer not shown here).
    num_padded_tokens: int
    # Amount of padding; presumably padded minus unpadded -- TODO confirm.
    num_paddings: int
    # Runtime mode label, printed verbatim in the "Runtime Mode" table column.
    runtime_mode: str
|
||||
|
||||
|
||||
class CUDAGraphLogging:
    """Buffer CUDAGraphStat observations and report them as a text table.

    Observations pile up via ``observe``; ``log`` emits one table that counts
    identical observations and then clears the buffer.
    """

    COLUMN_HEADERS = [
        "Unpadded Tokens",
        "Padded Tokens",
        "Num Paddings",
        "Runtime Mode",
        "Count",
    ]

    def __init__(self, cg_mode: CUDAGraphMode, cg_capture_sizes: list[int] | None):
        """Store the stringified config and pre-render the report preamble.

        Args:
            cg_mode: Cudagraph mode; kept as ``str(cg_mode)``.
            cg_capture_sizes: Capture sizes; ``None`` (or an empty list) is
                shown as ``[]``.
        """
        self.reset()
        self.cg_mode = str(cg_mode)
        self.cg_capture_sizes = str(cg_capture_sizes or [])

        # The preamble depends only on the configuration, so build it once.
        self.settings_header = "".join(
            (
                "**CUDAGraph Config Settings:**\n\n",
                f"- Mode: {self.cg_mode}\n",
                f"- Capture sizes: {self.cg_capture_sizes}\n\n",
                "**CUDAGraph Stats:**\n\n",
            )
        )

    def reset(self):
        """Discard every buffered observation."""
        self.stats = []

    def observe(self, cudagraph_stat: CUDAGraphStat):
        """Append one observation to the buffer."""
        self.stats.append(cudagraph_stat)

    def generate_metric_table(self) -> str:
        """Render the buffered observations as a pipe-delimited table.

        Identical observations are merged into one row with a count. Rows are
        ordered by descending count; ties keep first-seen order.

        Returns:
            The settings preamble followed by header, separator and data rows,
            terminated by a newline.
        """
        # most_common() yields (stat, count) pairs sorted by count, highest
        # first, with equal counts left in first-encountered order.
        body = [
            [
                str(stat.num_unpadded_tokens),
                str(stat.num_padded_tokens),
                str(stat.num_paddings),
                stat.runtime_mode,
                str(count),
            ]
            for stat, count in Counter(self.stats).most_common()
        ]

        # Each column is as wide as its longest cell, header included.
        widths = [
            max([len(title)] + [len(line[idx]) for line in body])
            for idx, title in enumerate(self.COLUMN_HEADERS)
        ]

        def render(cells):
            # Left-justify every cell to its column width and wrap in pipes.
            padded = (str(cell).ljust(width) for cell, width in zip(cells, widths))
            return "| " + " | ".join(padded) + " |"

        header_line = render(self.COLUMN_HEADERS) + "\n"
        # Dashes span the column width plus the two padding spaces.
        rule_line = "|" + "|".join("-" * (width + 2) for width in widths) + "|\n"
        data_lines = [render(line) for line in body]

        return "".join(
            [
                self.settings_header,
                header_line,
                rule_line,
                "\n".join(data_lines),
                "\n",
            ]
        )

    def log(self, log_fn=logger.info):
        """Emit the table through ``log_fn`` and clear the buffer.

        Does nothing when no observations are buffered.
        """
        if self.stats:
            log_fn(self.generate_metric_table())
            self.reset()
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class CUDAGraphEntry:
|
||||
batch_descriptor: BatchDescriptor
|
||||
|
||||
Reference in New Issue
Block a user