[Feature] Add iteration level logging and enhance nvtx marker (#31193)

Signed-off-by: Max Hu <maxhu@nvidia.com> Signed-off-by: Max Hu <hyoung2991@gmail.com> Co-authored-by: Max Hu <maxhu@nvidia.com>
2026-01-08 19:13:39 -05:00
parent 11cec296dd
commit 6ebe34d6fa
6 changed files with 137 additions and 9 deletions
--- a/vllm/config/observability.py
+++ b/vllm/config/observability.py
@@ -75,6 +75,12 @@ class ObservabilityConfig:
    enable_mfu_metrics: bool = False
    """Enable Model FLOPs Utilization (MFU) metrics."""

+    enable_logging_iteration_details: bool = False
+    """Enable detailed logging of iteration details.
+    If set, the vLLM EngineCore will log iteration details.
+    This includes the number of context/generation requests and tokens,
+    and the elapsed CPU time for the iteration."""
+
    @cached_property
    def collect_model_forward_time(self) -> bool:
        """Whether to collect model forward time for the request."""