[Metrics] Model FLOPs Utilization estimation (#30738)

Signed-off-by: SungMinCho <tjdals4565@gmail.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Co-authored-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
SungMinCho
2025-12-17 17:40:51 -08:00
committed by GitHub
parent ed2897f336
commit a0b782f9cc
8 changed files with 2186 additions and 2 deletions

View File

@@ -523,6 +523,7 @@ class EngineArgs:
enable_layerwise_nvtx_tracing: bool = (
ObservabilityConfig.enable_layerwise_nvtx_tracing
)
enable_mfu_metrics: bool = ObservabilityConfig.enable_mfu_metrics
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
scheduler_cls: str | type[object] | None = SchedulerConfig.scheduler_cls
@@ -1042,6 +1043,10 @@ class EngineArgs:
"--enable-layerwise-nvtx-tracing",
**observability_kwargs["enable_layerwise_nvtx_tracing"],
)
observability_group.add_argument(
"--enable-mfu-metrics",
**observability_kwargs["enable_mfu_metrics"],
)
# Scheduler arguments
scheduler_kwargs = get_kwargs(SchedulerConfig)
@@ -1689,6 +1694,7 @@ class EngineArgs:
kv_cache_metrics_sample=self.kv_cache_metrics_sample,
cudagraph_metrics=self.cudagraph_metrics,
enable_layerwise_nvtx_tracing=self.enable_layerwise_nvtx_tracing,
enable_mfu_metrics=self.enable_mfu_metrics,
)
# Compilation config overrides