[V1] Move usage stats to worker and start logging TPU hardware (#16211)

This commit is contained in:
Daniel Li
2025-04-25 13:06:01 -07:00
committed by GitHub
parent a5450f11c9
commit 48cb2109b6
6 changed files with 22 additions and 10 deletions

View File

@@ -23,6 +23,7 @@ from vllm.platforms import current_platform
from vllm.utils import GiB_bytes
from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
from vllm.v1.outputs import ModelRunnerOutput
from vllm.v1.utils import report_usage_stats
from vllm.v1.worker.gpu_model_runner import GPUModelRunner
from vllm.v1.worker.worker_base import WorkerBase
@@ -141,6 +142,10 @@ class Worker(WorkerBase):
self.model_runner: GPUModelRunner = GPUModelRunner(
self.vllm_config, self.device)
if self.rank == 0:
# If usage stats are enabled, report the relevant info.
report_usage_stats(self.vllm_config)
# FIXME(youkaichao & ywang96): Use TorchDispatchMode instead of memory pool
# to hijack tensor allocation.
def load_model(self) -> None: