[torch.compile] add logging for compilation time (#10941)

Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
youkaichao
2024-12-06 02:07:15 -08:00
committed by GitHub
parent db87eb6c67
commit b031a455a9
6 changed files with 75 additions and 10 deletions

View File

@@ -473,6 +473,7 @@ class LLMEngine:
The workers will determine the number of blocks in both the GPU cache
and the swap CPU cache.
"""
start = time.time()
num_gpu_blocks, num_cpu_blocks = (
self.model_executor.determine_num_available_blocks())
@@ -488,6 +489,9 @@ class LLMEngine:
self.cache_config.num_cpu_blocks = num_cpu_blocks
self.model_executor.initialize_cache(num_gpu_blocks, num_cpu_blocks)
elapsed = time.time() - start
logger.info(("init engine (profile, create kv cache, "
"warmup model) took %.2f seconds"), elapsed)
@classmethod
def _get_executor_cls(cls,