[torch.compile] add logging for compilation time (#10941)
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@@ -473,6 +473,7 @@ class LLMEngine:
|
||||
The workers will determine the number of blocks in both the GPU cache
|
||||
and the swap CPU cache.
|
||||
"""
|
||||
start = time.time()
|
||||
num_gpu_blocks, num_cpu_blocks = (
|
||||
self.model_executor.determine_num_available_blocks())
|
||||
|
||||
@@ -488,6 +489,9 @@ class LLMEngine:
|
||||
self.cache_config.num_cpu_blocks = num_cpu_blocks
|
||||
|
||||
self.model_executor.initialize_cache(num_gpu_blocks, num_cpu_blocks)
|
||||
elapsed = time.time() - start
|
||||
logger.info(("init engine (profile, create kv cache, "
|
||||
"warmup model) took %.2f seconds"), elapsed)
|
||||
|
||||
@classmethod
|
||||
def _get_executor_cls(cls,
|
||||
|
||||
Reference in New Issue
Block a user