[torch.compile] add logging for compilation time (#10941)
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@@ -67,6 +67,7 @@ class EngineCore:
|
||||
|
||||
def _initialize_kv_caches(self,
|
||||
cache_config: CacheConfig) -> Tuple[int, int]:
|
||||
start = time.time()
|
||||
num_gpu_blocks, _ = self.model_executor.determine_num_available_blocks(
|
||||
)
|
||||
|
||||
@@ -80,6 +81,9 @@ class EngineCore:
|
||||
|
||||
num_cpu_blocks = 0
|
||||
self.model_executor.initialize_cache(num_gpu_blocks)
|
||||
elapsed = time.time() - start
|
||||
logger.info(("init engine (profile, create kv cache, "
|
||||
"warmup model) took %.2f seconds"), elapsed)
|
||||
return num_gpu_blocks, num_cpu_blocks
|
||||
|
||||
def add_request(self, request: EngineCoreRequest):
|
||||
|
||||
Reference in New Issue
Block a user