[torch.compile] add logging for compilation time (#10941)

Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2024-12-06 02:07:15 -08:00
parent db87eb6c67
commit b031a455a9
6 changed files with 75 additions and 10 deletions
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -473,6 +473,7 @@ class LLMEngine:
        The workers will determine the number of blocks in both the GPU cache
        and the swap CPU cache.
        """
+        start = time.time()
        num_gpu_blocks, num_cpu_blocks = (
            self.model_executor.determine_num_available_blocks())

@@ -488,6 +489,9 @@ class LLMEngine:
        self.cache_config.num_cpu_blocks = num_cpu_blocks

        self.model_executor.initialize_cache(num_gpu_blocks, num_cpu_blocks)
+        elapsed = time.time() - start
+        logger.info(("init engine (profile, create kv cache, "
+                     "warmup model) took %.2f seconds"), elapsed)

    @classmethod
    def _get_executor_cls(cls,