diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 3651c835f..f97467ad6 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -407,10 +407,10 @@ def _support_torch_compile( if envs.VLLM_USE_AOT_COMPILE: """ When using torch.compile in AOT mode, we store the cache artifacts - under VLLM_CACHE_ROOT/torch_aot_compile/{hash}/rank_i_j. The {hash} - contains all of the factors except for the source files being - traced through, because we don't actually know which source files - to check at this point (before dynamo runs). + under VLLM_CACHE_ROOT/torch_compile_cache/torch_aot_compile/{hash}. + The {hash} contains all of the factors except for the source files + being traced through, because we don't actually know which source + files to check at this point (before dynamo runs). On loading we will actually look at the source files being traced through. If any source file have changed (compared with the serialized backend artifacts), then we need to generate a new AOT @@ -424,6 +424,7 @@ def _support_torch_compile( hash_key = hashlib.sha256(str(factors).encode()).hexdigest() cache_dir = os.path.join( envs.VLLM_CACHE_ROOT, + "torch_compile_cache", "torch_aot_compile", hash_key, )