From e4a5d8c653fc00adb06922bddcb7fec14b01a62b Mon Sep 17 00:00:00 2001 From: Zhengxu Chen Date: Fri, 20 Feb 2026 11:46:45 -0500 Subject: [PATCH] [compile] Move torch_aot_compile directory under torch_compile_cache (#34831) Signed-off-by: zhxchen17 --- vllm/compilation/decorators.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 3651c835f..f97467ad6 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -407,10 +407,10 @@ def _support_torch_compile( if envs.VLLM_USE_AOT_COMPILE: """ When using torch.compile in AOT mode, we store the cache artifacts - under VLLM_CACHE_ROOT/torch_aot_compile/{hash}/rank_i_j. The {hash} - contains all of the factors except for the source files being - traced through, because we don't actually know which source files - to check at this point (before dynamo runs). + under VLLM_CACHE_ROOT/torch_compile_cache/torch_aot_compile/{hash}. + The {hash} contains all of the factors except for the source files + being traced through, because we don't actually know which source + files to check at this point (before dynamo runs). On loading we will actually look at the source files being traced through. If any source file have changed (compared with the serialized backend artifacts), then we need to generate a new AOT @@ -424,6 +424,7 @@ def _support_torch_compile( hash_key = hashlib.sha256(str(factors).encode()).hexdigest() cache_dir = os.path.join( envs.VLLM_CACHE_ROOT, + "torch_compile_cache", "torch_aot_compile", hash_key, )