[torch.compile] store inductor compiled Python file (#12182)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-01-19 16:27:26 +08:00
committed by GitHub
parent 630eb5b5ce
commit e66faf4809
2 changed files with 60 additions and 33 deletions

View File

@@ -2862,17 +2862,8 @@ class CompilationConfig(BaseModel):
"vllm.unified_attention_with_output",
]
else:
# v0 can use full-graph compilation without splitting;
# splitting is optional.
# Right now we still need it: the kv cache shape
# will be included in the graph if we don't split
# the graph.
# TODO: hide the kv cache in the static forward context
# so that inductor does not see it.
self.splitting_ops = [
"vllm.unified_attention",
"vllm.unified_attention_with_output",
]
# v0 uses full graph compilation
self.splitting_ops = []
for k, v in self.inductor_passes.items():
if not isinstance(v, str):