[torch.compile] store inductor compiled Python file (#12182)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
@@ -2862,17 +2862,8 @@ class CompilationConfig(BaseModel):
    "vllm.unified_attention_with_output",
]
else:
    # v0 can use full graph compilation without splitting,
    # splitting is optional.
    # right now we still need it. kv cache shape
    # will be included in the graph if we don't split
    # the graph.
    # TODO: hide kv cache in static forward context
    # so that inductor does not see it.
    self.splitting_ops = [
        "vllm.unified_attention",
        "vllm.unified_attention_with_output",
    ]
# v0 uses full graph compilation
self.splitting_ops = []

for k, v in self.inductor_passes.items():
    if not isinstance(v, str):
Reference in New Issue
Block a user