[misc] use out argument for flash attention (#10822)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2024-12-02 02:50:10 -08:00
committed by GitHub
parent e95f275f57
commit a4c4daf364
13 changed files with 141 additions and 154 deletions

View File

@@ -2238,7 +2238,7 @@ class CompilationConfig(BaseModel):
custom_ops: List[str] = Field(default_factory=list)
splitting_ops: List[str] = Field(default_factory=lambda: [
"vllm.unified_attention",
-"vllm.unified_v1_flash_attention",
+"vllm.unified_attention_with_output",
])
use_inductor: bool = True