[misc] use out argument for flash attention (#10822)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
@@ -2238,7 +2238,7 @@ class CompilationConfig(BaseModel):
     custom_ops: List[str] = Field(default_factory=list)
     splitting_ops: List[str] = Field(default_factory=lambda: [
         "vllm.unified_attention",
-        "vllm.unified_v1_flash_attention",
+        "vllm.unified_attention_with_output",
     ])
 
     use_inductor: bool = True
Reference in New Issue
Block a user