fix amd triton mla path (#17871)
@@ -1063,7 +1063,7 @@ class MLACommonImpl(MLAAttentionImpl[T], Generic[T]):
                 softmax_scale,
                 None,  # bias
             )
-        if is_vllm_fa:
+        elif is_vllm_fa:
             attn_out = self.flash_attn_varlen_func(
                 q=q,
                 k=k,
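
For context, a minimal sketch of the dispatch pattern this one-line hunk fixes. The context lines above the `elif` close an earlier call (presumably the ROCm/Triton flash-attention path, given the commit title); `dispatch_attention`, `use_triton_fa`, and `triton_fa_func` are hypothetical names used here for illustration, while `is_vllm_fa` and `flash_attn_varlen_func` come from the diff itself:

```python
# Hedged sketch, not the actual vLLM source: it only illustrates why the
# `if` -> `elif` change matters on the AMD Triton MLA path.

def dispatch_attention(q, k, v, use_triton_fa, is_vllm_fa,
                       triton_fa_func, flash_attn_varlen_func):
    # Pick exactly one flash-attention backend per call.
    if use_triton_fa:   # AMD/ROCm Triton flash-attention path
        attn_out = triton_fa_func(q, k, v)
    elif is_vllm_fa:    # was `if`: would also run after the Triton branch
        attn_out = flash_attn_varlen_func(q=q, k=k, v=v)
    else:
        raise RuntimeError("no flash-attention backend available")
    return attn_out
```

With a plain `if`, the `is_vllm_fa` branch executed even when the Triton branch had already produced `attn_out`, overwriting the result; `elif` makes the two backends mutually exclusive, which is the entire fix.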