diff --git a/vllm/v1/attention/backends/mla/aiter_triton_mla.py b/vllm/v1/attention/backends/mla/aiter_triton_mla.py index 6082a5e86..b164bb7b2 100644 --- a/vllm/v1/attention/backends/mla/aiter_triton_mla.py +++ b/vllm/v1/attention/backends/mla/aiter_triton_mla.py @@ -54,7 +54,7 @@ class AiterTritonMLAImpl(AiterMLAImpl): k, v, softmax_scale=softmax_scale, - return_softmax_lse=return_softmax_lse, + return_lse=return_softmax_lse, **kwargs, ) # Transpose the LSE if Triton MHA is used: diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py index 71be5b171..f79d58ca1 100644 --- a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py +++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py @@ -236,7 +236,7 @@ class AiterMLAImpl(MLACommonImpl[AiterMLAMetadata]): k=k, v=v, softmax_scale=softmax_scale, - return_softmax_lse=return_softmax_lse, + return_lse=return_softmax_lse, **kwargs, )