Remove V0 attention backends (#25351)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-21 16:03:28 -07:00
committed by GitHub
parent af7dfb0d1a
commit bc6e542d9f
28 changed files with 143 additions and 7376 deletions

View File

@@ -478,7 +478,8 @@ class DeepseekV2MLAAttention(nn.Module):
Main reference: DeepseekV2 paper, and FlashInfer Implementation
(https://arxiv.org/abs/2405.04434 and https://github.com/flashinfer-ai/flashinfer/pull/551).
-For more info see MLACommonImpl in: vllm/attention/backends/mla/utils.py
+For more info see MLACommonImpl in:
+vllm/v1/attention/backends/mla/utils.py
"""
def __init__(