Remove V0 attention backends (#25351)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
@@ -478,7 +478,8 @@ class DeepseekV2MLAAttention(nn.Module):
     Main reference: DeepseekV2 paper, and FlashInfer Implementation
     (https://arxiv.org/abs/2405.04434 and https://github.com/flashinfer-ai/flashinfer/pull/551).
 
-    For more info see MLACommonImpl in: vllm/attention/backends/mla/utils.py
+    For more info see MLACommonImpl in:
+    vllm/v1/attention/backends/mla/utils.py
     """
 
     def __init__(