[Docs] Fix warnings in mkdocs build (#23649)
Signed-off-by: Zerohertz <ohg3417@gmail.com>
Signed-off-by: Hyogeun Oh (오효근) <ohg3417@gmail.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import Optional
+
 import torch
 from einops import rearrange
 
@@ -453,7 +455,14 @@ class _attention(torch.autograd.Function):
 lightning_attention_ = _attention.apply
 
 
-def lightning_attention(q, k, v, ed, block_size=256, kv_history=None):
+def lightning_attention(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    ed: torch.Tensor,
+    block_size: int = 256,
+    kv_history: Optional[torch.Tensor] = None
+) -> tuple[torch.Tensor, torch.Tensor]:
     """
     Apply lightning attention algorithm
     to compute attention efficiently.
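For context, a minimal call sketch of the newly annotated signature. The module path `vllm.model_executor.layers.lightning_attn`, the `(batch, heads, seq_len, head_dim)` layout for `q`/`k`/`v`, and `ed` holding one decay rate per head are assumptions for illustration, not taken from this diff; the underlying kernel is written in Triton, so running it requires a CUDA-capable vLLM install.

```python
# Hedged usage sketch of the signature annotated in this diff.
# Assumptions (not from the diff): the import path and the tensor shapes below.
from typing import Optional

import torch

from vllm.model_executor.layers.lightning_attn import lightning_attention

batch, heads, seq_len, head_dim = 1, 8, 512, 64
device = "cuda"  # Triton kernel underneath, so a GPU is required

q = torch.randn(batch, heads, seq_len, head_dim, device=device)
k = torch.randn(batch, heads, seq_len, head_dim, device=device)
v = torch.randn(batch, heads, seq_len, head_dim, device=device)
ed = torch.rand(heads, device=device)  # per-head decay rates (assumed shape)

# Optional running KV state; None starts from an empty history.
kv_history: Optional[torch.Tensor] = None

# Per the new annotation, the call returns a tuple of tensors
# (attention output and updated KV state).
out, kv = lightning_attention(q, k, v, ed, block_size=256, kv_history=kv_history)
print(out.shape, kv.shape)
```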