[Kernels] Add Flash Linear Attention Kernels (#24518)
Signed-off-by: youkaichao <youkaichao@gmail.com>
@@ -7,8 +7,10 @@ from vllm.triton_utils.importing import (HAS_TRITON, TritonLanguagePlaceholder,
 if HAS_TRITON:
     import triton
     import triton.language as tl
+    import triton.language.extra.libdevice as tldevice
 else:
     triton = TritonPlaceholder()
     tl = TritonLanguagePlaceholder()
+    tldevice = TritonLanguagePlaceholder()
 
-__all__ = ["HAS_TRITON", "triton", "tl"]
+__all__ = ["HAS_TRITON", "triton", "tl", "tldevice"]
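Below is a minimal sketch (not part of this commit) of how kernel code can use the new tldevice export. Only the vllm.triton_utils import names come from this diff; the kernel and wrapper names are hypothetical. The point of the guarded import is that modules may reference tldevice unconditionally at import time, with TritonLanguagePlaceholder standing in on hosts without Triton, while actually launching a kernel still requires the real library.

import torch

from vllm.triton_utils import HAS_TRITON, tl, tldevice, triton


@triton.jit
def _exp2_kernel(x_ptr, out_ptr, n, BLOCK: tl.constexpr):
    # Each program handles BLOCK contiguous elements.
    offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
    mask = offs < n
    x = tl.load(x_ptr + offs, mask=mask)
    # libdevice exp2 through the tldevice alias exported by this commit.
    tl.store(out_ptr + offs, tldevice.exp2(x), mask=mask)


def exp2(x: torch.Tensor) -> torch.Tensor:
    # x: a CUDA float tensor. The placeholder only covers import time;
    # launching the kernel needs real Triton.
    assert HAS_TRITON, "Triton is required to launch the kernel"
    out = torch.empty_like(x)
    n = x.numel()
    grid = (triton.cdiv(n, 1024),)
    _exp2_kernel[grid](x, out, n, BLOCK=1024)
    return out

Flash-linear-attention-style kernels commonly favor base-2 exponentials for speed, which is a plausible reason the libdevice alias is being exported here, though the kernels themselves are outside this hunk.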