[Attention] Use sparse prefill kernel for fp8 kv-cache in DeepSeek-v3.2 (#27532)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
2025-12-12 08:57:47 -05:00
parent 91401c7a26
commit 3e41992fec
30 changed files with 1372 additions and 256 deletions
--- a/tests/kernels/moe/test_flashinfer_moe.py
+++ b/tests/kernels/moe/test_flashinfer_moe.py
@@ -51,7 +51,14 @@ MNK_FACTORS = [
@pytest.mark.parametrize("activation", ["silu_and_mul", "relu2"])
@torch.inference_mode()
 def test_flashinfer_fp4_moe_no_graph(
-    m: int, n: int, k: int, e: int, topk: int, dtype: torch.dtype, activation: str
+    m: int,
+    n: int,
+    k: int,
+    e: int,
+    topk: int,
+    dtype: torch.dtype,
+    activation: str,
+    workspace_init,
 ):
    current_platform.seed_everything(7)
    with set_current_vllm_config(