Triton Attention: Support cross-layers blocks (#30687)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
2026-01-05 21:29:16 +02:00
parent 21156ff199
commit d8e38d4939
2 changed files with 14 additions and 3 deletions
--- a/tests/v1/kv_offload/test_cpu_offloading.py
+++ b/tests/v1/kv_offload/test_cpu_offloading.py
@@ -15,12 +15,10 @@ from vllm.distributed.kv_events import BlockStored, KVEventBatch
 from vllm.platforms import current_platform

 CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = ["FLASH_ATTN"]
+ATTN_BACKENDS = ["FLASH_ATTN", "TRITON_ATTN"]

 if current_platform.is_cuda():
    ATTN_BACKENDS.append("FLASHINFER")
-elif current_platform.is_rocm():
-    ATTN_BACKENDS = ["TRITON_ATTN"]


 class MockSubscriber: