[ROCm][CI] v1 cpu offloading attention backend fix (#31833)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -15,10 +15,12 @@ from vllm.distributed.kv_events import BlockStored, KVEventBatch
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
# Block sizes exercised by the CPU-offloading KV-cache tests.
CPU_BLOCK_SIZES = [48]

# Attention backends to parameterize the tests over, selected per platform.
# Default is empty so that unsupported platforms simply skip the
# backend-parameterized tests instead of failing on an unavailable backend.
ATTN_BACKENDS = []
if current_platform.is_cuda():
    # CUDA supports all three v1 attention backends under test.
    ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER", "TRITON_ATTN"]
elif current_platform.is_rocm():
    # ROCm CI only runs the Triton attention backend here
    # (FLASH_ATTN/FLASHINFER are not available on ROCm for this path).
    ATTN_BACKENDS = ["TRITON_ATTN"]
class MockSubscriber:
|
||||
|
||||
Reference in New Issue
Block a user