[CI/Build] Don't add FLASHINFER backend in test_cpu_offloading.py (#29229)

Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
This commit is contained in:
rasmith
2025-11-22 05:00:54 -06:00
committed by GitHub
parent a4fdf2405c
commit 8e22da1d7f

View File

@@ -12,10 +12,14 @@ from tqdm import tqdm
from vllm import LLM, SamplingParams, TokensPrompt
from vllm.config import KVEventsConfig, KVTransferConfig
from vllm.distributed.kv_events import BlockStored, KVEventBatch
from vllm.platforms import current_platform
from vllm.utils.system_utils import set_env_var
# CPU KV-cache block sizes exercised by the offloading tests.
CPU_BLOCK_SIZES = [48]

# Attention backends to parameterize the tests over.
# FLASH_ATTN is always tested; FLASHINFER is CUDA-only, so it is appended
# conditionally rather than listed unconditionally (which broke non-CUDA CI).
ATTN_BACKENDS = ["FLASH_ATTN"]
if current_platform.is_cuda():
    ATTN_BACKENDS.append("FLASHINFER")
class MockSubscriber: