Support FlashAttention Backend for Hybrid SSM Models (#23299)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-08-26 05:41:52 -07:00
committed by GitHub
parent ebd5a77bb5
commit 2b4fc9bd9b
2 changed files with 17 additions and 27 deletions

View File

@@ -110,9 +110,6 @@ def test_models(
if model in V1_SUPPORTED_MODELS:
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
if model in HYBRID_MODELS:
# required due to reorder_batch behaviour
m.setenv("VLLM_ATTENTION_BACKEND", "FLASHINFER")
with vllm_runner(model,
max_num_seqs=MAX_NUM_SEQS,
enable_prefix_caching=False) as vllm_model: