Support FlashAttention Backend for Hybrid SSM Models (#23299)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
2025-08-26 05:41:52 -07:00
parent ebd5a77bb5
commit 2b4fc9bd9b
2 changed files with 17 additions and 27 deletions
--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@@ -110,9 +110,6 @@ def test_models(
    if model in V1_SUPPORTED_MODELS:
        with monkeypatch.context() as m:
            m.setenv("VLLM_USE_V1", "1")
-            if model in HYBRID_MODELS:
-                # required due to reorder_batch behaviour
-                m.setenv("VLLM_ATTENTION_BACKEND", "FLASHINFER")
            with vllm_runner(model,
                             max_num_seqs=MAX_NUM_SEQS,
                             enable_prefix_caching=False) as vllm_model: