Fixes IMA for TP w/ flex-attention (#19712)
Signed-off-by: drisspg <drisspguessous@gmail.com>

@@ -51,7 +51,6 @@ def test_flex_attention_vs_default_backend(monkeypatch):
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")
        m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
        m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")

        set_seed(seed)

@@ -66,7 +65,6 @@ def test_flex_attention_vs_default_backend(monkeypatch):
    # Run with default backend
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")
        m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
        set_seed(seed)
        llm_default = LLM(
            model_name,
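
For context, here is a minimal standalone sketch of what this test exercises: generating the same prompt under the flex-attention backend and the default backend, then comparing the outputs. It assumes vLLM's public LLM/SamplingParams API and the same environment variables the test sets via monkeypatch; the model name, prompt, and sampling settings below are illustrative placeholders, not values from the diff.

    import os

    from vllm import LLM, SamplingParams

    model_name = "Qwen/Qwen2-1.5B-Instruct"  # hypothetical placeholder model
    prompt = "The capital of France is"      # hypothetical example prompt
    # Greedy decoding so the two backends can be compared deterministically.
    params = SamplingParams(temperature=0.0, max_tokens=16)

    # Select the flex-attention backend via the same env vars the test sets.
    os.environ["VLLM_USE_V1"] = "1"
    os.environ["VLLM_ATTENTION_BACKEND"] = "FLEX_ATTENTION"
    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
    llm_flex = LLM(model_name)
    out_flex = llm_flex.generate([prompt], params)[0].outputs[0].text
    del llm_flex  # release the engine before building a second one in-process

    # Re-run with the default attention backend and compare.
    os.environ.pop("VLLM_ATTENTION_BACKEND", None)
    llm_default = LLM(model_name)
    out_default = llm_default.generate([prompt], params)[0].outputs[0].text

    assert out_flex == out_default, "backends diverged under greedy decoding"

The real test instead isolates each run with monkeypatch.context(), as shown in the hunks above, so the env-var changes are undone between runs.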