Fixes illegal memory access (IMA) for tensor parallelism (TP) with flex-attention (#19712)

Signed-off-by: drisspg <drisspguessous@gmail.com>
This commit is contained in:
Driss Guessous
2025-06-16 21:01:50 -07:00
committed by GitHub
parent 5b3ad5ecf2
commit ddfed314f9
2 changed files with 2 additions and 10 deletions

View File

@@ -51,7 +51,6 @@ def test_flex_attention_vs_default_backend(monkeypatch):
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
set_seed(seed)
@@ -66,7 +65,6 @@ def test_flex_attention_vs_default_backend(monkeypatch):
# Run with default backend
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
set_seed(seed)
llm_default = LLM(
model_name,