Fixes IMA for TP w/ flex-attention (#19712)
Signed-off-by: drisspg <drisspguessous@gmail.com>

@@ -51,7 +51,6 @@ def test_flex_attention_vs_default_backend(monkeypatch):
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")
        m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
        m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")

        set_seed(seed)

@@ -66,7 +65,6 @@ def test_flex_attention_vs_default_backend(monkeypatch):
    # Run with default backend
    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")
        m.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
        set_seed(seed)
        llm_default = LLM(
            model_name,
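
For context, here is a minimal standalone sketch of what this test exercises: generating the same prompt under the flex-attention backend and the default backend, then comparing the outputs. It assumes vLLM's public LLM/SamplingParams API and the same environment variables the test sets via monkeypatch; the model name, prompt, and sampling settings below are illustrative placeholders, not values from the diff.

    import os

    from vllm import LLM, SamplingParams

    model_name = "Qwen/Qwen2-1.5B-Instruct"  # hypothetical placeholder model
    prompt = "The capital of France is"      # hypothetical example prompt
    # Greedy decoding so the two backends can be compared deterministically.
    params = SamplingParams(temperature=0.0, max_tokens=16)

    # Select the flex-attention backend via the same env vars the test sets.
    os.environ["VLLM_USE_V1"] = "1"
    os.environ["VLLM_ATTENTION_BACKEND"] = "FLEX_ATTENTION"
    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
    llm_flex = LLM(model_name)
    out_flex = llm_flex.generate([prompt], params)[0].outputs[0].text
    del llm_flex  # release the engine before building a second one in-process

    # Re-run with the default attention backend and compare.
    os.environ.pop("VLLM_ATTENTION_BACKEND", None)
    llm_default = LLM(model_name)
    out_default = llm_default.generate([prompt], params)[0].outputs[0].text

    assert out_flex == out_default, "backends diverged under greedy decoding"

The real test instead isolates each run with monkeypatch.context(), as shown in the hunks above, so the env-var changes are undone between runs.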