[Misc] Remove redundant attention var constants (#29650)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2025-11-28 20:35:19 +08:00
Committed by: GitHub
Parent: 5c2b5cb422
Commit: 33b06a6f24
7 changed files with 19 additions and 63 deletions

@@ -11,7 +11,6 @@ import pytest
 from tests.quantization.utils import is_quant_method_supported
 from vllm.attention.utils.fa_utils import flash_attn_supports_fp8
 from vllm.platforms import current_platform
-from vllm.utils import STR_BACKEND_ENV_VAR
 from ..utils import check_logprobs_close
@@ -76,7 +75,7 @@ def test_models(
     with monkeypatch.context() as m:
         m.setenv("TOKENIZERS_PARALLELISM", "true")
-        m.setenv(STR_BACKEND_ENV_VAR, backend)
+        m.setenv("VLLM_ATTENTION_BACKEND", backend)
         MAX_MODEL_LEN = 1024
         NUM_LOG_PROBS = 8
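
For context, the post-change pattern sets the backend through the "VLLM_ATTENTION_BACKEND" environment variable literal rather than importing the STR_BACKEND_ENV_VAR constant from vllm.utils. A minimal sketch of that pattern follows; the test name, parametrized backend values, and the final assertion are illustrative assumptions, not code from this commit.

import os

import pytest

# Illustrative only: mirrors the post-change pattern of selecting the
# attention backend via its literal environment variable name.
@pytest.mark.parametrize("backend", ["FLASH_ATTN", "FLASHINFER"])
def test_backend_env_var(monkeypatch: pytest.MonkeyPatch, backend: str) -> None:
    with monkeypatch.context() as m:
        m.setenv("TOKENIZERS_PARALLELISM", "true")
        # Set directly by name; the STR_BACKEND_ENV_VAR constant was removed.
        m.setenv("VLLM_ATTENTION_BACKEND", backend)
        # Model construction and generation would go here in a real test.
        assert os.environ["VLLM_ATTENTION_BACKEND"] == backend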