[Misc] centralize all usage of environment variables (#4548)

This commit is contained in:
youkaichao
2024-05-02 11:13:25 -07:00
committed by GitHub
parent 1ff0c73a79
commit 5b8a7c1cb0
18 changed files with 220 additions and 64 deletions

View File

@@ -1,18 +1,16 @@
import enum
import os
from functools import lru_cache
from typing import Type
import torch
import vllm.envs as envs
from vllm.attention.backends.abstract import AttentionBackend
from vllm.logger import init_logger
from vllm.utils import is_cpu, is_hip
# Module-level logger obtained from vllm.logger.init_logger for this module.
logger = init_logger(__name__)
# Name of the environment variable that is looked up via os.getenv() further
# down; when it is set, its value is used as a _Backend member name.
VLLM_ATTENTION_BACKEND = "VLLM_ATTENTION_BACKEND"
class _Backend(enum.Enum):
FLASH_ATTN = enum.auto()
@@ -79,7 +77,7 @@ def _which_attn_to_use(dtype: torch.dtype) -> _Backend:
"package is not found. Please install it for better performance.")
return _Backend.XFORMERS
backend_by_env_var = os.getenv(VLLM_ATTENTION_BACKEND)
backend_by_env_var = envs.VLLM_ATTENTION_BACKEND
if backend_by_env_var is not None:
return _Backend[backend_by_env_var]