[CI/Build] vLLM cache directory for images (#6444)

This commit is contained in:
Cyrus Leung
2024-07-16 14:12:25 +08:00
committed by GitHub
parent 37d776606f
commit d97011512e
13 changed files with 123 additions and 140 deletions

View File

@@ -17,7 +17,8 @@ if TYPE_CHECKING:
S3_ACCESS_KEY_ID: Optional[str] = None
S3_SECRET_ACCESS_KEY: Optional[str] = None
S3_ENDPOINT_URL: Optional[str] = None
VLLM_CONFIG_ROOT: str = ""
VLLM_CACHE_ROOT: str = os.path.expanduser("~/.cache/vllm")
VLLM_CONFIG_ROOT: str = os.path.expanduser("~/.config/vllm")
VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
VLLM_NO_USAGE_STATS: bool = False
VLLM_DO_NOT_TRACK: bool = False
@@ -31,10 +32,11 @@ if TYPE_CHECKING:
VLLM_OPENVINO_KVCACHE_SPACE: int = 0
VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: Optional[str] = None
VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS: bool = False
VLLM_XLA_CACHE_PATH: str = "~/.vllm/xla_cache/"
VLLM_XLA_CACHE_PATH: str = os.path.join(VLLM_CACHE_ROOT, "xla_cache")
VLLM_FUSED_MOE_CHUNK_SIZE: int = 64 * 1024
VLLM_USE_RAY_COMPILED_DAG: bool = False
VLLM_WORKER_MULTIPROC_METHOD: str = "fork"
VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets")
VLLM_IMAGE_FETCH_TIMEOUT: int = 5
VLLM_TARGET_DEVICE: str = "cuda"
MAX_JOBS: Optional[str] = None
@@ -45,6 +47,21 @@ if TYPE_CHECKING:
CMAKE_BUILD_TYPE: Optional[str] = None
VERBOSE: bool = False
def get_default_cache_root() -> str:
    """Return the base directory under which user cache files are stored.

    Returns:
        The value of ``XDG_CACHE_HOME`` when it is set to a non-empty
        string; otherwise ``~/.cache`` with ``~`` expanded to the current
        user's home directory.
    """
    # Use `or` instead of a getenv default so that a set-but-empty
    # XDG_CACHE_HOME also falls back to ~/.cache. An empty value would
    # otherwise turn every path joined onto it into a relative path.
    return os.getenv("XDG_CACHE_HOME") or os.path.join(
        os.path.expanduser("~"), ".cache")
def get_default_config_root() -> str:
    """Return the base directory under which user config files are stored.

    Returns:
        The value of ``XDG_CONFIG_HOME`` when it is set to a non-empty
        string; otherwise ``~/.config`` with ``~`` expanded to the current
        user's home directory.
    """
    # Use `or` instead of a getenv default so that a set-but-empty
    # XDG_CONFIG_HOME also falls back to ~/.config. An empty value would
    # otherwise turn every path joined onto it into a relative path.
    return os.getenv("XDG_CONFIG_HOME") or os.path.join(
        os.path.expanduser("~"), ".config")
# The begin-* and end-* markers here are used by the documentation generator
# to extract the used env vars.
@@ -89,15 +106,28 @@ environment_variables: Dict[str, Callable[[], Any]] = {
lambda: bool(int(os.getenv('VERBOSE', '0'))),
# Root directory for VLLM configuration files
# Defaults to `~/.config/vllm` unless `XDG_CONFIG_HOME` is set
# Note that this not only affects how vllm finds its configuration files
# during runtime, but also affects how vllm installs its configuration
# files during **installation**.
"VLLM_CONFIG_ROOT":
lambda: os.environ.get("VLLM_CONFIG_ROOT", None) or os.getenv(
"XDG_CONFIG_HOME", None) or os.path.expanduser("~/.config"),
lambda: os.path.expanduser(
os.getenv(
"VLLM_CONFIG_ROOT",
os.path.join(get_default_config_root(), "vllm"),
)),
# ================== Runtime Env Vars ==================
# Root directory for VLLM cache files
# Defaults to `~/.cache/vllm` unless `XDG_CACHE_HOME` is set
"VLLM_CACHE_ROOT":
lambda: os.path.expanduser(
os.getenv(
"VLLM_CACHE_ROOT",
os.path.join(get_default_cache_root(), "vllm"),
)),
# used in distributed environment to determine the master address
'VLLM_HOST_IP':
lambda: os.getenv('VLLM_HOST_IP', "") or os.getenv("HOST_IP", ""),
@@ -242,6 +272,14 @@ environment_variables: Dict[str, Callable[[], Any]] = {
"VLLM_WORKER_MULTIPROC_METHOD":
lambda: os.getenv("VLLM_WORKER_MULTIPROC_METHOD", "fork"),
# Path to the cache for storing downloaded assets
"VLLM_ASSETS_CACHE":
lambda: os.path.expanduser(
os.getenv(
"VLLM_ASSETS_CACHE",
os.path.join(get_default_cache_root(), "vllm", "assets"),
)),
# Timeout for fetching images when serving multimodal models
# Default is 5 seconds
"VLLM_IMAGE_FETCH_TIMEOUT":
@@ -250,7 +288,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# Path to the XLA persistent cache directory.
# Only used for XLA devices such as TPUs.
"VLLM_XLA_CACHE_PATH":
lambda: os.getenv("VLLM_XLA_CACHE_PATH", "~/.vllm/xla_cache/"),
lambda: os.path.expanduser(
    os.getenv(
        # Read this entry's own variable. Looking up VLLM_ASSETS_CACHE
        # here would ignore a user-supplied VLLM_XLA_CACHE_PATH and point
        # the XLA cache at the assets directory whenever that is set.
        "VLLM_XLA_CACHE_PATH",
        # Default: the "xla_cache" subdirectory of the vLLM cache root.
        os.path.join(get_default_cache_root(), "vllm", "xla_cache"),
    )),
"VLLM_FUSED_MOE_CHUNK_SIZE":
lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "65536")),
@@ -262,7 +304,7 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# end-env-vars-definition
def __getattr__(name):
def __getattr__(name: str):
# lazy evaluation of environment variables
if name in environment_variables:
return environment_variables[name]()