[CI/Build] vLLM cache directory for images (#6444)

2024-07-16 14:12:25 +08:00
parent 37d776606f
commit d97011512e
13 changed files with 123 additions and 140 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -17,7 +17,8 @@ if TYPE_CHECKING:
    S3_ACCESS_KEY_ID: Optional[str] = None
    S3_SECRET_ACCESS_KEY: Optional[str] = None
    S3_ENDPOINT_URL: Optional[str] = None
-    VLLM_CONFIG_ROOT: str = ""
+    VLLM_CACHE_ROOT: str = os.path.expanduser("~/.cache/vllm")
+    VLLM_CONFIG_ROOT: str = os.path.expanduser("~/.config/vllm")
    VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
    VLLM_NO_USAGE_STATS: bool = False
    VLLM_DO_NOT_TRACK: bool = False
@@ -31,10 +32,11 @@ if TYPE_CHECKING:
    VLLM_OPENVINO_KVCACHE_SPACE: int = 0
    VLLM_OPENVINO_CPU_KV_CACHE_PRECISION: Optional[str] = None
    VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS: bool = False
-    VLLM_XLA_CACHE_PATH: str = "~/.vllm/xla_cache/"
+    VLLM_XLA_CACHE_PATH: str = os.path.join(VLLM_CACHE_ROOT, "xla_cache")
    VLLM_FUSED_MOE_CHUNK_SIZE: int = 64 * 1024
    VLLM_USE_RAY_COMPILED_DAG: bool = False
    VLLM_WORKER_MULTIPROC_METHOD: str = "fork"
+    VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets")
    VLLM_IMAGE_FETCH_TIMEOUT: int = 5
    VLLM_TARGET_DEVICE: str = "cuda"
    MAX_JOBS: Optional[str] = None
@@ -45,6 +47,21 @@ if TYPE_CHECKING:
    CMAKE_BUILD_TYPE: Optional[str] = None
    VERBOSE: bool = False

+
+def get_default_cache_root():
+    return os.getenv(
+        "XDG_CACHE_HOME",
+        os.path.join(os.path.expanduser("~"), ".cache"),
+    )
+
+
+def get_default_config_root():
+    return os.getenv(
+        "XDG_CONFIG_HOME",
+        os.path.join(os.path.expanduser("~"), ".config"),
+    )
+
+
 # The begin-* and end* here are used by the documentation generator
 # to extract the used env vars.

@@ -89,15 +106,28 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    lambda: bool(int(os.getenv('VERBOSE', '0'))),

    # Root directory for VLLM configuration files
+    # Defaults to `~/.config/vllm` unless `XDG_CONFIG_HOME` is set
    # Note that this not only affects how vllm finds its configuration files
    # during runtime, but also affects how vllm installs its configuration
    # files during **installation**.
    "VLLM_CONFIG_ROOT":
-    lambda: os.environ.get("VLLM_CONFIG_ROOT", None) or os.getenv(
-        "XDG_CONFIG_HOME", None) or os.path.expanduser("~/.config"),
+    lambda: os.path.expanduser(
+        os.getenv(
+            "VLLM_CONFIG_ROOT",
+            os.path.join(get_default_config_root(), "vllm"),
+        )),

    # ================== Runtime Env Vars ==================

+    # Root directory for VLLM cache files
+    # Defaults to `~/.cache/vllm` unless `XDG_CACHE_HOME` is set
+    "VLLM_CACHE_ROOT":
+    lambda: os.path.expanduser(
+        os.getenv(
+            "VLLM_CACHE_ROOT",
+            os.path.join(get_default_cache_root(), "vllm"),
+        )),
+
    # used in distributed environment to determine the master address
    'VLLM_HOST_IP':
    lambda: os.getenv('VLLM_HOST_IP', "") or os.getenv("HOST_IP", ""),
@@ -242,6 +272,14 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    "VLLM_WORKER_MULTIPROC_METHOD":
    lambda: os.getenv("VLLM_WORKER_MULTIPROC_METHOD", "fork"),

+    # Path to the cache for storing downloaded assets
+    "VLLM_ASSETS_CACHE":
+    lambda: os.path.expanduser(
+        os.getenv(
+            "VLLM_ASSETS_CACHE",
+            os.path.join(get_default_cache_root(), "vllm", "assets"),
+        )),
+
    # Timeout for fetching images when serving multimodal models
    # Default is 5 seconds
    "VLLM_IMAGE_FETCH_TIMEOUT":
@@ -250,7 +288,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    # Path to the XLA persistent cache directory.
    # Only used for XLA devices such as TPUs.
    "VLLM_XLA_CACHE_PATH":
-    lambda: os.getenv("VLLM_XLA_CACHE_PATH", "~/.vllm/xla_cache/"),
+    lambda: os.path.expanduser(
+        os.getenv(
+            "VLLM_ASSETS_CACHE",
+            os.path.join(get_default_cache_root(), "vllm", "xla_cache"),
+        )),
    "VLLM_FUSED_MOE_CHUNK_SIZE":
    lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "65536")),

@@ -262,7 +304,7 @@ environment_variables: Dict[str, Callable[[], Any]] = {
 # end-env-vars-definition


-def __getattr__(name):
+def __getattr__(name: str):
    # lazy evaluation of environment variables
    if name in environment_variables:
        return environment_variables[name]()