Disable outlines cache by default (#14837)

This commit is contained in:
Russell Bryant
2025-03-14 23:57:55 -04:00
committed by GitHub
parent ccf02fcbae
commit 776dcec8fe
2 changed files with 16 additions and 1 deletions

View File

@@ -95,6 +95,7 @@ if TYPE_CHECKING:
VLLM_DP_MASTER_IP: str = ""
VLLM_DP_MASTER_PORT: int = 0
VLLM_MARLIN_USE_ATOMIC_ADD: bool = False
VLLM_V0_USE_OUTLINES_CACHE: bool = False
def get_default_cache_root():
@@ -623,6 +624,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Whether to use atomicAdd reduce in gptq/awq marlin kernel.
"VLLM_MARLIN_USE_ATOMIC_ADD":
lambda: os.environ.get("VLLM_MARLIN_USE_ATOMIC_ADD", "0") == "1",
# Whether to turn on the outlines cache for V0. Disabled unless set to "1".
# This cache is unbounded and on disk, so it's not safe to use in
# an environment with potentially malicious users.
"VLLM_V0_USE_OUTLINES_CACHE":
lambda: os.environ.get("VLLM_V0_USE_OUTLINES_CACHE", "0") == "1",
}
# end-env-vars-definition