[V1] Consolidate MM cache size to vllm.envs (#13239)
This commit is contained in:
vllm/envs.py — 11 changed lines (likely 9 additions, 2 deletions; the scraped "11" appears to be the total-changes count)
@@ -55,6 +55,7 @@ if TYPE_CHECKING:
     VLLM_IMAGE_FETCH_TIMEOUT: int = 5
     VLLM_VIDEO_FETCH_TIMEOUT: int = 30
     VLLM_AUDIO_FETCH_TIMEOUT: int = 10
+    VLLM_MM_INPUT_CACHE_SIZE: int = 256
     VLLM_TARGET_DEVICE: str = "cuda"
     MAX_JOBS: Optional[str] = None
     NVCC_THREADS: Optional[str] = None
@@ -401,15 +402,21 @@ environment_variables: Dict[str, Callable[[], Any]] = {
     lambda: int(os.getenv("VLLM_IMAGE_FETCH_TIMEOUT", "5")),

     # Timeout for fetching videos when serving multimodal models
-    # Default is 15 seconds
+    # Default is 30 seconds
     "VLLM_VIDEO_FETCH_TIMEOUT":
-    lambda: int(os.getenv("VLLM_VIDEO_FETCH_TIMEOUT", "15")),
+    lambda: int(os.getenv("VLLM_VIDEO_FETCH_TIMEOUT", "30")),

     # Timeout for fetching audio when serving multimodal models
     # Default is 10 seconds
     "VLLM_AUDIO_FETCH_TIMEOUT":
     lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),

+    # Cache size for multimodal feature/input cache for multimodal models
+    # in unit of number of multimodal data items (e.g. image, video, audio).
+    # Default is 256 multimodal data items.
+    "VLLM_MM_INPUT_CACHE_SIZE":
+    lambda: int(os.getenv("VLLM_MM_INPUT_CACHE_SIZE", "256")),
+
     # Path to the XLA persistent cache directory.
     # Only used for XLA devices such as TPUs.
     "VLLM_XLA_CACHE_PATH":
|
||||
Reference in New Issue
Block a user