[Core][Easy] Use envs.__getattr__ to unify all environment variable access (#26810)
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
@@ -10,12 +10,12 @@ from typing import TYPE_CHECKING, Generic, TypeAlias, TypeVar, cast
|
|||||||
import torch
|
import torch
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
|
import vllm.envs as envs
|
||||||
from vllm.distributed.device_communicators.shm_object_storage import (
|
from vllm.distributed.device_communicators.shm_object_storage import (
|
||||||
MsgpackSerde,
|
MsgpackSerde,
|
||||||
SingleWriterShmObjectStorage,
|
SingleWriterShmObjectStorage,
|
||||||
SingleWriterShmRingBuffer,
|
SingleWriterShmRingBuffer,
|
||||||
)
|
)
|
||||||
from vllm.envs import VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.utils import GiB_bytes, MiB_bytes
|
from vllm.utils import GiB_bytes, MiB_bytes
|
||||||
from vllm.utils.cache import CacheInfo, LRUCache
|
from vllm.utils.cache import CacheInfo, LRUCache
|
||||||
@@ -436,7 +436,7 @@ class ShmObjectStoreSenderCache(BaseMultiModalProcessorCache):
|
|||||||
|
|
||||||
ring_buffer = SingleWriterShmRingBuffer(
|
ring_buffer = SingleWriterShmRingBuffer(
|
||||||
data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
|
data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
|
||||||
name=VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
name=envs.VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
||||||
create=True, # sender is the writer
|
create=True, # sender is the writer
|
||||||
)
|
)
|
||||||
self._shm_cache = SingleWriterShmObjectStorage(
|
self._shm_cache = SingleWriterShmObjectStorage(
|
||||||
@@ -678,7 +678,7 @@ class ShmObjectStoreReceiverCache(BaseMultiModalReceiverCache):
|
|||||||
|
|
||||||
ring_buffer = SingleWriterShmRingBuffer(
|
ring_buffer = SingleWriterShmRingBuffer(
|
||||||
data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
|
data_buffer_size=int(mm_config.mm_processor_cache_gb * GiB_bytes),
|
||||||
name=VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
name=envs.VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME,
|
||||||
create=False, # Server is a reader
|
create=False, # Server is a reader
|
||||||
)
|
)
|
||||||
self._shm_cache = SingleWriterShmObjectStorage(
|
self._shm_cache = SingleWriterShmObjectStorage(
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from os import PathLike
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from vllm.envs import VLLM_MODEL_REDIRECT_PATH
|
import vllm.envs as envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@@ -86,7 +86,7 @@ def maybe_model_redirect(model: str) -> str:
|
|||||||
:return: maybe redirect to a local folder
|
:return: maybe redirect to a local folder
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model_redirect_path = VLLM_MODEL_REDIRECT_PATH
|
model_redirect_path = envs.VLLM_MODEL_REDIRECT_PATH
|
||||||
|
|
||||||
if not model_redirect_path:
|
if not model_redirect_path:
|
||||||
return model
|
return model
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from collections import Counter
|
|||||||
from contextlib import suppress
|
from contextlib import suppress
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from vllm.envs import VLLM_GC_DEBUG
|
import vllm.envs as envs
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
@@ -36,7 +36,7 @@ class GCDebugConfig:
|
|||||||
self.top_objects = json_conf.get("top_objects", -1)
|
self.top_objects = json_conf.get("top_objects", -1)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.enabled = False
|
self.enabled = False
|
||||||
logger.error("Failed to parse VLLM_GC_DEBUG(%s)", VLLM_GC_DEBUG)
|
logger.error("Failed to parse VLLM_GC_DEBUG(%s)", envs.VLLM_GC_DEBUG)
|
||||||
logger.info("GC Debug Config. %s", str(self))
|
logger.info("GC Debug Config. %s", str(self))
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
@@ -93,7 +93,7 @@ def maybe_attach_gc_debug_callback() -> None:
|
|||||||
"""
|
"""
|
||||||
Attached a callback for GC debug when VLLM_GC_DEBUG is enabled.
|
Attached a callback for GC debug when VLLM_GC_DEBUG is enabled.
|
||||||
"""
|
"""
|
||||||
config = GCDebugConfig(VLLM_GC_DEBUG)
|
config = GCDebugConfig(envs.VLLM_GC_DEBUG)
|
||||||
if config.enabled:
|
if config.enabled:
|
||||||
debugger: GCDebugger = GCDebugger(config)
|
debugger: GCDebugger = GCDebugger(config)
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ from vllm.config import VllmConfig
|
|||||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||||
from vllm.engine.protocol import EngineClient
|
from vllm.engine.protocol import EngineClient
|
||||||
from vllm.entrypoints.utils import _validate_truncation_size
|
from vllm.entrypoints.utils import _validate_truncation_size
|
||||||
from vllm.envs import VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
|
|
||||||
from vllm.inputs import PromptType
|
from vllm.inputs import PromptType
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.lora.request import LoRARequest
|
from vllm.lora.request import LoRARequest
|
||||||
@@ -483,12 +482,12 @@ class AsyncLLM(EngineClient):
|
|||||||
# Split outputs into chunks of at most
|
# Split outputs into chunks of at most
|
||||||
# VLLM_V1_OUTPUT_PROC_CHUNK_SIZE, so that we don't block the
|
# VLLM_V1_OUTPUT_PROC_CHUNK_SIZE, so that we don't block the
|
||||||
# event loop for too long.
|
# event loop for too long.
|
||||||
if num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
|
if num_outputs <= envs.VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
|
||||||
slices = (outputs.outputs,)
|
slices = (outputs.outputs,)
|
||||||
else:
|
else:
|
||||||
slices = np.array_split(
|
slices = np.array_split(
|
||||||
outputs.outputs,
|
outputs.outputs,
|
||||||
cdiv(num_outputs, VLLM_V1_OUTPUT_PROC_CHUNK_SIZE),
|
cdiv(num_outputs, envs.VLLM_V1_OUTPUT_PROC_CHUNK_SIZE),
|
||||||
)
|
)
|
||||||
|
|
||||||
for i, outputs_slice in enumerate(slices):
|
for i, outputs_slice in enumerate(slices):
|
||||||
|
|||||||
Reference in New Issue
Block a user