[2/N] Elastic EP Milestone 2: Integrating NIXL-EP (#35627)
Signed-off-by: Itay Alroy <ialroy@nvidia.com>
Co-authored-by: Yongji Wu <wuyongji317@gmail.com>
Co-authored-by: Ron Tourgeman <rtourgeman@nvidia.com>
This commit is contained in:
@@ -244,6 +244,7 @@ if TYPE_CHECKING:
|
||||
VLLM_ELASTIC_EP_SCALE_UP_LAUNCH: bool = False
|
||||
VLLM_ELASTIC_EP_DRAIN_REQUESTS: bool = False
|
||||
VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS: bool = False
|
||||
VLLM_NIXL_EP_MAX_NUM_RANKS: int = 32
|
||||
|
||||
|
||||
def get_default_cache_root():
|
||||
@@ -1628,6 +1629,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS": lambda: bool(
|
||||
int(os.getenv("VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS", "0"))
|
||||
),
|
||||
# NIXL EP environment variables
|
||||
"VLLM_NIXL_EP_MAX_NUM_RANKS": lambda: int(
|
||||
os.getenv("VLLM_NIXL_EP_MAX_NUM_RANKS", "32")
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user