diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 7916d1e02..7e7e3ca55 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -4,7 +4,9 @@ import contextlib import copy import logging import math +import os import queue +import sys import threading import time import uuid @@ -87,6 +89,21 @@ logger = init_logger(__name__) # Lazy import nixl_wrapper to avoid loading nixl_bindings if nixl is not used try: + if "UCX_MEM_MMAP_HOOK_MODE" not in os.environ: + # avoid a memory leak in UCX when using NIXL on some models + # see: https://github.com/vllm-project/vllm/issues/24264 + if "nixl" in sys.modules or "rixl" in sys.modules: + logger.warning( + "NIXL was already imported, we can't disable UCX mmap hooks. " + "Please set UCX_MEM_MMAP_HOOK_MODE to 'none' manually." + ) + else: + logger.info( + "Setting UCX_MEM_MMAP_HOOK_MODE to 'none' to avoid a rare " + "memory leak in UCX when using NIXL." + ) + os.environ["UCX_MEM_MMAP_HOOK_MODE"] = "none" + if not current_platform.is_rocm(): from nixl._api import nixl_agent as NixlWrapper from nixl._bindings import nixlXferTelemetry