[CI][amd] Revert NIXL connector change to avoid crash (#32570)

Signed-off-by: Qiang Li <qiang.li2@amd.com>
Signed-off-by: Matthew Wong <Matthew.Wong2@amd.com>
This commit is contained in:
qli88
2026-01-19 12:39:16 -06:00
committed by GitHub
parent cd3ac5b797
commit a0490be8f1
3 changed files with 8 additions and 8 deletions

View File

@@ -1451,7 +1451,7 @@ steps:
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large-amd.txt
- label: NixlConnector PD accuracy tests (Distributed) # 30min
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_4
# grade: Blocking
timeout_in_minutes: 30
@@ -1465,7 +1465,7 @@ steps:
- VLLM_ATTENTION_BACKEND=ROCM_ATTN bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh
- label: DP EP NixlConnector PD accuracy tests (Distributed) # 15min
mirror_hardwares: [amdexperimental]
mirror_hardwares: [amdexperimental, amdproduction]
agent_pool: mi325_4
# grade: Blocking
timeout_in_minutes: 15

View File

@@ -385,5 +385,5 @@ RUN echo "VLLM_BASE_IMAGE=${BASE_IMAGE}" >> ${COMMON_WORKDIR}/versions.txt
CMD ["/bin/bash"]
#Set entrypoint for vllm-openai official images
FROM final As vllm-openai
FROM final AS vllm-openai
ENTRYPOINT ["vllm", "serve"]

View File

@@ -89,20 +89,20 @@ logger = init_logger(__name__)
# Lazy import nixl_wrapper to avoid loading nixl_bindings if nixl is not used
try:
if "UCX_MEM_MMAP_HOOK_MODE" not in os.environ:
if "UCX_RCACHE_MAX_UNRELEASED" not in os.environ:
# avoid a memory leak in UCX when using NIXL on some models
# see: https://github.com/vllm-project/vllm/issues/24264
if "nixl" in sys.modules or "rixl" in sys.modules:
logger.warning(
"NIXL was already imported, we can't disable UCX mmap hooks. "
"Please set UCX_MEM_MMAP_HOOK_MODE to 'none' manually."
"NIXL was already imported, we can't reset UCX_RCACHE_MAX_UNRELEASED. "
"Please set it to '1024' manually."
)
else:
logger.info(
"Setting UCX_MEM_MMAP_HOOK_MODE to 'none' to avoid a rare "
"Setting UCX_RCACHE_MAX_UNRELEASED to '1024' to avoid a rare "
"memory leak in UCX when using NIXL."
)
os.environ["UCX_MEM_MMAP_HOOK_MODE"] = "none"
os.environ["UCX_RCACHE_MAX_UNRELEASED"] = "1024"
if not current_platform.is_rocm():
from nixl._api import nixl_agent as NixlWrapper