From f85b4eda3a22fedd885ef31650c825d56867587e Mon Sep 17 00:00:00 2001 From: youkaichao Date: Tue, 10 Mar 2026 07:49:47 +0800 Subject: [PATCH] [bugfix] fix nvlink for nixl/ucx (#36475) Signed-off-by: youkaichao --- .../kv_transfer/kv_connector/v1/nixl_connector.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index fa0dd6f67..356a837fb 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -1141,6 +1141,19 @@ class NixlConnectorWorker: expected_engine_id: str, ) -> dict[int, str]: """Do a NIXL handshake with a remote instance.""" + + # the first time we connect to a remote agent. + # be careful, the handshake happens in a background thread. + # it does not have an active cuda context until any cuda runtime + # call is made. when UCX fails to find a valid cuda context, it will + # disable any cuda ipc communication, essentially disabling any NVLink + # communication. + # when we are using device buffers, we need to set the device + # explicitly to make sure the handshake background thread has a valid + # cuda context. + if not self.use_host_buffer: + current_platform.set_device(self.device_id) + # When target instance TP > local TP, we need to perform multiple # handshakes. Do it in a single background job for simplicity. # Regardless, only handshake with the remote TP rank(s) that current