From 83e1c76dbe07e30b7f4e6dbe17ba580f4afc98f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Thu, 8 Jan 2026 18:34:43 +0100 Subject: [PATCH] [CI][ROCm] Fix NIXL tests on ROCm (#31728) Signed-off-by: NickLucche --- .buildkite/test-amd.yaml | 16 +++++++++++++++- .buildkite/test-pipeline.yaml | 8 ++++---- .buildkite/test_areas/distributed.yaml | 2 +- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 18ad5849c..7d322aeaf 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1448,7 +1448,21 @@ steps: - tests/v1/kv_connector/nixl_integration/ commands: - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt - - VLLM_ATTENTION_BACKEND=ROCM_ATTN bash v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh + - VLLM_ATTENTION_BACKEND=ROCM_ATTN bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh + +- label: DP EP NixlConnector PD accuracy tests (Distributed) # 15min + mirror_hardwares: [amdexperimental] + agent_pool: mi325_4 + # grade: Blocking + timeout_in_minutes: 15 + working_dir: "/vllm-workspace/tests" + num_gpus: 4 + source_file_dependencies: + - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py + - tests/v1/kv_connector/nixl_integration/ + commands: + - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt + - VLLM_ATTENTION_BACKEND=ROCM_ATTN DP_EP=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh ##### multi gpus test ##### ##### A100 test ##### diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 63122a969..fceae9685 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -1267,8 +1267,8 @@ steps: commands: - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt -- label: NixlConnector PD accuracy tests (Distributed) # 30min - timeout_in_minutes: 30 +- label: NixlConnector PD accuracy tests (Distributed) # 40min + timeout_in_minutes: 40 working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: @@ -1278,8 +1278,8 @@ steps: - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt - bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh -- label: DP EP NixlConnector PD accuracy tests (Distributed) - timeout_in_minutes: 30 +- label: DP EP NixlConnector PD accuracy tests (Distributed) # 15min + timeout_in_minutes: 15 working_dir: "/vllm-workspace/tests" num_gpus: 4 source_file_dependencies: diff --git a/.buildkite/test_areas/distributed.yaml b/.buildkite/test_areas/distributed.yaml index 65a981a9d..c88076bb5 100644 --- a/.buildkite/test_areas/distributed.yaml +++ b/.buildkite/test_areas/distributed.yaml @@ -182,7 +182,7 @@ steps: - tests/v1/kv_connector/nixl_integration/ commands: - uv pip install --system -r /vllm-workspace/requirements/kv_connectors.txt - - bash v1/kv_connector/nixl_integration/tp_config_sweep_accuracy_test.sh + - bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh - label: Pipeline + Context Parallelism (4 GPUs)) timeout_in_minutes: 60