From eebd14651f7618eddda5e79eab2d4ea0cdcc1770 Mon Sep 17 00:00:00 2001 From: qli88 Date: Sat, 7 Mar 2026 15:49:56 -0600 Subject: [PATCH] [CI] Enable Crosslayer KV layout tests for ROCm platforms (#35416) --- .buildkite/test-amd.yaml | 28 ++++++++++++++++++ .../config_sweep_accuracy_test.sh | 29 ++++++++++--------- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index f69713a33..9323310b4 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1486,6 +1486,20 @@ steps: - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt - DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh +- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs) + mirror_hardwares: [amdexperimental, amdproduction] + agent_pool: mi325_4 + # grade: Blocking + timeout_in_minutes: 30 + working_dir: "/vllm-workspace/tests" + num_devices: 4 + source_file_dependencies: + - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py + - tests/v1/kv_connector/nixl_integration/ + commands: + - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt + - CROSS_LAYERS_BLOCKS=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh + ##### multi gpus test ##### ##### A100 test ##### @@ -3136,6 +3150,20 @@ steps: - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt - DP_EP=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh +- label: CrossLayer KV layout Distributed NixlConnector PD accuracy tests (4 GPUs) + mirror_hardwares: [amdexperimental, amdproduction] + agent_pool: mi355_4 + # grade: Blocking + timeout_in_minutes: 30 + working_dir: "/vllm-workspace/tests" + num_devices: 4 + source_file_dependencies: + - vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py + - tests/v1/kv_connector/nixl_integration/ + commands: + - uv pip install --system -r /vllm-workspace/requirements/kv_connectors_rocm.txt + - CROSS_LAYERS_BLOCKS=1 ROCM_ATTN=1 bash v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh + ##### multi gpus test ##### ##### A100 test ##### diff --git a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh index c35f4bfe8..684e2ec4d 100755 --- a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh @@ -56,24 +56,27 @@ run_tests() { echo "✅ All ${label} tests passed!" } -# Run tests +# Set backend +label="default backend" +cmdline_args="" if [[ -n "${ROCM_ATTN:-}" ]]; then echo "ROCM_ATTN is set, running with --attention-backend ROCM_ATTN" - run_tests "ROCM_ATTN backend" "--attention-backend ROCM_ATTN" + label="ROCM_ATTN backend" + cmdline_args=" --attention-backend ROCM_ATTN " +elif [[ -n "${FLASHINFER:-}" ]]; then + echo "FLASHINFER is set, running with --attention-backend FLASHINFER" + label="FLASHINFER backend" + cmdline_args=" --attention-backend FLASHINFER " else - run_tests "default backend" "" -fi - -# Check if FLASHINFER is set (non-empty) -if [[ -n "${FLASHINFER:-}" ]]; then - echo "FLASHINFER is set, rerunning with --attention-backend FLASHINFER" - run_tests "FLASHINFER backend" "--attention-backend FLASHINFER" -else - echo "FLASHINFER not set, skipping FLASHINFER runs." + echo "running with default attention backend" fi # Check if cross-layers is enabled (non-empty) if [[ -n "${CROSS_LAYERS_BLOCKS:-}" ]]; then - echo "CROSS_LAYERS_BLOCKS is set, rerunning with --enable-cross-layers" - run_tests "default backend" "--enable-cross-layers" + echo "CROSS_LAYERS_BLOCKS is set, running with --enable-cross-layers" + label+=" - CROSS_LAYERS_BLOCKS enabled" + cmdline_args+=" --enable-cross-layers " fi + +# Run tests +run_tests "${label}" "${cmdline_args}"