From 121ea5a21f0d16e63e98883e17489927adea0728 Mon Sep 17 00:00:00 2001 From: Divin Honnappa Date: Fri, 3 Apr 2026 15:11:08 -0500 Subject: [PATCH] Removed GPU state confirmation and cleanup steps. (#38238) Signed-off-by: Divin Honnappa --- .../scripts/hardware_ci/run-amd-test.sh | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 472691d63..703a7d753 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -35,23 +35,6 @@ export PYTHONPATH=".." # Helper Functions ############################################################################### -wait_for_clean_gpus() { - local timeout=${1:-300} - local start=$SECONDS - echo "--- Waiting for clean GPU state (timeout: ${timeout}s)" - while true; do - if grep -q clean /opt/amdgpu/etc/gpu_state; then - echo "GPUs state is \"clean\"" - return - fi - if (( SECONDS - start >= timeout )); then - echo "Error: GPUs did not reach clean state within ${timeout}s" >&2 - exit 1 - fi - sleep 3 - done -} - cleanup_docker() { # Get Docker's root directory docker_root=$(docker info -f '{{.DockerRootDir}}') @@ -365,19 +348,12 @@ apply_rocm_test_overrides() { ############################################################################### # --- GPU initialization --- -echo "--- Confirming Clean Initial State" -wait_for_clean_gpus - echo "--- ROCm info" rocminfo # --- Docker housekeeping --- cleanup_docker -echo "--- Resetting GPUs" -echo "reset" > /opt/amdgpu/etc/gpu_state -wait_for_clean_gpus - # --- Pull test image --- echo "--- Pulling container" image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}"