diff --git a/.buildkite/image_build/image_build_cpu.sh b/.buildkite/image_build/image_build_cpu.sh index a69732f43..2d5e49ecd 100755 --- a/.buildkite/image_build/image_build_cpu.sh +++ b/.buildkite/image_build/image_build_cpu.sh @@ -11,10 +11,10 @@ REPO=$2 BUILDKITE_COMMIT=$3 # authenticate with AWS ECR -aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY +aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" # skip build if image already exists -if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then +if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then echo "Image not found, proceeding with build..." else echo "Image found" @@ -24,13 +24,13 @@ fi # build docker build --file docker/Dockerfile.cpu \ --build-arg max_jobs=16 \ - --build-arg buildkite_commit=$BUILDKITE_COMMIT \ + --build-arg buildkite_commit="$BUILDKITE_COMMIT" \ --build-arg VLLM_CPU_AVX512BF16=true \ --build-arg VLLM_CPU_AVX512VNNI=true \ --build-arg VLLM_CPU_AMXBF16=true \ - --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \ + --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \ --target vllm-test \ --progress plain . 
# push -docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu +docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu diff --git a/.buildkite/image_build/image_build_cpu_arm64.sh b/.buildkite/image_build/image_build_cpu_arm64.sh index 615298b65..3f25fbaec 100755 --- a/.buildkite/image_build/image_build_cpu_arm64.sh +++ b/.buildkite/image_build/image_build_cpu_arm64.sh @@ -11,10 +11,10 @@ REPO=$2 BUILDKITE_COMMIT=$3 # authenticate with AWS ECR -aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY +aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" # skip build if image already exists -if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then +if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then echo "Image not found, proceeding with build..." else echo "Image found" @@ -24,10 +24,10 @@ fi # build docker build --file docker/Dockerfile.cpu \ --build-arg max_jobs=16 \ - --build-arg buildkite_commit=$BUILDKITE_COMMIT \ - --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \ + --build-arg buildkite_commit="$BUILDKITE_COMMIT" \ + --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \ --target vllm-test \ --progress plain . 
# push -docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu +docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu diff --git a/.buildkite/image_build/image_build_hpu.sh b/.buildkite/image_build/image_build_hpu.sh index 192447ef4..60fa1789f 100755 --- a/.buildkite/image_build/image_build_hpu.sh +++ b/.buildkite/image_build/image_build_hpu.sh @@ -11,10 +11,10 @@ REPO=$2 BUILDKITE_COMMIT=$3 # authenticate with AWS ECR -aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY +aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" # skip build if image already exists -if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu) ]]; then +if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu) ]]; then echo "Image not found, proceeding with build..." else echo "Image found" @@ -25,10 +25,10 @@ fi docker build \ --file tests/pytorch_ci_hud_benchmark/Dockerfile.hpu \ --build-arg max_jobs=16 \ - --build-arg buildkite_commit=$BUILDKITE_COMMIT \ - --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu \ + --build-arg buildkite_commit="$BUILDKITE_COMMIT" \ + --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu \ --progress plain \ https://github.com/vllm-project/vllm-gaudi.git # push -docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu +docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu diff --git a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh index 02371f3dd..518af9a66 100755 --- a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh @@ -41,4 +41,4 @@ lm_eval --model vllm-vlm \ --tasks chartqa \ --batch_size auto \ --apply_chat_template \ - --limit $LIMIT + --limit "$LIMIT" diff --git a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh 
b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh index c5128cea6..e3c6e16bd 100644 --- a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh @@ -20,14 +20,11 @@ usage() { echo } -while getopts "m:b:l:f:t:" OPT; do +while getopts "m:l:f:t:" OPT; do case ${OPT} in m ) MODEL="$OPTARG" ;; - b ) - BATCH_SIZE="$OPTARG" - ;; l ) LIMIT="$OPTARG" ;; diff --git a/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh index 7dabcf517..2ad599ff1 100755 --- a/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh +++ b/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh @@ -15,11 +15,11 @@ DTYPE_FILTER="${DTYPE_FILTER:-}" check_gpus() { if command -v nvidia-smi; then # check the number of GPUs and GPU type. - declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l) + declare -g gpu_count=$(nvidia-smi --list-gpus | grep -c . || true) elif command -v amd-smi; then - declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l) + declare -g gpu_count=$(amd-smi list | grep -c 'GPU' || true) elif command -v hl-smi; then - declare -g gpu_count=$(hl-smi --list | grep -i "Module ID" | wc -l) + declare -g gpu_count=$(hl-smi --list | grep -ci "Module ID" || true) fi if [[ $gpu_count -gt 0 ]]; then @@ -47,7 +47,7 @@ check_cpus() { declare -g numa_count=$(lscpu | grep "NUMA node(s):" | awk '{print $3}') if [[ $numa_count -gt 0 ]]; then echo "NUMA found." - echo $numa_count + echo "$numa_count" else echo "Need at least 1 NUMA to run benchmarking." 
exit 1 @@ -434,7 +434,7 @@ run_serving_tests() { # iterate over different max_concurrency for max_concurrency in $max_concurrency_list; do - new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency + new_test_name="${test_name}_qps_${qps}_concurrency_${max_concurrency}" echo " new test name $new_test_name" # pass the tensor parallel size, the compilation mode, and the optimization # level to the client so that they can be used on the benchmark dashboard @@ -471,7 +471,7 @@ run_serving_tests() { # clean up if [[ "${DRY_RUN:-0}" != "1" ]]; then - kill -9 $server_pid + kill -9 "$server_pid" kill_gpu_processes fi done diff --git a/.buildkite/scripts/annotate-rocm-release.sh b/.buildkite/scripts/annotate-rocm-release.sh index 8e7dbfb9e..0a817890c 100755 --- a/.buildkite/scripts/annotate-rocm-release.sh +++ b/.buildkite/scripts/annotate-rocm-release.sh @@ -25,7 +25,7 @@ S3_REGION="${AWS_DEFAULT_REGION:-us-west-2}" S3_URL="http://${S3_BUCKET}.s3-website-${S3_REGION}.amazonaws.com" # Format ROCm version for path (e.g., "7.1" -> "rocm710") -ROCM_VERSION_PATH="rocm$(echo ${ROCM_VERSION} | tr -d '.')" +ROCM_VERSION_PATH="rocm$(echo "${ROCM_VERSION}" | tr -d '.')" ROCM_PATH="rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}" buildkite-agent annotate --style 'success' --context 'rocm-release-workflow' << EOF ## ROCm Wheel and Docker Image Releases diff --git a/.buildkite/scripts/cache-rocm-base-wheels.sh b/.buildkite/scripts/cache-rocm-base-wheels.sh index be2447250..060d09db4 100755 --- a/.buildkite/scripts/cache-rocm-base-wheels.sh +++ b/.buildkite/scripts/cache-rocm-base-wheels.sh @@ -83,7 +83,7 @@ case "${1:-}" in exit 1 fi - WHEEL_COUNT=$(ls artifacts/rocm-base-wheels/*.whl 2>/dev/null | wc -l) + WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l) if [[ "$WHEEL_COUNT" -eq 0 ]]; then echo "ERROR: No wheels found in artifacts/rocm-base-wheels/" >&2 exit 1 @@ -110,9 +110,9 @@ case "${1:-}" in echo "" echo "Downloaded wheels:" - 
ls -lh artifacts/rocm-base-wheels/ + find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' -exec ls -lh {} \; - WHEEL_COUNT=$(ls artifacts/rocm-base-wheels/*.whl 2>/dev/null | wc -l) + WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l) echo "" echo "Total: $WHEEL_COUNT wheels" echo "========================================" diff --git a/.buildkite/scripts/cherry-pick-from-milestone.sh b/.buildkite/scripts/cherry-pick-from-milestone.sh index 99eb36acd..67f30930b 100755 --- a/.buildkite/scripts/cherry-pick-from-milestone.sh +++ b/.buildkite/scripts/cherry-pick-from-milestone.sh @@ -134,7 +134,7 @@ log_info "Fetching merged PRs from milestone '${MILESTONE}'..." # Store PR data in a temp file PR_DATA=$(mktemp) -trap "rm -f $PR_DATA" EXIT +trap 'rm -f "$PR_DATA"' EXIT if ! gh pr list --state merged --search "milestone:${MILESTONE}" \ --limit 1000 \ diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh index 3728f73fa..75ae2765e 100755 --- a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh +++ b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh @@ -27,7 +27,7 @@ function cpu_tests() { podman exec -it "$container_id" bash -c " export TORCH_COMPILE_DISABLE=1 set -xve - python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> $HOME/test_basic.log + python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> "$HOME"/test_basic.log # Run basic model test podman exec -it "$container_id" bash -c " @@ -43,7 +43,7 @@ function cpu_tests() { pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-google/gemma-1.1-2b-it] pytest -v -s tests/models/language/pooling/test_classification.py::test_models[float-jason9693/Qwen2.5-1.5B-apeach] # TODO: Below test case tests/models/language/pooling/test_embedding.py::test_models[True-ssmits/Qwen2-7B-Instruct-embed-base] fails on 
ppc64le. Disabling it for time being. - # pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> $HOME/test_rest.log + # pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> "$HOME"/test_rest.log } # All of CPU tests are expected to be finished less than 40 mins. diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test.sh b/.buildkite/scripts/hardware_ci/run-cpu-test.sh index c32b051ca..db75ad308 100644 --- a/.buildkite/scripts/hardware_ci/run-cpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-cpu-test.sh @@ -16,5 +16,5 @@ echo "--- :docker: Building Docker image" docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu . # Run the image, setting --shm-size=4g for tensor parallel. -docker run --rm --cpuset-cpus=$CORE_RANGE --cpuset-mems=$NUMA_NODE -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g $IMAGE_NAME \ - timeout $TIMEOUT_VAL bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}" +docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g "$IMAGE_NAME" \ + timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}" diff --git a/.buildkite/scripts/hardware_ci/run-hpu-test.sh b/.buildkite/scripts/hardware_ci/run-hpu-test.sh index 7df696eb2..c6a556e21 100644 --- a/.buildkite/scripts/hardware_ci/run-hpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-hpu-test.sh @@ -7,7 +7,7 @@ set -exuo pipefail # Try building the docker image image_name="hpu/upstream-vllm-ci:${BUILDKITE_COMMIT}" 
container_name="hpu-upstream-vllm-ci-${BUILDKITE_COMMIT}-container" -cat <&2 exit 1 fi + # shellcheck source=/dev/null source "${TEST_RUN_CONFIG_FILE}" echo "Base docker image name that get from configuration: ${BASE_IMAGE_NAME}" return 0 @@ -48,9 +49,8 @@ get_config() { # get test running configuration. fetch_vllm_test_cfg -get_config # Check if the function call was successful. If not, exit the script. -if [ $? -ne 0 ]; then +if ! get_config; then exit 1 fi @@ -62,14 +62,14 @@ agent_idx=$(echo "${BUILDKITE_AGENT_NAME}" | awk -F'-' '{print $(NF-1)}') echo "agent_idx: ${agent_idx}" builder_name="cachebuilder${agent_idx}" builder_cache_dir="/mnt/docker-cache${agent_idx}" -mkdir -p ${builder_cache_dir} +mkdir -p "${builder_cache_dir}" # Try building the docker image cat < /dev/null; then echo "Installing UV package manager..." curl -LsSf https://astral.sh/uv/install.sh | sh - source $HOME/.local/bin/env + source "$HOME"/.local/bin/env fi # Clone Prime-RL repository at specific branch for reproducible tests diff --git a/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh b/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh index 463969cbc..e26273bba 100644 --- a/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh +++ b/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh @@ -51,14 +51,14 @@ for BACK in "${BACKENDS[@]}"; do --enable-eplb \ --trust-remote-code \ --max-model-len 2048 \ - --all2all-backend $BACK \ - --port $PORT & + --all2all-backend "$BACK" \ + --port "$PORT" & SERVER_PID=$! 
- wait_for_server $PORT + wait_for_server "$PORT" TAG=$(echo "$MODEL" | tr '/: \\n' '_____') OUT="${OUT_DIR}/${TAG}_${BACK}.json" - python3 tests/evals/gsm8k/gsm8k_eval.py --host http://127.0.0.1 --port $PORT --num-questions ${NUM_Q} --save-results ${OUT} + python3 tests/evals/gsm8k/gsm8k_eval.py --host http://127.0.0.1 --port "$PORT" --num-questions "${NUM_Q}" --save-results "${OUT}" python3 - < "$VLLM_LOG" 2>&1 & + --download_dir "$DOWNLOAD_DIR" \ + --max-model-len "$MAX_MODEL_LEN" > "$VLLM_LOG" 2>&1 & echo "wait for 20 minutes.." echo # sleep 1200 # wait for 10 minutes... -for i in {1..120}; do +for _ in {1..120}; do # TODO: detect other type of errors. if grep -Fq "raise RuntimeError" "$VLLM_LOG"; then echo "Detected RuntimeError, exiting." @@ -78,11 +78,11 @@ echo "logging to $BM_LOG" echo vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name sonnet \ --dataset-path benchmarks/sonnet_4x.txt \ - --sonnet-input-len $INPUT_LEN \ - --sonnet-output-len $OUTPUT_LEN \ + --sonnet-input-len "$INPUT_LEN" \ + --sonnet-output-len "$OUTPUT_LEN" \ --ignore-eos > "$BM_LOG" echo "completed..." diff --git a/.buildkite/scripts/upload-nightly-wheels.sh b/.buildkite/scripts/upload-nightly-wheels.sh index 1af7f476a..5efcb89bf 100644 --- a/.buildkite/scripts/upload-nightly-wheels.sh +++ b/.buildkite/scripts/upload-nightly-wheels.sh @@ -76,16 +76,15 @@ mkdir -p "$INDICES_OUTPUT_DIR" # this indices have relative paths that could work as long as it is next to the wheel directory in s3 # i.e., the wheels are always in s3://vllm-wheels// # and indices can be placed in //, or /nightly/, or // -if [[ ! 
-z "$DEFAULT_VARIANT_ALIAS" ]]; then - alias_arg="--alias-to-default $DEFAULT_VARIANT_ALIAS" -else - alias_arg="" +alias_args=() +if [[ -n "$DEFAULT_VARIANT_ALIAS" ]]; then + alias_args=(--alias-to-default "$DEFAULT_VARIANT_ALIAS") fi # HACK: we do not need regex module here, but it is required by pre-commit hook # To avoid any external dependency, we simply replace it back to the stdlib re module sed -i 's/import regex as re/import re/g' .buildkite/scripts/generate-nightly-index.py -$PYTHON .buildkite/scripts/generate-nightly-index.py --version "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "commit $BUILDKITE_COMMIT" $alias_arg +$PYTHON .buildkite/scripts/generate-nightly-index.py --version "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "commit $BUILDKITE_COMMIT" "${alias_args[@]}" # copy indices to // unconditionally echo "Uploading indices to $S3_COMMIT_PREFIX" @@ -100,9 +99,9 @@ fi # re-generate and copy to // only if it does not have "dev" in the version if [[ "$version" != *"dev"* ]]; then echo "Re-generating indices for /$pure_version/" - rm -rf "$INDICES_OUTPUT_DIR/*" + rm -rf "${INDICES_OUTPUT_DIR:?}"/* mkdir -p "$INDICES_OUTPUT_DIR" # wheel-dir is overridden to be the commit directory, so that the indices point to the correct wheel path - $PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --wheel-dir "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" $alias_arg + $PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --wheel-dir "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" "${alias_args[@]}" aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/$pure_version/" fi diff --git a/.buildkite/scripts/upload-release-wheels-pypi.sh b/.buildkite/scripts/upload-release-wheels-pypi.sh index 75f519168..dacdb6e92
100644 --- a/.buildkite/scripts/upload-release-wheels-pypi.sh +++ b/.buildkite/scripts/upload-release-wheels-pypi.sh @@ -7,7 +7,7 @@ SUBPATH=$BUILDKITE_COMMIT S3_COMMIT_PREFIX="s3://$BUCKET/$SUBPATH/" RELEASE_VERSION=$(buildkite-agent meta-data get release-version) -GIT_VERSION=$(git describe --exact-match --tags $BUILDKITE_COMMIT 2>/dev/null) +GIT_VERSION=$(git describe --exact-match --tags "$BUILDKITE_COMMIT" 2>/dev/null) echo "Release version from Buildkite: $RELEASE_VERSION" @@ -55,7 +55,7 @@ mkdir -p $DIST_DIR aws s3 cp --recursive --exclude "*" --include "vllm-${PURE_VERSION}*.whl" --exclude "*dev*" --exclude "*rc[0-9]*" "$S3_COMMIT_PREFIX" $DIST_DIR echo "Wheels copied to local directory" # generate source tarball -git archive --format=tar.gz --output="$DIST_DIR/vllm-${PURE_VERSION}.tar.gz" $BUILDKITE_COMMIT +git archive --format=tar.gz --output="$DIST_DIR/vllm-${PURE_VERSION}.tar.gz" "$BUILDKITE_COMMIT" ls -la $DIST_DIR # upload wheels to PyPI (only default variant, i.e. files without '+' in the name) @@ -65,6 +65,6 @@ if [[ -z "$PYPI_WHEEL_FILES" ]]; then exit 1 fi -python3 -m twine check $PYPI_WHEEL_FILES -python3 -m twine upload --non-interactive --verbose $PYPI_WHEEL_FILES +echo "$PYPI_WHEEL_FILES" | xargs python3 -m twine check +echo "$PYPI_WHEEL_FILES" | xargs python3 -m twine upload --non-interactive --verbose echo "Wheels uploaded to PyPI" diff --git a/.buildkite/scripts/upload-rocm-wheels.sh b/.buildkite/scripts/upload-rocm-wheels.sh index bb555bc84..a42848a16 100755 --- a/.buildkite/scripts/upload-rocm-wheels.sh +++ b/.buildkite/scripts/upload-rocm-wheels.sh @@ -55,7 +55,7 @@ mkdir -p all-rocm-wheels cp artifacts/rocm-base-wheels/*.whl all-rocm-wheels/ 2>/dev/null || true cp artifacts/rocm-vllm-wheel/*.whl all-rocm-wheels/ 2>/dev/null || true -WHEEL_COUNT=$(ls all-rocm-wheels/*.whl 2>/dev/null | wc -l) +WHEEL_COUNT=$(find all-rocm-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l) echo "Total wheels to upload: $WHEEL_COUNT" if [ "$WHEEL_COUNT" -eq 0 ]; then @@ -115,7
+115,7 @@ if [[ "$BUILDKITE_BRANCH" == "main" && "$BUILDKITE_PULL_REQUEST" == "false" ]] | fi # Extract version from vLLM wheel and update version-specific index -VLLM_WHEEL=$(ls all-rocm-wheels/vllm*.whl 2>/dev/null | head -1) +VLLM_WHEEL=$(find all-rocm-wheels -maxdepth 1 -name 'vllm*.whl' 2>/dev/null | head -1) if [ -n "$VLLM_WHEEL" ]; then VERSION=$(unzip -p "$VLLM_WHEEL" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) echo "Version in wheel: $VERSION" diff --git a/benchmarks/auto_tune/auto_tune.sh b/benchmarks/auto_tune/auto_tune.sh index a245e2022..efb234a2d 100644 --- a/benchmarks/auto_tune/auto_tune.sh +++ b/benchmarks/auto_tune/auto_tune.sh @@ -46,10 +46,10 @@ echo "VLLM_LOGGING_LEVEL=$VLLM_LOGGING_LEVEL" echo "RESULT_FILE=$RESULT" echo "====================== AUTO TUNEPARAMETERS ====================" -rm -rf $LOG_FOLDER -rm -rf $PROFILE_PATH -mkdir -p $LOG_FOLDER -mkdir -p $PROFILE_PATH +rm -rf "$LOG_FOLDER" +rm -rf "$PROFILE_PATH" +mkdir -p "$LOG_FOLDER" +mkdir -p "$PROFILE_PATH" cd "$BASE/vllm" @@ -114,7 +114,7 @@ start_server() { # wait for 10 minutes... server_started=0 - for i in {1..60}; do + for _ in {1..60}; do # This line checks whether the server is still alive or not, # since that we should always have permission to send signal to the server process. kill -0 $server_pid 2> /dev/null || break @@ -145,12 +145,12 @@ run_benchmark() { local vllm_log="$LOG_FOLDER/vllm_log_${max_num_seqs}_${max_num_batched_tokens}.txt" echo "vllm_log: $vllm_log" echo - rm -f $vllm_log + rm -f "$vllm_log" pkill -if "vllm serve" || true echo "starting server..." # Call start_server without a profile_dir to avoid profiling overhead - start_server $gpu_memory_utilization $max_num_seqs $max_num_batched_tokens $vllm_log "" + start_server "$gpu_memory_utilization" "$max_num_seqs" "$max_num_batched_tokens" "$vllm_log" "" result=$? if [[ "$result" -eq 1 ]]; then echo "server failed to start. 
gpu_memory_utilization:$gpu_memory_utilization, max_num_seqs:$max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens" @@ -168,15 +168,15 @@ run_benchmark() { # --profile flag is removed from this call vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name random \ --random-input-len $adjusted_input_len \ - --random-output-len $OUTPUT_LEN \ + --random-output-len "$OUTPUT_LEN" \ --ignore-eos \ --disable-tqdm \ --request-rate inf \ --percentile-metrics ttft,tpot,itl,e2el \ - --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \ --num-prompts 1000 \ --random-prefix-len $prefix_len \ --host "$HOSTNAME" \ @@ -195,20 +195,20 @@ run_benchmark() { request_rate=$((${throughput%.*} + 1)) while ((request_rate > 0)); do # clear prefix cache - curl -X POST http://${HOSTNAME}:8004/reset_prefix_cache + curl -X POST http://"${HOSTNAME}":8004/reset_prefix_cache sleep 5 bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt" vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name random \ --random-input-len $adjusted_input_len \ - --random-output-len $OUTPUT_LEN \ + --random-output-len "$OUTPUT_LEN" \ --ignore-eos \ --disable-tqdm \ --request-rate $request_rate \ --percentile-metrics ttft,tpot,itl,e2el \ - --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \ --num-prompts 100 \ --random-prefix-len $prefix_len \ --host "$HOSTNAME" \ @@ -255,7 +255,7 @@ gpu_memory_utilization=0.98 find_gpu_memory_utilization=0 while (( $(echo "$gpu_memory_utilization >= 0.9" | bc -l) )); do # Pass empty string for profile_dir argument - start_server $gpu_memory_utilization "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" "$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log" "" + start_server "$gpu_memory_utilization" "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" 
"$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log" "" result=$? if [[ "$result" -eq 0 ]]; then find_gpu_memory_utilization=1 @@ -274,7 +274,7 @@ fi for num_seqs in "${num_seqs_list[@]}"; do for num_batched_tokens in "${num_batched_tokens_list[@]}"; do - run_benchmark $num_seqs $num_batched_tokens $gpu_memory_utilization + run_benchmark "$num_seqs" "$num_batched_tokens" "$gpu_memory_utilization" done done echo "finish permutations" @@ -285,7 +285,7 @@ echo "finish permutations" if (( $(echo "$best_throughput > 0" | bc -l) )); then echo echo "Benchmark tuning finished. Now running profiling on the best configuration found..." - echo "Best config: max_num_seqs: $best_max_num_seqs, max_num_batched_tokens: $best_num_batched_tokens, throughput: $best_throughput" + echo "Best config: max_num_seqs: $best_max_num_seqs, max_num_batched_tokens: $best_num_batched_tokens, throughput: $best_throughput, goodput: $best_goodput" echo vllm_log="$LOG_FOLDER/vllm_log_BEST_PROFILE.txt" @@ -293,7 +293,7 @@ if (( $(echo "$best_throughput > 0" | bc -l) )); then # Start server with the best params and profiling ENABLED echo "Starting server for profiling..." - start_server $gpu_memory_utilization $best_max_num_seqs $best_num_batched_tokens "$vllm_log" "$PROFILE_PATH" + start_server "$gpu_memory_utilization" "$best_max_num_seqs" "$best_num_batched_tokens" "$vllm_log" "$PROFILE_PATH" # Run benchmark with the best params and the --profile flag echo "Running benchmark with profiling..." 
@@ -301,15 +301,15 @@ if (( $(echo "$best_throughput > 0" | bc -l) )); then adjusted_input_len=$(( INPUT_LEN - prefix_len )) vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name random \ --random-input-len $adjusted_input_len \ - --random-output-len $OUTPUT_LEN \ + --random-output-len "$OUTPUT_LEN" \ --ignore-eos \ --disable-tqdm \ - --request-rate $best_request_rate \ + --request-rate "$best_request_rate" \ --percentile-metrics ttft,tpot,itl,e2el \ - --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \ --num-prompts 100 \ --random-prefix-len $prefix_len \ --host "$HOSTNAME" \ diff --git a/benchmarks/auto_tune/batch_auto_tune.sh b/benchmarks/auto_tune/batch_auto_tune.sh index 57ef20daf..0f3ef0f03 100755 --- a/benchmarks/auto_tune/batch_auto_tune.sh +++ b/benchmarks/auto_tune/batch_auto_tune.sh @@ -64,7 +64,7 @@ for i in $(seq 0 $(($num_runs - 1))); do else STATUS="FAILURE" ((FAILURE_COUNT++)) - FAILED_RUNS+=("Run #$((i+1)): $(echo $run_object | jq -c .)") + FAILED_RUNS+=("Run #$((i+1)): $(echo "$run_object" | jq -c .)") fi RUN_OUTPUT=$(<"$RUN_OUTPUT_FILE") diff --git a/benchmarks/run_structured_output_benchmark.sh b/benchmarks/run_structured_output_benchmark.sh index b043ab83e..bc40ed83f 100755 --- a/benchmarks/run_structured_output_benchmark.sh +++ b/benchmarks/run_structured_output_benchmark.sh @@ -71,7 +71,7 @@ while [[ $# -gt 0 ]]; do usage ;; *) - echo "Unknown argument: $1\n" + printf "Unknown argument: %s\n" "$1" usage ;; esac @@ -84,15 +84,17 @@ mkdir -p "$OUTPUT_DIR" QPS_VALUES=(25 20 15 10 5 1) # Common parameters -COMMON_PARAMS="--backend $BACKEND \ - --model $MODEL \ - --dataset $DATASET \ - --structured-output-ratio $STRUCTURED_OUTPUT_RATIO \ - --save-results \ - --result-dir $OUTPUT_DIR \ - --output-len $MAX_NEW_TOKENS \ - --port $PORT \ - --tokenizer-mode $TOKENIZER_MODE" +COMMON_PARAMS=( + --backend "$BACKEND" + --model "$MODEL" + --dataset "$DATASET" + --structured-output-ratio 
"$STRUCTURED_OUTPUT_RATIO" + --save-results + --result-dir "$OUTPUT_DIR" + --output-len "$MAX_NEW_TOKENS" + --port "$PORT" + --tokenizer-mode "$TOKENIZER_MODE" +) echo "Starting structured output benchmark with model: $MODEL" echo "Backend: $BACKEND" @@ -109,17 +111,17 @@ for qps in "${QPS_VALUES[@]}"; do GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown") # Construct filename for this run - FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json" + FILENAME="${BACKEND}_${qps}qps_$(basename "$MODEL")_${DATASET}_${GIT_HASH}_${GIT_BRANCH}.json" NUM_PROMPTS=$(echo "$TOTAL_SECONDS * $qps" | bc) NUM_PROMPTS=${NUM_PROMPTS%.*} # Remove fractional part echo "Running benchmark with $NUM_PROMPTS prompts" # Run the benchmark - python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \ - --request-rate $qps \ + python "$SCRIPT_DIR/benchmark_serving_structured_output.py" "${COMMON_PARAMS[@]}" \ + --request-rate "$qps" \ --result-filename "$FILENAME" \ - --num-prompts $NUM_PROMPTS + --num-prompts "$NUM_PROMPTS" echo "Completed benchmark with QPS: $qps" echo "----------------------------------------" diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh b/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh index 95a418374..19459acc9 100644 --- a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh +++ b/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh @@ -8,7 +8,7 @@ declare -a PIDS=() ############################################################################### MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}" LOG_PATH="${LOG_PATH:-./logs}" -mkdir -p $LOG_PATH +mkdir -p "$LOG_PATH" ENCODE_PORT="${ENCODE_PORT:-19534}" PREFILL_PORT="${PREFILL_PORT:-19535}" @@ -84,10 +84,10 @@ trap cleanup TERM # clear previous cache echo "remove previous ec cache folder" -rm -rf $EC_SHARED_STORAGE_PATH +rm -rf "$EC_SHARED_STORAGE_PATH" echo "make ec 
cache folder" -mkdir -p $EC_SHARED_STORAGE_PATH +mkdir -p "$EC_SHARED_STORAGE_PATH" ############################################################################### # Encoder worker @@ -100,7 +100,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -124,7 +124,7 @@ vllm serve "$MODEL" \ --enforce-eager \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -152,7 +152,7 @@ vllm serve "$MODEL" \ --enforce-eager \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_consumer" @@ -162,9 +162,9 @@ vllm serve "$MODEL" \ PIDS+=($!) # Wait for workers -wait_for_server $ENCODE_PORT -wait_for_server $PREFILL_PORT -wait_for_server $DECODE_PORT +wait_for_server "$ENCODE_PORT" +wait_for_server "$PREFILL_PORT" +wait_for_server "$DECODE_PORT" ############################################################################### # Proxy @@ -179,7 +179,7 @@ python disagg_epd_proxy.py \ PIDS+=($!) -wait_for_server $PROXY_PORT +wait_for_server "$PROXY_PORT" echo "All services are up!" ############################################################################### @@ -187,14 +187,14 @@ echo "All services are up!" 
############################################################################### echo "Running benchmark (stream)..." vllm bench serve \ - --model $MODEL \ + --model "$MODEL" \ --backend openai-chat \ --endpoint /v1/chat/completions \ --dataset-name hf \ --dataset-path lmarena-ai/VisionArena-Chat \ --seed 0 \ - --num-prompts $NUM_PROMPTS \ - --port $PROXY_PORT + --num-prompts "$NUM_PROMPTS" \ + --port "$PROXY_PORT" PIDS+=($!) @@ -202,10 +202,10 @@ PIDS+=($!) # Single request with local image ############################################################################### echo "Running single request with local image (non-stream)..." -curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \ +curl http://127.0.0.1:"${PROXY_PORT}"/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "'${MODEL}'", + "model": "'"${MODEL}"'", "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh b/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh index c4a591d74..18c278b2a 100644 --- a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh +++ b/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh @@ -8,7 +8,7 @@ declare -a PIDS=() ############################################################################### MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}" LOG_PATH="${LOG_PATH:-./logs}" -mkdir -p $LOG_PATH +mkdir -p "$LOG_PATH" ENCODE_PORT="${ENCODE_PORT:-19534}" PREFILL_DECODE_PORT="${PREFILL_DECODE_PORT:-19535}" @@ -78,10 +78,10 @@ trap cleanup TERM # clear previous cache echo "remove previous ec cache folder" -rm -rf $EC_SHARED_STORAGE_PATH +rm -rf "$EC_SHARED_STORAGE_PATH" echo "make ec cache folder" -mkdir -p $EC_SHARED_STORAGE_PATH +mkdir -p "$EC_SHARED_STORAGE_PATH" ############################################################################### # Encoder worker @@ -94,7 
+94,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -115,7 +115,7 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \ --enforce-eager \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -128,8 +128,8 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \ PIDS+=($!) # Wait for workers -wait_for_server $ENCODE_PORT -wait_for_server $PREFILL_DECODE_PORT +wait_for_server "$ENCODE_PORT" +wait_for_server "$PREFILL_DECODE_PORT" ############################################################################### # Proxy @@ -144,7 +144,7 @@ python disagg_epd_proxy.py \ PIDS+=($!) -wait_for_server $PROXY_PORT +wait_for_server "$PROXY_PORT" echo "All services are up!" ############################################################################### @@ -152,14 +152,14 @@ echo "All services are up!" ############################################################################### echo "Running benchmark (stream)..." vllm bench serve \ - --model $MODEL \ + --model "$MODEL" \ --backend openai-chat \ --endpoint /v1/chat/completions \ --dataset-name hf \ --dataset-path lmarena-ai/VisionArena-Chat \ --seed 0 \ - --num-prompts $NUM_PROMPTS \ - --port $PROXY_PORT + --num-prompts "$NUM_PROMPTS" \ + --port "$PROXY_PORT" PIDS+=($!) @@ -167,10 +167,10 @@ PIDS+=($!) 
# Single request with local image ############################################################################### echo "Running single request with local image (non-stream)..." -curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \ +curl http://127.0.0.1:"${PROXY_PORT}"/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "'${MODEL}'", + "model": "'"${MODEL}"'", "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ diff --git a/examples/online_serving/disaggregated_prefill.sh b/examples/online_serving/disaggregated_prefill.sh index cd2f2e44a..3022711d7 100644 --- a/examples/online_serving/disaggregated_prefill.sh +++ b/examples/online_serving/disaggregated_prefill.sh @@ -54,7 +54,7 @@ wait_for_server() { # You can also adjust --kv-ip and --kv-port for distributed inference. # prefilling instance, which is the KV producer -CUDA_VISIBLE_DEVICES=0 vllm serve $MODEL_NAME \ +CUDA_VISIBLE_DEVICES=0 vllm serve "$MODEL_NAME" \ --host 0.0.0.0 \ --port 8100 \ --max-model-len 100 \ @@ -64,7 +64,7 @@ CUDA_VISIBLE_DEVICES=0 vllm serve $MODEL_NAME \ '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":"1e9","kv_port":"14579","kv_connector_extra_config":{"proxy_ip":"'"$VLLM_HOST_IP"'","proxy_port":"30001","http_ip":"'"$VLLM_HOST_IP"'","http_port":"8100","send_type":"PUT_ASYNC"}}' & # decoding instance, which is the KV consumer -CUDA_VISIBLE_DEVICES=1 vllm serve $MODEL_NAME \ +CUDA_VISIBLE_DEVICES=1 vllm serve "$MODEL_NAME" \ --host 0.0.0.0 \ --port 8200 \ --max-model-len 100 \ diff --git a/examples/online_serving/disaggregated_serving/kv_events.sh b/examples/online_serving/disaggregated_serving/kv_events.sh index a111db217..533a12cb0 100644 --- a/examples/online_serving/disaggregated_serving/kv_events.sh +++ b/examples/online_serving/disaggregated_serving/kv_events.sh @@ -34,7 +34,7 @@ wait_for_server() { done" && return 0 || return 1 } -vllm 
serve $MODEL_NAME \ +vllm serve "$MODEL_NAME" \ --port 8100 \ --max-model-len 100 \ --enforce-eager \ diff --git a/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh b/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh index e38d377c3..5a3b939a9 100644 --- a/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh +++ b/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh @@ -143,7 +143,7 @@ main() { IFS=',' read -ra BOOTSTRAP_PORT_ARRAY <<< "$BOOTSTRAP_PORTS" IFS=',' read -ra DECODE_PORT_ARRAY <<< "$DECODE_PORTS" - proxy_param="" + proxy_args=() # ============================================================================= # Launch Prefill Servers (X Producers) @@ -156,12 +156,12 @@ main() { local bootstrap_port=${BOOTSTRAP_PORT_ARRAY[$i]} echo " Prefill server $((i+1)): GPU $gpu_id, Port $port, Bootstrap Port $bootstrap_port" - VLLM_MOONCAKE_BOOTSTRAP_PORT=$bootstrap_port CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ - --port $port \ + VLLM_MOONCAKE_BOOTSTRAP_PORT=$bootstrap_port CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ + --port "$port" \ --kv-transfer-config \ "{\"kv_connector\":\"MooncakeConnector\",\"kv_role\":\"kv_producer\"}" > prefill$((i+1)).log 2>&1 & PIDS+=($!) - proxy_param="${proxy_param} --prefill http://0.0.0.0:${port} $bootstrap_port" + proxy_args+=(--prefill "http://0.0.0.0:${port}" "$bootstrap_port") done # ============================================================================= @@ -174,12 +174,12 @@ main() { local port=${DECODE_PORT_ARRAY[$i]} echo " Decode server $((i+1)): GPU $gpu_id, Port $port" - CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ - --port $port \ + CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ + --port "$port" \ --kv-transfer-config \ "{\"kv_connector\":\"MooncakeConnector\",\"kv_role\":\"kv_consumer\"}" > decode$((i+1)).log 2>&1 & PIDS+=($!) 
- proxy_param="${proxy_param} --decode http://0.0.0.0:${port}" + proxy_args+=(--decode "http://0.0.0.0:${port}") done # ============================================================================= @@ -187,7 +187,7 @@ main() { # ============================================================================= echo "" echo "Starting proxy server on port $PROXY_PORT..." - python3 mooncake_connector_proxy.py $proxy_param --port $PROXY_PORT > proxy.log 2>&1 & + python3 mooncake_connector_proxy.py "${proxy_args[@]}" --port "$PROXY_PORT" > proxy.log 2>&1 & PIDS+=($!) # ============================================================================= @@ -196,9 +196,10 @@ main() { echo "" echo "Waiting for all servers to start..." for port in "${PREFILL_PORT_ARRAY[@]}" "${DECODE_PORT_ARRAY[@]}"; do - if ! wait_for_server $port; then + if ! wait_for_server "$port"; then echo "Failed to start server on port $port" cleanup + # shellcheck disable=SC2317 exit 1 fi done @@ -209,8 +210,8 @@ main() { # ============================================================================= # Run Benchmark # ============================================================================= - vllm bench serve --port $PROXY_PORT --seed $(date +%s) \ - --backend vllm --model $MODEL \ + vllm bench serve --port "$PROXY_PORT" --seed "$(date +%s)" \ + --backend vllm --model "$MODEL" \ --dataset-name random --random-input-len 7500 --random-output-len 200 \ --num-prompts 200 --burstiness 100 --request-rate 2 | tee benchmark.log diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh index 1e7acccb4..603f9eb91 100644 --- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh +++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh @@ -166,10 +166,10 @@ main() { local kv_port=$((21001 + i)) echo " 
Prefill server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port" - CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ + CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ --enforce-eager \ --host 0.0.0.0 \ - --port $port \ + --port "$port" \ --tensor-parallel-size 1 \ --seed 1024 \ --dtype float16 \ @@ -194,10 +194,10 @@ main() { local kv_port=$((22001 + i)) echo " Decode server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port" - CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ + CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ --enforce-eager \ --host 0.0.0.0 \ - --port $port \ + --port "$port" \ --tensor-parallel-size 1 \ --seed 1024 \ --dtype float16 \ @@ -217,9 +217,10 @@ main() { echo "" echo "Waiting for all servers to start..." for port in "${PREFILL_PORT_ARRAY[@]}" "${DECODE_PORT_ARRAY[@]}"; do - if ! wait_for_server $port; then + if ! wait_for_server "$port"; then echo "Failed to start server on port $port" cleanup + # shellcheck disable=SC2317 exit 1 fi done @@ -231,8 +232,8 @@ main() { # Run Benchmark # ============================================================================= cd ../../../benchmarks/ - vllm bench serve --port 10001 --seed $(date +%s) \ - --model $MODEL \ + vllm bench serve --port 10001 --seed "$(date +%s)" \ + --model "$MODEL" \ --dataset-name random --random-input-len 7500 --random-output-len 200 \ --num-prompts 200 --burstiness 100 --request-rate 2 | tee benchmark.log diff --git a/examples/online_serving/elastic_ep/bench.sh b/examples/online_serving/elastic_ep/bench.sh index e47631465..4f5dede43 100644 --- a/examples/online_serving/elastic_ep/bench.sh +++ b/examples/online_serving/elastic_ep/bench.sh @@ -50,8 +50,8 @@ while [[ $# -gt 0 ]]; do done vllm bench serve \ - --model $MODEL_NAME \ - --host $HOST \ - --port $PORT \ - --num-prompts $NUM_PROMPTS \ - --request-rate $REQUEST_RATE + --model "$MODEL_NAME" \ + --host "$HOST" \ + --port "$PORT" \ + --num-prompts "$NUM_PROMPTS" \ + --request-rate "$REQUEST_RATE" diff --git 
a/examples/online_serving/elastic_ep/serve_deepseek_v2.sh b/examples/online_serving/elastic_ep/serve_deepseek_v2.sh index 20bf598c0..b4e922099 100644 --- a/examples/online_serving/elastic_ep/serve_deepseek_v2.sh +++ b/examples/online_serving/elastic_ep/serve_deepseek_v2.sh @@ -57,15 +57,15 @@ echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALL export RAY_DEDUP_LOGS=0 export VLLM_USE_DEEP_GEMM=1 -vllm serve $MODEL_NAME \ - --data-parallel-size $DATA_PARALLEL_SIZE \ - --data-parallel-size-local $DATA_PARALLEL_SIZE \ +vllm serve "$MODEL_NAME" \ + --data-parallel-size "$DATA_PARALLEL_SIZE" \ + --data-parallel-size-local "$DATA_PARALLEL_SIZE" \ --data-parallel-backend ray \ --enforce-eager \ --enable-expert-parallel \ --enable-eplb \ --all2all-backend pplx \ - --num-redundant-experts $REDUNDANT_EXPERTS \ + --num-redundant-experts "$REDUNDANT_EXPERTS" \ --trust-remote-code \ - --host $HOST \ - --port $PORT + --host "$HOST" \ + --port "$PORT" diff --git a/examples/online_serving/multi-node-serving.sh b/examples/online_serving/multi-node-serving.sh index 3fc5502fb..d2823bb8f 100644 --- a/examples/online_serving/multi-node-serving.sh +++ b/examples/online_serving/multi-node-serving.sh @@ -57,8 +57,7 @@ case "$subcommand" in # Retry until the worker node connects to the head node or the timeout expires. for (( i=0; i < $ray_init_timeout; i+=5 )); do - ray start --address=$ray_address:$ray_port --block "${start_params[@]}" - if [ $? -eq 0 ]; then + if ray start --address="$ray_address":"$ray_port" --block "${start_params[@]}"; then echo "Worker: Ray runtime started with head address $ray_address:$ray_port" exit 0 fi @@ -95,12 +94,12 @@ case "$subcommand" in fi # Start the Ray head node. - ray start --head --port=$ray_port "${start_params[@]}" + ray start --head --port="$ray_port" "${start_params[@]}" # Poll Ray until every worker node is active. 
for (( i=0; i < $ray_init_timeout; i+=5 )); do - active_nodes=`python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))'` - if [ $active_nodes -eq $ray_cluster_size ]; then + active_nodes=$(python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))') + if [ "$active_nodes" -eq "$ray_cluster_size" ]; then echo "All ray workers are active and the ray cluster is initialized successfully." exit 0 fi diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh index a409c49b5..3636d7e99 100644 --- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh +++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh @@ -22,11 +22,10 @@ check_hf_token() { check_num_gpus() { # can you check if the number of GPUs are >=2 via nvidia-smi/rocm-smi? - which rocm-smi > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if ! which rocm-smi > /dev/null 2>&1; then num_gpus=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) else - num_gpus=$(rocm-smi --showid | grep Instinct | wc -l) + num_gpus=$(rocm-smi --showid | grep -c Instinct) fi if [ "$num_gpus" -lt 2 ]; then @@ -39,8 +38,7 @@ check_num_gpus() { ensure_python_library_installed() { echo "Checking if $1 is installed..." - python3 -c "import $1" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if ! python3 -c "import $1" > /dev/null 2>&1; then if [ "$1" == "nixl" ]; then echo "$1 is not installed. Please refer to https://github.com/ai-dynamo/nixl for installation." else @@ -102,12 +100,12 @@ main() { bash disagg_vllm_launcher.sh prefiller \ > >(tee prefiller.log) 2>&1 & prefiller_pid=$! - PIDS+=($prefiller_pid) + PIDS+=("$prefiller_pid") bash disagg_vllm_launcher.sh decoder \ > >(tee decoder.log) 2>&1 & decoder_pid=$! 
- PIDS+=($decoder_pid) + PIDS+=("$decoder_pid") python3 disagg_proxy_server.py \ --host localhost \ @@ -118,7 +116,7 @@ main() { --decoder-port 8200 \ > >(tee proxy.log) 2>&1 & proxy_pid=$! - PIDS+=($proxy_pid) + PIDS+=("$proxy_pid") wait_for_server 8100 wait_for_server 8200 @@ -128,7 +126,7 @@ main() { # begin benchmark cd ../../../../benchmarks/ - vllm bench serve --port 9000 --seed $(date +%s) \ + vllm bench serve --port 9000 --seed "$(date +%s)" \ --model meta-llama/Llama-3.1-8B-Instruct \ --dataset-name random --random-input-len 7500 --random-output-len 200 \ --num-prompts 200 --burstiness 100 --request-rate 3.6 | tee benchmark.log diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh index 682df45d9..363c35028 100644 --- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh +++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh @@ -34,7 +34,7 @@ if [[ $1 == "prefiller" ]]; then VLLM_ENABLE_V1_MULTIPROCESSING=1 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ CUDA_VISIBLE_DEVICES=0 \ - vllm serve $MODEL \ + vllm serve "$MODEL" \ --port 8100 \ --enforce-eager \ --kv-transfer-config \ @@ -51,7 +51,7 @@ elif [[ $1 == "decoder" ]]; then VLLM_ENABLE_V1_MULTIPROCESSING=1 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ CUDA_VISIBLE_DEVICES=1 \ - vllm serve $MODEL \ + vllm serve "$MODEL" \ --port 8200 \ --enforce-eager \ --kv-transfer-config \ diff --git a/examples/pooling/embed/openai_embedding_long_text/service.sh b/examples/pooling/embed/openai_embedding_long_text/service.sh index 0353b8f5a..37a8b625b 100644 --- a/examples/pooling/embed/openai_embedding_long_text/service.sh +++ b/examples/pooling/embed/openai_embedding_long_text/service.sh @@ -103,7 +103,7 @@ vllm serve "$MODEL_NAME" \ --tensor-parallel-size "$GPU_COUNT" \ --enforce-eager \ --pooler-config "$POOLER_CONFIG" \ - --served-model-name ${MODEL_CODE} \ + 
--served-model-name "${MODEL_CODE}" \ --api-key "$API_KEY" \ --trust-remote-code \ --port "$PORT" \ diff --git a/tests/standalone_tests/python_only_compile.sh b/tests/standalone_tests/python_only_compile.sh index ebf199a50..adfab1139 100644 --- a/tests/standalone_tests/python_only_compile.sh +++ b/tests/standalone_tests/python_only_compile.sh @@ -6,7 +6,7 @@ set -e merge_base_commit=$(git merge-base HEAD origin/main) echo "INFO: current merge base commit with main: $merge_base_commit" -git show --oneline -s $merge_base_commit +git show --oneline -s "$merge_base_commit" # test whether the metadata.json url is valid, retry each 3 minutes up to 5 times # this avoids cumbersome error messages & manual retries in case the precompiled wheel @@ -40,7 +40,7 @@ for i in {1..5}; do fi fi # failure handling & retry logic - if [ $i -eq 5 ]; then + if [ "$i" -eq 5 ]; then echo "ERROR: metadata is still not available after 5 attempts." echo "ERROR: Please check whether the precompiled wheel for commit $merge_base_commit is available." echo " NOTE: If $merge_base_commit is a new commit on main, maybe try again after its release pipeline finishes." 
diff --git a/tests/v1/ec_connector/integration/run_epd_correctness_test.sh b/tests/v1/ec_connector/integration/run_epd_correctness_test.sh index 0c2666306..ffe9cac38 100644 --- a/tests/v1/ec_connector/integration/run_epd_correctness_test.sh +++ b/tests/v1/ec_connector/integration/run_epd_correctness_test.sh @@ -24,7 +24,7 @@ MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}" # Set 1 to use multimodal prompts; else to use text-only USE_MM_PROMPTS="${USE_MM_PROMPTS:-1}" MM_FLAG="" -if [ $USE_MM_PROMPTS = "1" ]; then +if [ "$USE_MM_PROMPTS" = "1" ]; then MM_FLAG="--use_mm_prompts" fi @@ -51,7 +51,7 @@ LOG_PATH="${LOG_PATH:-/tmp}" BASELINE_FILE="${BASELINE_FILE:-/tmp/vllm_baseline.txt}" BASELINE_PD_FILE="${BASELINE_PD_FILE:-/tmp/vllm_epd_baseline.txt}" -mkdir -p $LOG_PATH +mkdir -p "$LOG_PATH" # Trap the SIGINT signal (triggered by Ctrl+C) trap 'kill $(jobs -pr)' SIGINT SIGTERM EXIT @@ -87,20 +87,20 @@ run_baseline() { # Start baseline instance echo "Starting baseline instance on GPU $GPU_SINGLE, port $PORT" CUDA_VISIBLE_DEVICES="$GPU_SINGLE" vllm serve "$MODEL" \ - --port $PORT \ + --port "$PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ - > $LOG_PATH/baseline.log 2>&1 & + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ + > "$LOG_PATH"/baseline.log 2>&1 & local BASELINE_PID=$! # Wait for baseline to start echo "Waiting for baseline instance to start..." 
- wait_for_server $PORT + wait_for_server "$PORT" - curl http://127.0.0.1:$PORT/v1/models + curl http://127.0.0.1:"$PORT"/v1/models echo "" # Run test in baseline mode @@ -139,14 +139,14 @@ run_epd_1e_1pd() { # Start encoder instance echo "Starting encoder instance on GPU $GPU_E, port $ENCODE_PORT" CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ - --port $ENCODE_PORT \ + --port "$ENCODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.01 \ --enable-request-id-headers \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -154,18 +154,18 @@ run_epd_1e_1pd() { "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'" } }' \ - > $LOG_PATH/1e1pd_encoder.log 2>&1 & + > "$LOG_PATH"/1e1pd_encoder.log 2>&1 & PIDS+=($!) # Start prefill+decode instance echo "Starting PD instance on GPU $GPU_PD, port $PREFILL_DECODE_PORT" CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \ - --port $PREFILL_DECODE_PORT \ + --port "$PREFILL_DECODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -173,32 +173,32 @@ run_epd_1e_1pd() { "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'" } }' \ - > $LOG_PATH/1e1pd_pd.log 2>&1 & + > "$LOG_PATH"/1e1pd_pd.log 2>&1 & PIDS+=($!) # Wait for instances to start echo "Waiting for encoder instance..." - wait_for_server $ENCODE_PORT + wait_for_server "$ENCODE_PORT" echo "Waiting for PD instance..." 
- wait_for_server $PREFILL_DECODE_PORT + wait_for_server "$PREFILL_DECODE_PORT" # Start proxy echo "Starting EPD proxy on port $PROXY_PORT" python "${GIT_ROOT}/examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py" \ --host "0.0.0.0" \ - --port $PROXY_PORT \ + --port "$PROXY_PORT" \ --encode-servers-urls "http://localhost:$ENCODE_PORT" \ --prefill-servers-urls "disable" \ --decode-servers-urls "http://localhost:$PREFILL_DECODE_PORT" \ - > $LOG_PATH/1e1pd_proxy.log 2>&1 & + > "$LOG_PATH"/1e1pd_proxy.log 2>&1 & PIDS+=($!) # Wait for proxy echo "Waiting for proxy..." - wait_for_server $PROXY_PORT + wait_for_server "$PROXY_PORT" - curl http://127.0.0.1:$PROXY_PORT/v1/models - curl http://127.0.0.1:$PROXY_PORT/health + curl http://127.0.0.1:"$PROXY_PORT"/v1/models + curl http://127.0.0.1:"$PROXY_PORT"/health echo "" echo "All EPD (1E+1PD) services are up!" @@ -217,7 +217,7 @@ run_epd_1e_1pd() { echo "✓✓ 1E+1PD Correctness Test finished" echo "Stopping EPD (1E+1PD) instances..." for pid in "${PIDS[@]}"; do - kill $pid 2>/dev/null || true + kill "$pid" 2>/dev/null || true done sleep 2 cleanup_instances @@ -244,17 +244,17 @@ run_baseline_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_P" \ VLLM_NIXL_SIDE_CHANNEL_PORT=5559 \ vllm serve "$MODEL" \ - --port $PREFILL_PORT \ + --port "$PREFILL_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_producer" }' \ - > $LOG_PATH/1p1d_prefill.log 2>&1 & + > "$LOG_PATH"/1p1d_prefill.log 2>&1 & PIDS+=($!) 
# Start decode instance @@ -262,40 +262,40 @@ run_baseline_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_D" \ VLLM_NIXL_SIDE_CHANNEL_PORT=6000 \ vllm serve "$MODEL" \ - --port $DECODE_PORT \ + --port "$DECODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_consumer" }' \ - > $LOG_PATH/1p1d_decode.log 2>&1 & + > "$LOG_PATH"/1p1d_decode.log 2>&1 & PIDS+=($!) # Wait for instances to start echo "Waiting for prefill instance..." - wait_for_server $PREFILL_PORT + wait_for_server "$PREFILL_PORT" echo "Waiting for decode instance..." - wait_for_server $DECODE_PORT + wait_for_server "$DECODE_PORT" # Start proxy echo "Starting EPD proxy on port $PROXY_PORT" python "${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \ --host "0.0.0.0" \ - --port $PROXY_PORT \ - --prefiller-ports $PREFILL_PORT \ - --decoder-ports $DECODE_PORT \ - > $LOG_PATH/1p1d_proxy.log 2>&1 & + --port "$PROXY_PORT" \ + --prefiller-ports "$PREFILL_PORT" \ + --decoder-ports "$DECODE_PORT" \ + > "$LOG_PATH"/1p1d_proxy.log 2>&1 & PIDS+=($!) # Wait for proxy echo "Waiting for proxy..." - wait_for_server $PROXY_PORT + wait_for_server "$PROXY_PORT" - curl http://127.0.0.1:$PROXY_PORT/healthcheck + curl http://127.0.0.1:"$PROXY_PORT"/healthcheck echo "" echo "All PD (1P+1D) services are up!" @@ -313,7 +313,7 @@ run_baseline_1p_1d() { # Cleanup echo "Stopping PD (1P+1D) instances..." 
for pid in "${PIDS[@]}"; do - kill $pid 2>/dev/null || true + kill "$pid" 2>/dev/null || true done sleep 2 cleanup_instances @@ -339,14 +339,14 @@ run_epd_1e_1p_1d() { # Start encoder instance echo "Starting encoder instance on GPU $GPU_E, port $ENCODE_PORT" CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ - --port $ENCODE_PORT \ + --port "$ENCODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.01 \ --enable-request-id-headers \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -354,7 +354,7 @@ run_epd_1e_1p_1d() { "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'" } }' \ - > $LOG_PATH/1e1p1d_encoder.log 2>&1 & + > "$LOG_PATH"/1e1p1d_encoder.log 2>&1 & PIDS+=($!) # Start prefill instance @@ -362,12 +362,12 @@ run_epd_1e_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_P" \ VLLM_NIXL_SIDE_CHANNEL_PORT=5559 \ vllm serve "$MODEL" \ - --port $PREFILL_PORT \ + --port "$PREFILL_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -379,7 +379,7 @@ run_epd_1e_1p_1d() { "kv_connector": "NixlConnector", "kv_role": "kv_producer" }' \ - > $LOG_PATH/1e1p1d_prefill.log 2>&1 & + > "$LOG_PATH"/1e1p1d_prefill.log 2>&1 & PIDS+=($!) 
# Start decode instance @@ -387,44 +387,44 @@ run_epd_1e_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_D" \ VLLM_NIXL_SIDE_CHANNEL_PORT=6000 \ vllm serve "$MODEL" \ - --port $DECODE_PORT \ + --port "$DECODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_consumer" }' \ - > $LOG_PATH/1e1p1d_decode.log 2>&1 & + > "$LOG_PATH"/1e1p1d_decode.log 2>&1 & PIDS+=($!) # Wait for instances to start echo "Waiting for encoder instance..." - wait_for_server $ENCODE_PORT + wait_for_server "$ENCODE_PORT" echo "Waiting for prefill instance..." - wait_for_server $PREFILL_PORT + wait_for_server "$PREFILL_PORT" echo "Waiting for decode instance..." - wait_for_server $DECODE_PORT + wait_for_server "$DECODE_PORT" # Start proxy echo "Starting EPD proxy on port $PROXY_PORT" python "${GIT_ROOT}/examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py" \ --host "0.0.0.0" \ - --port $PROXY_PORT \ + --port "$PROXY_PORT" \ --encode-servers-urls "http://localhost:$ENCODE_PORT" \ --prefill-servers-urls "http://localhost:$PREFILL_PORT" \ --decode-servers-urls "http://localhost:$DECODE_PORT" \ - > $LOG_PATH/1e1p1d_proxy.log 2>&1 & + > "$LOG_PATH"/1e1p1d_proxy.log 2>&1 & PIDS+=($!) # Wait for proxy echo "Waiting for proxy..." - wait_for_server $PROXY_PORT + wait_for_server "$PROXY_PORT" - curl http://127.0.0.1:$PROXY_PORT/v1/models - curl http://127.0.0.1:$PROXY_PORT/health + curl http://127.0.0.1:"$PROXY_PORT"/v1/models + curl http://127.0.0.1:"$PROXY_PORT"/health echo "" echo "All EPD (1E+1P+1D) services are up!" @@ -443,7 +443,7 @@ run_epd_1e_1p_1d() { echo "✓✓ 1E+1P+1D Correctness Test finished" echo "Stopping EPD (1E+1P+1D) instances..." 
for pid in "${PIDS[@]}"; do - kill $pid 2>/dev/null || true + kill "$pid" 2>/dev/null || true done sleep 2 cleanup_instances diff --git a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh index cdbcdca54..abdf88ad6 100755 --- a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh @@ -32,9 +32,14 @@ run_tests() { echo "=== Running tests (${label}) ===" for cfg in "${configs[@]}"; do + local -a cfg_parts extra_args_parts + read -r -a cfg_parts <<< "$cfg" + read -r -a extra_args_parts <<< "$extra_args" + echo "-> Running with ${cfg} ${extra_args:+and ${extra_args}}" # Use 'env' to safely set variables without eval - if ! env ${cfg} bash "${SCRIPT}" ${extra_args}; then + # keep argv splitting safe and SC2086-clean via arrays. + if ! env "${cfg_parts[@]}" bash "${SCRIPT}" "${extra_args_parts[@]}"; then echo "❌ Test failed for config: ${cfg} ${extra_args:+(${extra_args})}" exit 1 fi diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh index 560ce4407..58ae42126 100755 --- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh @@ -109,9 +109,9 @@ get_model_args() { get_num_gpus() { if [[ "$SMI_BIN" == *"nvidia"* ]]; then - echo "$($SMI_BIN --query-gpu=name --format=csv,noheader | wc -l)" + $SMI_BIN --query-gpu=name --format=csv,noheader | wc -l elif [[ "$SMI_BIN" == *"rocm"* ]]; then - echo "$($SMI_BIN -l | grep GPU | wc -l)" + $SMI_BIN -l | grep -c GPU else # works for non-cuda platforms, # assuming at least 1 device and @@ -182,7 +182,7 @@ run_tests_for_model() { # Store host and port for proxy configuration PREFILL_HOSTS+=("localhost") - PREFILL_PORTS+=($PORT) + PREFILL_PORTS+=("$PORT") done # Start decode instances @@ -237,30 +237,30 @@ 
run_tests_for_model() { # Store host and port for proxy configuration DECODE_HOSTS+=("localhost") - DECODE_PORTS+=($PORT) + DECODE_PORTS+=("$PORT") done # Wait for all instances to start for PORT in "${PREFILL_PORTS[@]}"; do echo "Waiting for prefill instance on port $PORT to start..." - wait_for_server $PORT + wait_for_server "$PORT" done for PORT in "${DECODE_PORTS[@]}"; do echo "Waiting for decode instance on port $PORT to start..." - wait_for_server $PORT + wait_for_server "$PORT" done # Build the command for the proxy server with all the hosts and ports PROXY_CMD="python3 ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192" # Add all prefill hosts and ports - PROXY_CMD+=" --prefiller-hosts ${PREFILL_HOSTS[@]}" - PROXY_CMD+=" --prefiller-ports ${PREFILL_PORTS[@]}" + PROXY_CMD+=" --prefiller-hosts ${PREFILL_HOSTS[*]}" + PROXY_CMD+=" --prefiller-ports ${PREFILL_PORTS[*]}" # Add all decode hosts and ports - PROXY_CMD+=" --decoder-hosts ${DECODE_HOSTS[@]}" - PROXY_CMD+=" --decoder-ports ${DECODE_PORTS[@]}" + PROXY_CMD+=" --decoder-hosts ${DECODE_HOSTS[*]}" + PROXY_CMD+=" --decoder-ports ${DECODE_PORTS[*]}" # Start the proxy server echo "Starting proxy server with command: $PROXY_CMD" @@ -271,7 +271,7 @@ run_tests_for_model() { # Run lm eval for this model echo "Running tests for $model_name" - TEST_MODEL=$model_name python3 -m pytest -s -x ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_accuracy.py + TEST_MODEL=$model_name python3 -m pytest -s -x "${GIT_ROOT}"/tests/v1/kv_connector/nixl_integration/test_accuracy.py # Clean up before running next model cleanup_instances diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh index c48b452e2..23b2a0b1c 100755 --- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh @@ -114,10 +114,10 @@ run_tests_for_model() { eval 
"$FULL_CMD &" # Wait for all instances to start - echo "Waiting for prefill instance on port $PORT to start..." - wait_for_server $PREFILL_PORT - echo "Waiting for decode instance on port $PORT to start..." - wait_for_server $DECODE_PORT + echo "Waiting for prefill instance on port $PREFILL_PORT to start..." + wait_for_server "$PREFILL_PORT" + echo "Waiting for decode instance on port $DECODE_PORT to start..." + wait_for_server "$DECODE_PORT" # Build the command for the proxy server with all the hosts and ports PROXY_PORT=8192 @@ -133,7 +133,7 @@ run_tests_for_model() { # Run lm eval for this model echo "Running tests for $model_name" - PREFILL_PORT=$PREFILL_PORT DECODE_PORT=$DECODE_PORT PROXY_PORT=$PROXY_PORT python -m pytest -s -v ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_edge_cases.py + PREFILL_PORT=$PREFILL_PORT DECODE_PORT=$DECODE_PORT PROXY_PORT=$PROXY_PORT python -m pytest -s -v "${GIT_ROOT}"/tests/v1/kv_connector/nixl_integration/test_edge_cases.py # Clean up before running next model cleanup_instances diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh index fa1738bb3..407542eb8 100644 --- a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh @@ -63,8 +63,8 @@ launch_baseline() { --block-size ${BLOCK_SIZE} \ --gpu-memory-utilization 0.5 \ --enforce-eager" - echo ${BASELINE_BASE_CMD} - ssh -tt ${BASELINE_HOST} "${BASELINE_BASE_CMD}" & + echo "${BASELINE_BASE_CMD}" + ssh -tt "${BASELINE_HOST}" "${BASELINE_BASE_CMD}" & } launch_pd() { @@ -103,17 +103,17 @@ launch_pd() { --gpu-memory-utilization 0.5 \ --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'" - echo ${PREFILL_BASE_CMD} - echo ${DECODE_BASE_CMD} + echo "${PREFILL_BASE_CMD}" + echo "${DECODE_BASE_CMD}" sleep 2 # execute on hosts - 
ssh -tt ${PREFILL_HOST} "${PREFILL_BASE_CMD}" & - ssh -tt ${DECODE_HOST} "${DECODE_BASE_CMD}" & + ssh -tt "${PREFILL_HOST}" "${PREFILL_BASE_CMD}" & + ssh -tt "${DECODE_HOST}" "${DECODE_BASE_CMD}" & sleep 1 - wait_for_server ${PREFILL_HOST} ${PREFILL_PORT} + wait_for_server "${PREFILL_HOST}" "${PREFILL_PORT}" sleep 1 - wait_for_server ${DECODE_HOST} ${DECODE_PORT} + wait_for_server "${DECODE_HOST}" "${DECODE_PORT}" sleep 1 } @@ -123,21 +123,21 @@ launch_pd_proxy(){ --prefiller-host ${PREFILL_HOST} --prefiller-port ${PREFILL_PORT} \ --decoder-host ${DECODE_HOST} --decoder-port ${DECODE_PORT} \ --host=${PROXY_HOST} --port ${PROXY_PORT}" - echo ${PROXY_BASE_CMD} - ssh -tt ${PROXY_HOST} "${PROXY_BASE_CMD}" & + echo "${PROXY_BASE_CMD}" + ssh -tt "${PROXY_HOST}" "${PROXY_BASE_CMD}" & } run_tests(){ local service_url=$1 local mode=$2 - python3 ${EXP_ROOT}/test_disagg_accuracy.py --service_url=${service_url} --model_name=${MODEL_NAME} --mode=${mode} --file_name=${OUTPUT_FILE} + python3 "${EXP_ROOT}"/test_disagg_accuracy.py --service_url="${service_url}" --model_name="${MODEL_NAME}" --mode="${mode}" --file_name="${OUTPUT_FILE}" } # run non-disagg. 
baseline & save outputs launch_baseline sleep 2 -wait_for_server ${BASELINE_HOST} ${BASELINE_PORT} +wait_for_server "${BASELINE_HOST}" "${BASELINE_PORT}" run_tests "http://${BASELINE_HOST}:${BASELINE_PORT}" "baseline" cleanup sleep 10 @@ -150,7 +150,7 @@ sleep 10 run_tests "http://${PROXY_HOST}:${PROXY_PORT}" "disagg" echo "-----P/D success----" -rm ${OUTPUT_FILE} +rm "${OUTPUT_FILE}" cleanup exit 0 \ No newline at end of file diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh index 3d6382237..f32ef5e76 100644 --- a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh @@ -86,17 +86,17 @@ launch_pd() { --gpu-memory-utilization 0.5 \ --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'" - echo ${PREFILL_BASE_CMD} - echo ${DECODE_BASE_CMD} + echo "${PREFILL_BASE_CMD}" + echo "${DECODE_BASE_CMD}" sleep 2 # execute on hosts - ssh -tt ${PREFILL_HOST} "${PREFILL_BASE_CMD}" & - ssh -tt ${DECODE_HOST} "${DECODE_BASE_CMD}" & + ssh -tt "${PREFILL_HOST}" "${PREFILL_BASE_CMD}" & + ssh -tt "${DECODE_HOST}" "${DECODE_BASE_CMD}" & sleep 1 - wait_for_server ${PREFILL_HOST} ${PREFILL_PORT} + wait_for_server "${PREFILL_HOST}" "${PREFILL_PORT}" sleep 1 - wait_for_server ${DECODE_HOST} ${DECODE_PORT} + wait_for_server "${DECODE_HOST}" "${DECODE_PORT}" sleep 1 } @@ -106,8 +106,8 @@ launch_pd_proxy(){ --prefiller-host ${PREFILL_HOST} --prefiller-port ${PREFILL_PORT} \ --decoder-host ${DECODE_HOST} --decoder-port ${DECODE_PORT} \ --host=${PROXY_HOST} --port ${PROXY_PORT}" - echo ${PROXY_BASE_CMD} - ssh -tt ${PROXY_HOST} "${PROXY_BASE_CMD}" & + echo "${PROXY_BASE_CMD}" + ssh -tt "${PROXY_HOST}" "${PROXY_BASE_CMD}" & } @@ -121,4 +121,4 @@ PREFILL_PORT=${PREFILL_PORT} \ DECODE_HOST=${DECODE_HOST} \ DECODE_PORT=${DECODE_PORT} \ PROXY_HOST=${PROXY_HOST} \ 
-PROXY_PORT=${PROXY_PORT} python -m pytest -s -v ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_edge_cases.py \ No newline at end of file +PROXY_PORT=${PROXY_PORT} python -m pytest -s -v "${GIT_ROOT}"/tests/v1/kv_connector/nixl_integration/test_edge_cases.py diff --git a/tools/ep_kernels/elastic_ep/install_eep_libraries.sh b/tools/ep_kernels/elastic_ep/install_eep_libraries.sh index 9d7dc1032..fe7b86215 100755 --- a/tools/ep_kernels/elastic_ep/install_eep_libraries.sh +++ b/tools/ep_kernels/elastic_ep/install_eep_libraries.sh @@ -23,7 +23,7 @@ while getopts "w:n" opt; do done if [ ! -d "$WORKSPACE" ]; then - mkdir -p $WORKSPACE + mkdir -p "$WORKSPACE" fi @@ -31,7 +31,7 @@ fi pip3 install cmake torch ninja # build nvshmem -pushd $WORKSPACE +pushd "$WORKSPACE" # Reset NVSHMEM build if requested if [ "$INSTALL_NVSHMEM" = true ]; then mkdir -p nvshmem_src @@ -69,15 +69,15 @@ export NVSHMEM_BUILD_HYDRA_LAUNCHER=0 export NVSHMEM_BUILD_TXZ_PACKAGE=0 export NVSHMEM_TIMEOUT_DEVICE_POLLING=0 -cmake -G Ninja -S . -B $WORKSPACE/nvshmem_build/ -DCMAKE_INSTALL_PREFIX=$WORKSPACE/nvshmem_install -cmake --build $WORKSPACE/nvshmem_build/ --target install +cmake -G Ninja -S . 
-B "$WORKSPACE"/nvshmem_build/ -DCMAKE_INSTALL_PREFIX="$WORKSPACE"/nvshmem_install +cmake --build "$WORKSPACE"/nvshmem_build/ --target install popd export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem_install:$CMAKE_PREFIX_PATH # build and install pplx, require pytorch installed -pushd $WORKSPACE +pushd "$WORKSPACE" git clone https://github.com/ppl-ai/pplx-kernels cd pplx-kernels # see https://github.com/pypa/pip/issues/9955#issuecomment-838065925 diff --git a/tools/ep_kernels/install_python_libraries.sh b/tools/ep_kernels/install_python_libraries.sh index 89da24f95..148cb6e18 100755 --- a/tools/ep_kernels/install_python_libraries.sh +++ b/tools/ep_kernels/install_python_libraries.sh @@ -14,7 +14,7 @@ DEEPEP_COMMIT_HASH=${DEEPEP_COMMIT_HASH:-"73b6ea4"} NVSHMEM_VER=${NVSHMEM_VER:-"3.3.24"} # Default supports both CUDA 12 and 13 WORKSPACE=${WORKSPACE:-$(pwd)/ep_kernels_workspace} MODE=${MODE:-install} -CUDA_VERSION_MAJOR=$(${CUDA_HOME}/bin/nvcc --version | egrep -o "release [0-9]+" | cut -d ' ' -f 2) +CUDA_VERSION_MAJOR=$("${CUDA_HOME}"/bin/nvcc --version | grep -E -o "release [0-9]+" | cut -d ' ' -f 2) # Parse arguments while [[ $# -gt 0 ]]; do diff --git a/tools/flashinfer-build.sh b/tools/flashinfer-build.sh index b3cc6c308..8bb630070 100755 --- a/tools/flashinfer-build.sh +++ b/tools/flashinfer-build.sh @@ -5,8 +5,6 @@ set -ex # FlashInfer configuration FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" -FLASHINFER_GIT_REF="${FLASHINFER_GIT_REF}" -CUDA_VERSION="${CUDA_VERSION}" BUILD_WHEEL="${BUILD_WHEEL:-true}" if [[ -z "${FLASHINFER_GIT_REF}" ]]; then @@ -23,7 +21,7 @@ echo "🏗️ Building FlashInfer ${FLASHINFER_GIT_REF} for CUDA ${CUDA_VERSION # Clone FlashInfer git clone --depth 1 --recursive --shallow-submodules \ - --branch ${FLASHINFER_GIT_REF} \ + --branch "${FLASHINFER_GIT_REF}" \ ${FLASHINFER_GIT_REPO} flashinfer # Set CUDA arch list based on CUDA version @@ -44,7 +42,7 @@ echo "🏗️ Building FlashInfer AOT for arches: ${FI_TORCH_CUDA_ARCH_LIST}" 
pushd flashinfer # Make sure the wheel is built for the correct CUDA version - export UV_TORCH_BACKEND=cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + export UV_TORCH_BACKEND=cu$(echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.') # Build AOT kernels export TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" @@ -63,4 +61,4 @@ pushd flashinfer popd # Cleanup -rm -rf flashinfer \ No newline at end of file +rm -rf flashinfer diff --git a/tools/install_deepgemm.sh b/tools/install_deepgemm.sh index 1c316ee78..0e1adda97 100755 --- a/tools/install_deepgemm.sh +++ b/tools/install_deepgemm.sh @@ -65,7 +65,7 @@ fi # Extract major and minor version numbers CUDA_MAJOR="${CUDA_VERSION%%.*}" -CUDA_MINOR="${CUDA_VERSION#${CUDA_MAJOR}.}" +CUDA_MINOR="${CUDA_VERSION#"${CUDA_MAJOR}".}" CUDA_MINOR="${CUDA_MINOR%%.*}" echo "CUDA version: $CUDA_VERSION (major: $CUDA_MAJOR, minor: $CUDA_MINOR)" @@ -92,7 +92,7 @@ git checkout "$DEEPGEMM_GIT_REF" # Clean previous build artifacts # (Based on https://github.com/deepseek-ai/DeepGEMM/blob/main/install.sh) -rm -rf build dist *.egg-info +rm -rf -- build dist *.egg-info 2>/dev/null || true # Build wheel echo "🏗️ Building DeepGEMM wheel..." 
diff --git a/tools/pre_commit/shellcheck.baseline b/tools/pre_commit/shellcheck.baseline deleted file mode 100644 index 7433bb331..000000000 --- a/tools/pre_commit/shellcheck.baseline +++ /dev/null @@ -1,89 +0,0 @@ -benchmarks/auto_tune/auto_tune.sh:SC2034 -benchmarks/auto_tune/auto_tune.sh:SC2086 -benchmarks/auto_tune/batch_auto_tune.sh:SC2086 -benchmarks/run_structured_output_benchmark.sh:SC2028 -benchmarks/run_structured_output_benchmark.sh:SC2034 -benchmarks/run_structured_output_benchmark.sh:SC2086 -.buildkite/image_build/image_build_cpu_arm64.sh:SC2086 -.buildkite/image_build/image_build_cpu.sh:SC2086 -.buildkite/image_build/image_build_hpu.sh:SC2086 -.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh:SC2086 -.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh:SC2034 -.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh:SC2027 -.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh:SC2086 -.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh:SC2126 -.buildkite/scripts/annotate-rocm-release.sh:SC2086 -.buildkite/scripts/cache-rocm-base-wheels.sh:SC2012 -.buildkite/scripts/cherry-pick-from-milestone.sh:SC2064 -.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh:SC2086 -.buildkite/scripts/hardware_ci/run-cpu-test.sh:SC2086 -.buildkite/scripts/hardware_ci/run-hpu-test.sh:SC2086 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC1090 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC2006 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC2086 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC2181 -.buildkite/scripts/hardware_ci/run-xpu-test.sh:SC2086 -.buildkite/scripts/push-nightly-builds.sh:SC2086 -.buildkite/scripts/run-multi-node-test.sh:SC2086 -.buildkite/scripts/run-multi-node-test.sh:SC2089 -.buildkite/scripts/run-multi-node-test.sh:SC2090 -.buildkite/scripts/run-prime-rl-test.sh:SC2086 -.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh:SC2086 
-.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh:SC2086 -.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh:SC2086 -.buildkite/scripts/tpu/docker_run_bm.sh:SC1090 -.buildkite/scripts/tpu/docker_run_bm.sh:SC2086 -.buildkite/scripts/tpu/run_bm.sh:SC2034 -.buildkite/scripts/tpu/run_bm.sh:SC2086 -.buildkite/scripts/upload-nightly-wheels.sh:SC2086 -.buildkite/scripts/upload-nightly-wheels.sh:SC2115 -.buildkite/scripts/upload-nightly-wheels.sh:SC2236 -.buildkite/scripts/upload-release-wheels-pypi.sh:SC2086 -.buildkite/scripts/upload-rocm-wheels.sh:SC2012 -examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh:SC2086 -examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh:SC2086 -examples/online_serving/disaggregated_prefill.sh:SC2086 -examples/online_serving/disaggregated_serving/kv_events.sh:SC2086 -examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh:SC2046 -examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh:SC2086 -examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh:SC2317 -examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh:SC2046 -examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh:SC2086 -examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh:SC2317 -examples/online_serving/elastic_ep/bench.sh:SC2086 -examples/online_serving/elastic_ep/serve_deepseek_v2.sh:SC2086 -examples/online_serving/multi-node-serving.sh:SC2006 -examples/online_serving/multi-node-serving.sh:SC2086 -examples/online_serving/multi-node-serving.sh:SC2181 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2046 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2126 
-examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2181 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2206 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh:SC2086 -examples/pooling/embed/openai_embedding_long_text/service.sh:SC2086 -tests/standalone_tests/python_only_compile.sh:SC2086 -tests/v1/ec_connector/integration/run_epd_correctness_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2005 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2124 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2126 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2206 -tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh:SC2153 -tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh:SC2089 -tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh:SC2090 -tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh:SC2089 -tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh:SC2090 -tools/ep_kernels/elastic_ep/install_eep_libraries.sh:SC2086 -tools/ep_kernels/install_python_libraries.sh:SC2086 -tools/ep_kernels/install_python_libraries.sh:SC2196 -tools/flashinfer-build.sh:SC2086 -tools/flashinfer-build.sh:SC2269 -tools/install_deepgemm.sh:SC2035 -tools/install_deepgemm.sh:SC2295 -tools/pre_commit/shellcheck.sh:SC2016 -tools/vllm-rocm/generate-rocm-wheels-root-index.sh:SC2295 -tools/vllm-tpu/build.sh:SC2145 diff --git a/tools/pre_commit/shellcheck.sh b/tools/pre_commit/shellcheck.sh index 4adee5d57..557f41f29 100755 --- 
a/tools/pre_commit/shellcheck.sh +++ b/tools/pre_commit/shellcheck.sh @@ -2,7 +2,6 @@ set -euo pipefail scversion="stable" -baseline="tools/pre_commit/shellcheck.baseline" if [ -d "shellcheck-${scversion}" ]; then export PATH="$PATH:$(pwd)/shellcheck-${scversion}" @@ -20,38 +19,6 @@ if ! [ -x "$(command -v shellcheck)" ]; then fi # TODO - fix warnings in .buildkite/scripts/hardware_ci/run-amd-test.sh -# collects warnings as "file:SCcode" pairs for baseline comparison. -collect() { - find . -path ./.git -prune -o -name "*.sh" \ - -not -path "./.buildkite/scripts/hardware_ci/run-amd-test.sh" -print0 | \ - xargs -0 sh -c 'for f in "$@"; do git check-ignore -q "$f" || shellcheck -s bash -f gcc "$f" || true; done' -- | \ - sed -nE 's|^\./||; s|^([^:]+):[0-9]+:[0-9]+:.*\[(SC[0-9]+)\]$|\1:\2|p' | \ - sort -u -} - -if [[ "${1:-}" == "--generate-baseline" ]]; then - collect > "$baseline" - echo "Wrote baseline to $baseline" - exit 0 -fi - -if [[ ! -f "$baseline" ]]; then - echo "Baseline not found: $baseline (run: $0 --generate-baseline)" - exit 1 -fi - -current="$(mktemp)" -trap 'rm -f "$current"' EXIT -collect > "$current" - -# finds new warnings not in baseline -new_errors="$(comm -23 "$current" <(sort -u "$baseline") || true)" -if [ -n "$new_errors" ]; then - echo "$new_errors" | cut -d: -f1 | sort -u | while IFS= read -r file; do - if [[ -f "$file" ]]; then - codes=$(echo "$new_errors" | awk -F: -v f="$file" '$1==f {print $2}' | paste -sd ',' -) - shellcheck -s bash --include="$codes" "$file" 2>&1 || true - fi - done - exit 1 -fi +find . 
-path ./.git -prune -o -name "*.sh" \ + -not -path "./.buildkite/scripts/hardware_ci/run-amd-test.sh" -print0 | \ + xargs -0 sh -c "for f in \"\$@\"; do git check-ignore -q \"\$f\" || shellcheck -s bash \"\$f\"; done" -- diff --git a/tools/vllm-rocm/generate-rocm-wheels-root-index.sh b/tools/vllm-rocm/generate-rocm-wheels-root-index.sh index 02b4fbdd0..87b5c3228 100755 --- a/tools/vllm-rocm/generate-rocm-wheels-root-index.sh +++ b/tools/vllm-rocm/generate-rocm-wheels-root-index.sh @@ -190,7 +190,7 @@ echo "" # List what would be uploaded echo "Files to upload:" find "$WORK_DIR/output" -name "*.html" -type f | while read -r file; do - rel_path="${file#$WORK_DIR/output/}" + rel_path="${file#"$WORK_DIR"/output/}" echo " rocm/$rel_path" done echo "" diff --git a/tools/vllm-tpu/build.sh b/tools/vllm-tpu/build.sh index 45ef8dfcb..aa46a5298 100755 --- a/tools/vllm-tpu/build.sh +++ b/tools/vllm-tpu/build.sh @@ -38,7 +38,7 @@ if ! grep -q "name = \"vllm-tpu\"" "$PYPROJECT_FILE"; then cp "$PYPROJECT_FILE" "${PYPROJECT_FILE}.bak" sed -i '0,/^name = "vllm"/s//name = "vllm-tpu"/' "$PYPROJECT_FILE" - echo "Patching ${CHANGE_FILE_LIST[@]} vllm to vllm-tpu..." + echo "Patching ${CHANGE_FILE_LIST[*]} vllm to vllm-tpu..." # patching # importlib.metadata.version('vllm') -> importlib.metadata.version('vllm-tpu') # importlib.metadata.version("vllm") -> importlib.metadata.version("vllm-tpu")