diff --git a/.buildkite/image_build/image_build_cpu.sh b/.buildkite/image_build/image_build_cpu.sh index a69732f43..2d5e49ecd 100755 --- a/.buildkite/image_build/image_build_cpu.sh +++ b/.buildkite/image_build/image_build_cpu.sh @@ -11,10 +11,10 @@ REPO=$2 BUILDKITE_COMMIT=$3 # authenticate with AWS ECR -aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY +aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" # skip build if image already exists -if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then +if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then echo "Image not found, proceeding with build..." else echo "Image found" @@ -24,13 +24,13 @@ fi # build docker build --file docker/Dockerfile.cpu \ --build-arg max_jobs=16 \ - --build-arg buildkite_commit=$BUILDKITE_COMMIT \ + --build-arg buildkite_commit="$BUILDKITE_COMMIT" \ --build-arg VLLM_CPU_AVX512BF16=true \ --build-arg VLLM_CPU_AVX512VNNI=true \ --build-arg VLLM_CPU_AMXBF16=true \ - --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \ + --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \ --target vllm-test \ --progress plain . 
# push -docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu +docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu diff --git a/.buildkite/image_build/image_build_cpu_arm64.sh b/.buildkite/image_build/image_build_cpu_arm64.sh index 615298b65..3f25fbaec 100755 --- a/.buildkite/image_build/image_build_cpu_arm64.sh +++ b/.buildkite/image_build/image_build_cpu_arm64.sh @@ -11,10 +11,10 @@ REPO=$2 BUILDKITE_COMMIT=$3 # authenticate with AWS ECR -aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY +aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" # skip build if image already exists -if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu) ]]; then +if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then echo "Image not found, proceeding with build..." else echo "Image found" @@ -24,10 +24,10 @@ fi # build docker build --file docker/Dockerfile.cpu \ --build-arg max_jobs=16 \ - --build-arg buildkite_commit=$BUILDKITE_COMMIT \ - --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu \ + --build-arg buildkite_commit="$BUILDKITE_COMMIT" \ + --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \ --target vllm-test \ --progress plain . 
# push -docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-cpu +docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu diff --git a/.buildkite/image_build/image_build_hpu.sh b/.buildkite/image_build/image_build_hpu.sh index 192447ef4..60fa1789f 100755 --- a/.buildkite/image_build/image_build_hpu.sh +++ b/.buildkite/image_build/image_build_hpu.sh @@ -11,10 +11,10 @@ REPO=$2 BUILDKITE_COMMIT=$3 # authenticate with AWS ECR -aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin $REGISTRY +aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" # skip build if image already exists -if [[ -z $(docker manifest inspect $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu) ]]; then +if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu) ]]; then echo "Image not found, proceeding with build..." else echo "Image found" @@ -25,10 +25,10 @@ fi docker build \ --file tests/pytorch_ci_hud_benchmark/Dockerfile.hpu \ --build-arg max_jobs=16 \ - --build-arg buildkite_commit=$BUILDKITE_COMMIT \ - --tag $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu \ + --build-arg buildkite_commit="$BUILDKITE_COMMIT" \ + --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu \ --progress plain \ https://github.com/vllm-project/vllm-gaudi.git # push -docker push $REGISTRY/$REPO:$BUILDKITE_COMMIT-hpu +docker push "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu diff --git a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh index 02371f3dd..518af9a66 100755 --- a/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh @@ -41,4 +41,4 @@ lm_eval --model vllm-vlm \ --tasks chartqa \ --batch_size auto \ --apply_chat_template \ - --limit $LIMIT + --limit "$LIMIT" diff --git a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh 
b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh index c5128cea6..e3c6e16bd 100644 --- a/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh +++ b/.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh @@ -20,14 +20,11 @@ usage() { echo } -while getopts "m:b:l:f:t:" OPT; do +while getopts "m:l:f:t:" OPT; do case ${OPT} in m ) MODEL="$OPTARG" ;; - b ) - BATCH_SIZE="$OPTARG" - ;; l ) LIMIT="$OPTARG" ;; diff --git a/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh index 7dabcf517..2ad599ff1 100755 --- a/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh +++ b/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh @@ -15,11 +15,11 @@ DTYPE_FILTER="${DTYPE_FILTER:-}" check_gpus() { if command -v nvidia-smi; then # check the number of GPUs and GPU type. - declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l) + declare -g gpu_count=$(nvidia-smi --list-gpus | grep -c . || true) elif command -v amd-smi; then - declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l) + declare -g gpu_count=$(amd-smi list | grep -c 'GPU' || true) elif command -v hl-smi; then - declare -g gpu_count=$(hl-smi --list | grep -i "Module ID" | wc -l) + declare -g gpu_count=$(hl-smi --list | grep -ci "Module ID" || true) fi if [[ $gpu_count -gt 0 ]]; then @@ -47,7 +47,7 @@ check_cpus() { declare -g numa_count=$(lscpu | grep "NUMA node(s):" | awk '{print $3}') if [[ $numa_count -gt 0 ]]; then echo "NUMA found." - echo $numa_count + echo "$numa_count" else echo "Need at least 1 NUMA to run benchmarking." 
exit 1 @@ -434,7 +434,7 @@ run_serving_tests() { # iterate over different max_concurrency for max_concurrency in $max_concurrency_list; do - new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency + new_test_name="${test_name}_qps_${qps}_concurrency_${max_concurrency}" echo " new test name $new_test_name" # pass the tensor parallel size, the compilation mode, and the optimization # level to the client so that they can be used on the benchmark dashboard @@ -471,7 +471,7 @@ run_serving_tests() { # clean up if [[ "${DRY_RUN:-0}" != "1" ]]; then - kill -9 $server_pid + kill -9 "$server_pid" kill_gpu_processes fi done diff --git a/.buildkite/scripts/annotate-rocm-release.sh b/.buildkite/scripts/annotate-rocm-release.sh index 8e7dbfb9e..0a817890c 100755 --- a/.buildkite/scripts/annotate-rocm-release.sh +++ b/.buildkite/scripts/annotate-rocm-release.sh @@ -25,7 +25,7 @@ S3_REGION="${AWS_DEFAULT_REGION:-us-west-2}" S3_URL="http://${S3_BUCKET}.s3-website-${S3_REGION}.amazonaws.com" # Format ROCm version for path (e.g., "7.1" -> "rocm710") -ROCM_VERSION_PATH="rocm$(echo ${ROCM_VERSION} | tr -d '.')" +ROCM_VERSION_PATH="rocm$(echo "${ROCM_VERSION}" | tr -d '.')" ROCM_PATH="rocm/${BUILDKITE_COMMIT}/${ROCM_VERSION_PATH}" buildkite-agent annotate --style 'success' --context 'rocm-release-workflow' << EOF ## ROCm Wheel and Docker Image Releases diff --git a/.buildkite/scripts/cache-rocm-base-wheels.sh b/.buildkite/scripts/cache-rocm-base-wheels.sh index be2447250..060d09db4 100755 --- a/.buildkite/scripts/cache-rocm-base-wheels.sh +++ b/.buildkite/scripts/cache-rocm-base-wheels.sh @@ -83,7 +83,7 @@ case "${1:-}" in exit 1 fi - WHEEL_COUNT=$(ls artifacts/rocm-base-wheels/*.whl 2>/dev/null | wc -l) + WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l) if [[ "$WHEEL_COUNT" -eq 0 ]]; then echo "ERROR: No wheels found in artifacts/rocm-base-wheels/" >&2 exit 1 @@ -110,9 +110,9 @@ case "${1:-}" in echo "" echo "Downloaded wheels:" - 
ls -lh artifacts/rocm-base-wheels/ + find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' -exec ls -lh {} \; - WHEEL_COUNT=$(ls artifacts/rocm-base-wheels/*.whl 2>/dev/null | wc -l) + WHEEL_COUNT=$(find artifacts/rocm-base-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l) echo "" echo "Total: $WHEEL_COUNT wheels" echo "========================================" diff --git a/.buildkite/scripts/cherry-pick-from-milestone.sh b/.buildkite/scripts/cherry-pick-from-milestone.sh index 99eb36acd..67f30930b 100755 --- a/.buildkite/scripts/cherry-pick-from-milestone.sh +++ b/.buildkite/scripts/cherry-pick-from-milestone.sh @@ -134,7 +134,7 @@ log_info "Fetching merged PRs from milestone '${MILESTONE}'..." # Store PR data in a temp file PR_DATA=$(mktemp) -trap "rm -f $PR_DATA" EXIT +trap 'rm -f "$PR_DATA"' EXIT if ! gh pr list --state merged --search "milestone:${MILESTONE}" \ --limit 1000 \ diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh index 3728f73fa..75ae2765e 100755 --- a/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh +++ b/.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh @@ -27,7 +27,7 @@ function cpu_tests() { podman exec -it "$container_id" bash -c " export TORCH_COMPILE_DISABLE=1 set -xve - python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> $HOME/test_basic.log + python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> "$HOME"/test_basic.log # Run basic model test podman exec -it "$container_id" bash -c " @@ -43,7 +43,7 @@ function cpu_tests() { pytest -v -s tests/models/language/generation/test_common.py::test_models[False-False-5-32-google/gemma-1.1-2b-it] pytest -v -s tests/models/language/pooling/test_classification.py::test_models[float-jason9693/Qwen2.5-1.5B-apeach] # TODO: Below test case tests/models/language/pooling/test_embedding.py::test_models[True-ssmits/Qwen2-7B-Instruct-embed-base] fails on 
ppc64le. Disabling it for time being. - # pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> $HOME/test_rest.log + # pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> "$HOME"/test_rest.log } # All of CPU tests are expected to be finished less than 40 mins. diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test.sh b/.buildkite/scripts/hardware_ci/run-cpu-test.sh index c32b051ca..db75ad308 100644 --- a/.buildkite/scripts/hardware_ci/run-cpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-cpu-test.sh @@ -16,5 +16,5 @@ echo "--- :docker: Building Docker image" docker build --progress plain --tag "$IMAGE_NAME" --target vllm-test -f docker/Dockerfile.cpu . # Run the image, setting --shm-size=4g for tensor parallel. -docker run --rm --cpuset-cpus=$CORE_RANGE --cpuset-mems=$NUMA_NODE -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g $IMAGE_NAME \ - timeout $TIMEOUT_VAL bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}" +docker run --rm --cpuset-cpus="$CORE_RANGE" --cpuset-mems="$NUMA_NODE" -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true -e HF_TOKEN -e VLLM_CPU_KVCACHE_SPACE=16 -e VLLM_CPU_CI_ENV=1 -e VLLM_CPU_SIM_MULTI_NUMA=1 --shm-size=4g "$IMAGE_NAME" \ + timeout "$TIMEOUT_VAL" bash -c "set -euox pipefail; echo \"--- Print packages\"; pip list; echo \"--- Running tests\"; ${TEST_COMMAND}" diff --git a/.buildkite/scripts/hardware_ci/run-hpu-test.sh b/.buildkite/scripts/hardware_ci/run-hpu-test.sh index 7df696eb2..c6a556e21 100644 --- a/.buildkite/scripts/hardware_ci/run-hpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-hpu-test.sh @@ -7,7 +7,7 @@ set -exuo pipefail # Try building the docker image image_name="hpu/upstream-vllm-ci:${BUILDKITE_COMMIT}" 
container_name="hpu-upstream-vllm-ci-${BUILDKITE_COMMIT}-container" -cat <&2 exit 1 fi + # shellcheck source=/dev/null source "${TEST_RUN_CONFIG_FILE}" echo "Base docker image name that get from configuration: ${BASE_IMAGE_NAME}" return 0 @@ -48,9 +49,8 @@ get_config() { # get test running configuration. fetch_vllm_test_cfg -get_config # Check if the function call was successful. If not, exit the script. -if [ $? -ne 0 ]; then +if ! get_config; then exit 1 fi @@ -62,14 +62,14 @@ agent_idx=$(echo "${BUILDKITE_AGENT_NAME}" | awk -F'-' '{print $(NF-1)}') echo "agent_idx: ${agent_idx}" builder_name="cachebuilder${agent_idx}" builder_cache_dir="/mnt/docker-cache${agent_idx}" -mkdir -p ${builder_cache_dir} +mkdir -p "${builder_cache_dir}" # Try building the docker image cat < /dev/null; then echo "Installing UV package manager..." curl -LsSf https://astral.sh/uv/install.sh | sh - source $HOME/.local/bin/env + source "$HOME"/.local/bin/env fi # Clone Prime-RL repository at specific branch for reproducible tests diff --git a/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh b/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh index 463969cbc..e26273bba 100644 --- a/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh +++ b/.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh @@ -51,14 +51,14 @@ for BACK in "${BACKENDS[@]}"; do --enable-eplb \ --trust-remote-code \ --max-model-len 2048 \ - --all2all-backend $BACK \ - --port $PORT & + --all2all-backend "$BACK" \ + --port "$PORT" & SERVER_PID=$! 
- wait_for_server $PORT + wait_for_server "$PORT" TAG=$(echo "$MODEL" | tr '/: \\n' '_____') OUT="${OUT_DIR}/${TAG}_${BACK}.json" - python3 tests/evals/gsm8k/gsm8k_eval.py --host http://127.0.0.1 --port $PORT --num-questions ${NUM_Q} --save-results ${OUT} + python3 tests/evals/gsm8k/gsm8k_eval.py --host http://127.0.0.1 --port "$PORT" --num-questions "${NUM_Q}" --save-results "${OUT}" python3 - < "$VLLM_LOG" 2>&1 & + --download_dir "$DOWNLOAD_DIR" \ + --max-model-len "$MAX_MODEL_LEN" > "$VLLM_LOG" 2>&1 & echo "wait for 20 minutes.." echo # sleep 1200 # wait for 10 minutes... -for i in {1..120}; do +for _ in {1..120}; do # TODO: detect other type of errors. if grep -Fq "raise RuntimeError" "$VLLM_LOG"; then echo "Detected RuntimeError, exiting." @@ -78,11 +78,11 @@ echo "logging to $BM_LOG" echo vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name sonnet \ --dataset-path benchmarks/sonnet_4x.txt \ - --sonnet-input-len $INPUT_LEN \ - --sonnet-output-len $OUTPUT_LEN \ + --sonnet-input-len "$INPUT_LEN" \ + --sonnet-output-len "$OUTPUT_LEN" \ --ignore-eos > "$BM_LOG" echo "completed..." diff --git a/.buildkite/scripts/upload-nightly-wheels.sh b/.buildkite/scripts/upload-nightly-wheels.sh index 1af7f476a..5efcb89bf 100644 --- a/.buildkite/scripts/upload-nightly-wheels.sh +++ b/.buildkite/scripts/upload-nightly-wheels.sh @@ -76,16 +76,15 @@ mkdir -p "$INDICES_OUTPUT_DIR" # this indices have relative paths that could work as long as it is next to the wheel directory in s3 # i.e., the wheels are always in s3://vllm-wheels// # and indices can be placed in //, or /nightly/, or // -if [[ ! 
-z "$DEFAULT_VARIANT_ALIAS" ]]; then - alias_arg="--alias-to-default $DEFAULT_VARIANT_ALIAS" -else - alias_arg="" +alias_args=() +if [[ -n "$DEFAULT_VARIANT_ALIAS" ]]; then + alias_args=(--alias-to-default "$DEFAULT_VARIANT_ALIAS") fi # HACK: we do not need regex module here, but it is required by pre-commit hook # To avoid any external dependency, we simply replace it back to the stdlib re module sed -i 's/import regex as re/import re/g' .buildkite/scripts/generate-nightly-index.py -$PYTHON .buildkite/scripts/generate-nightly-index.py --version "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "commit $BUILDKITE_COMMIT" $alias_arg +$PYTHON .buildkite/scripts/generate-nightly-index.py --version "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "commit $BUILDKITE_COMMIT" "${alias_args[@]}" # copy indices to // unconditionally echo "Uploading indices to $S3_COMMIT_PREFIX" @@ -100,9 +99,9 @@ fi # re-generate and copy to // only if it does not have "dev" in the version if [[ "$version" != *"dev"* ]]; then echo "Re-generating indices for /$pure_version/" - rm -rf "$INDICES_OUTPUT_DIR/*" + rm -rf "${INDICES_OUTPUT_DIR:?}"/* mkdir -p "$INDICES_OUTPUT_DIR" # wheel-dir is overridden to be the commit directory, so that the indices point to the correct wheel path - $PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --wheel-dir "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" $alias_arg + $PYTHON .buildkite/scripts/generate-nightly-index.py --version "$pure_version" --wheel-dir "$SUBPATH" --current-objects "$obj_json" --output-dir "$INDICES_OUTPUT_DIR" --comment "version $pure_version" "${alias_args[@]}" aws s3 cp --recursive "$INDICES_OUTPUT_DIR/" "s3://$BUCKET/$pure_version/" fi diff --git a/.buildkite/scripts/upload-release-wheels-pypi.sh b/.buildkite/scripts/upload-release-wheels-pypi.sh index 75f519168..dacdb6e92
100644 --- a/.buildkite/scripts/upload-release-wheels-pypi.sh +++ b/.buildkite/scripts/upload-release-wheels-pypi.sh @@ -7,7 +7,7 @@ SUBPATH=$BUILDKITE_COMMIT S3_COMMIT_PREFIX="s3://$BUCKET/$SUBPATH/" RELEASE_VERSION=$(buildkite-agent meta-data get release-version) -GIT_VERSION=$(git describe --exact-match --tags $BUILDKITE_COMMIT 2>/dev/null) +GIT_VERSION=$(git describe --exact-match --tags "$BUILDKITE_COMMIT" 2>/dev/null) echo "Release version from Buildkite: $RELEASE_VERSION" @@ -55,7 +55,7 @@ mkdir -p $DIST_DIR aws s3 cp --recursive --exclude "*" --include "vllm-${PURE_VERSION}*.whl" --exclude "*dev*" --exclude "*rc[0-9]*" "$S3_COMMIT_PREFIX" $DIST_DIR echo "Wheels copied to local directory" # generate source tarball -git archive --format=tar.gz --output="$DIST_DIR/vllm-${PURE_VERSION}.tar.gz" $BUILDKITE_COMMIT +git archive --format=tar.gz --output="$DIST_DIR/vllm-${PURE_VERSION}.tar.gz" "$BUILDKITE_COMMIT" ls -la $DIST_DIR # upload wheels to PyPI (only default variant, i.e. files without '+' in the name) @@ -65,6 +65,6 @@ if [[ -z "$PYPI_WHEEL_FILES" ]]; then exit 1 fi -python3 -m twine check $PYPI_WHEEL_FILES -python3 -m twine upload --non-interactive --verbose $PYPI_WHEEL_FILES +echo "$PYPI_WHEEL_FILES" | xargs python3 -m twine check +echo "$PYPI_WHEEL_FILES" | xargs python3 -m twine upload --non-interactive --verbose echo "Wheels uploaded to PyPI" diff --git a/.buildkite/scripts/upload-rocm-wheels.sh b/.buildkite/scripts/upload-rocm-wheels.sh index bb555bc84..a42848a16 100755 --- a/.buildkite/scripts/upload-rocm-wheels.sh +++ b/.buildkite/scripts/upload-rocm-wheels.sh @@ -55,7 +55,7 @@ mkdir -p all-rocm-wheels cp artifacts/rocm-base-wheels/*.whl all-rocm-wheels/ 2>/dev/null || true cp artifacts/rocm-vllm-wheel/*.whl all-rocm-wheels/ 2>/dev/null || true -WHEEL_COUNT=$(ls all-rocm-wheels/*.whl 2>/dev/null | wc -l) +WHEEL_COUNT=$(find all-rocm-wheels -maxdepth 1 -name '*.whl' 2>/dev/null | wc -l) echo "Total wheels to upload: $WHEEL_COUNT" if [ "$WHEEL_COUNT" -eq 0 ]; then @@ -115,7
+115,7 @@ if [[ "$BUILDKITE_BRANCH" == "main" && "$BUILDKITE_PULL_REQUEST" == "false" ]] | fi # Extract version from vLLM wheel and update version-specific index -VLLM_WHEEL=$(ls all-rocm-wheels/vllm*.whl 2>/dev/null | head -1) +VLLM_WHEEL=$(find all-rocm-wheels -maxdepth 1 -name 'vllm*.whl' 2>/dev/null | head -1) if [ -n "$VLLM_WHEEL" ]; then VERSION=$(unzip -p "$VLLM_WHEEL" '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) echo "Version in wheel: $VERSION" diff --git a/benchmarks/auto_tune/auto_tune.sh b/benchmarks/auto_tune/auto_tune.sh index a245e2022..efb234a2d 100644 --- a/benchmarks/auto_tune/auto_tune.sh +++ b/benchmarks/auto_tune/auto_tune.sh @@ -46,10 +46,10 @@ echo "VLLM_LOGGING_LEVEL=$VLLM_LOGGING_LEVEL" echo "RESULT_FILE=$RESULT" echo "====================== AUTO TUNEPARAMETERS ====================" -rm -rf $LOG_FOLDER -rm -rf $PROFILE_PATH -mkdir -p $LOG_FOLDER -mkdir -p $PROFILE_PATH +rm -rf "$LOG_FOLDER" +rm -rf "$PROFILE_PATH" +mkdir -p "$LOG_FOLDER" +mkdir -p "$PROFILE_PATH" cd "$BASE/vllm" @@ -114,7 +114,7 @@ start_server() { # wait for 10 minutes... server_started=0 - for i in {1..60}; do + for _ in {1..60}; do # This line checks whether the server is still alive or not, # since that we should always have permission to send signal to the server process. kill -0 $server_pid 2> /dev/null || break @@ -145,12 +145,12 @@ run_benchmark() { local vllm_log="$LOG_FOLDER/vllm_log_${max_num_seqs}_${max_num_batched_tokens}.txt" echo "vllm_log: $vllm_log" echo - rm -f $vllm_log + rm -f "$vllm_log" pkill -if "vllm serve" || true echo "starting server..." # Call start_server without a profile_dir to avoid profiling overhead - start_server $gpu_memory_utilization $max_num_seqs $max_num_batched_tokens $vllm_log "" + start_server "$gpu_memory_utilization" "$max_num_seqs" "$max_num_batched_tokens" "$vllm_log" "" result=$? if [[ "$result" -eq 1 ]]; then echo "server failed to start. 
gpu_memory_utilization:$gpu_memory_utilization, max_num_seqs:$max_num_seqs, max_num_batched_tokens: $max_num_batched_tokens" @@ -168,15 +168,15 @@ run_benchmark() { # --profile flag is removed from this call vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name random \ --random-input-len $adjusted_input_len \ - --random-output-len $OUTPUT_LEN \ + --random-output-len "$OUTPUT_LEN" \ --ignore-eos \ --disable-tqdm \ --request-rate inf \ --percentile-metrics ttft,tpot,itl,e2el \ - --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \ --num-prompts 1000 \ --random-prefix-len $prefix_len \ --host "$HOSTNAME" \ @@ -195,20 +195,20 @@ run_benchmark() { request_rate=$((${throughput%.*} + 1)) while ((request_rate > 0)); do # clear prefix cache - curl -X POST http://${HOSTNAME}:8004/reset_prefix_cache + curl -X POST http://"${HOSTNAME}":8004/reset_prefix_cache sleep 5 bm_log="$LOG_FOLDER/bm_log_${max_num_seqs}_${max_num_batched_tokens}_requestrate_${request_rate}.txt" vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name random \ --random-input-len $adjusted_input_len \ - --random-output-len $OUTPUT_LEN \ + --random-output-len "$OUTPUT_LEN" \ --ignore-eos \ --disable-tqdm \ --request-rate $request_rate \ --percentile-metrics ttft,tpot,itl,e2el \ - --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \ --num-prompts 100 \ --random-prefix-len $prefix_len \ --host "$HOSTNAME" \ @@ -255,7 +255,7 @@ gpu_memory_utilization=0.98 find_gpu_memory_utilization=0 while (( $(echo "$gpu_memory_utilization >= 0.9" | bc -l) )); do # Pass empty string for profile_dir argument - start_server $gpu_memory_utilization "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" "$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log" "" + start_server "$gpu_memory_utilization" "${num_seqs_list[-1]}" "${num_batched_tokens_list[-1]}" 
"$LOG_FOLDER/vllm_log_gpu_memory_utilization_$gpu_memory_utilization.log" "" result=$? if [[ "$result" -eq 0 ]]; then find_gpu_memory_utilization=1 @@ -274,7 +274,7 @@ fi for num_seqs in "${num_seqs_list[@]}"; do for num_batched_tokens in "${num_batched_tokens_list[@]}"; do - run_benchmark $num_seqs $num_batched_tokens $gpu_memory_utilization + run_benchmark "$num_seqs" "$num_batched_tokens" "$gpu_memory_utilization" done done echo "finish permutations" @@ -285,7 +285,7 @@ echo "finish permutations" if (( $(echo "$best_throughput > 0" | bc -l) )); then echo echo "Benchmark tuning finished. Now running profiling on the best configuration found..." - echo "Best config: max_num_seqs: $best_max_num_seqs, max_num_batched_tokens: $best_num_batched_tokens, throughput: $best_throughput" + echo "Best config: max_num_seqs: $best_max_num_seqs, max_num_batched_tokens: $best_num_batched_tokens, throughput: $best_throughput, goodput: $best_goodput" echo vllm_log="$LOG_FOLDER/vllm_log_BEST_PROFILE.txt" @@ -293,7 +293,7 @@ if (( $(echo "$best_throughput > 0" | bc -l) )); then # Start server with the best params and profiling ENABLED echo "Starting server for profiling..." - start_server $gpu_memory_utilization $best_max_num_seqs $best_num_batched_tokens "$vllm_log" "$PROFILE_PATH" + start_server "$gpu_memory_utilization" "$best_max_num_seqs" "$best_num_batched_tokens" "$vllm_log" "$PROFILE_PATH" # Run benchmark with the best params and the --profile flag echo "Running benchmark with profiling..." 
@@ -301,15 +301,15 @@ if (( $(echo "$best_throughput > 0" | bc -l) )); then adjusted_input_len=$(( INPUT_LEN - prefix_len )) vllm bench serve \ --backend vllm \ - --model $MODEL \ + --model "$MODEL" \ --dataset-name random \ --random-input-len $adjusted_input_len \ - --random-output-len $OUTPUT_LEN \ + --random-output-len "$OUTPUT_LEN" \ --ignore-eos \ --disable-tqdm \ - --request-rate $best_request_rate \ + --request-rate "$best_request_rate" \ --percentile-metrics ttft,tpot,itl,e2el \ - --goodput e2el:$MAX_LATENCY_ALLOWED_MS \ + --goodput e2el:"$MAX_LATENCY_ALLOWED_MS" \ --num-prompts 100 \ --random-prefix-len $prefix_len \ --host "$HOSTNAME" \ diff --git a/benchmarks/auto_tune/batch_auto_tune.sh b/benchmarks/auto_tune/batch_auto_tune.sh index 57ef20daf..0f3ef0f03 100755 --- a/benchmarks/auto_tune/batch_auto_tune.sh +++ b/benchmarks/auto_tune/batch_auto_tune.sh @@ -64,7 +64,7 @@ for i in $(seq 0 $(($num_runs - 1))); do else STATUS="FAILURE" ((FAILURE_COUNT++)) - FAILED_RUNS+=("Run #$((i+1)): $(echo $run_object | jq -c .)") + FAILED_RUNS+=("Run #$((i+1)): $(echo "$run_object" | jq -c .)") fi RUN_OUTPUT=$(<"$RUN_OUTPUT_FILE") diff --git a/benchmarks/run_structured_output_benchmark.sh b/benchmarks/run_structured_output_benchmark.sh index b043ab83e..bc40ed83f 100755 --- a/benchmarks/run_structured_output_benchmark.sh +++ b/benchmarks/run_structured_output_benchmark.sh @@ -71,7 +71,7 @@ while [[ $# -gt 0 ]]; do usage ;; *) - echo "Unknown argument: $1\n" + printf "Unknown argument: %s\n" "$1" usage ;; esac @@ -84,15 +84,17 @@ mkdir -p "$OUTPUT_DIR" QPS_VALUES=(25 20 15 10 5 1) # Common parameters -COMMON_PARAMS="--backend $BACKEND \ - --model $MODEL \ - --dataset $DATASET \ - --structured-output-ratio $STRUCTURED_OUTPUT_RATIO \ - --save-results \ - --result-dir $OUTPUT_DIR \ - --output-len $MAX_NEW_TOKENS \ - --port $PORT \ - --tokenizer-mode $TOKENIZER_MODE" +COMMON_PARAMS=( + --backend "$BACKEND" + --model "$MODEL" + --dataset "$DATASET" + --structured-output-ratio 
"$STRUCTURED_OUTPUT_RATIO" + --save-results + --result-dir "$OUTPUT_DIR" + --output-len "$MAX_NEW_TOKENS" + --port "$PORT" + --tokenizer-mode "$TOKENIZER_MODE" +) echo "Starting structured output benchmark with model: $MODEL" echo "Backend: $BACKEND" @@ -109,17 +111,17 @@ for qps in "${QPS_VALUES[@]}"; do GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown") # Construct filename for this run - FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json" + FILENAME="${BACKEND}_${qps}qps_$(basename "$MODEL")_${DATASET}_${GIT_HASH}_${GIT_BRANCH}.json" NUM_PROMPTS=$(echo "$TOTAL_SECONDS * $qps" | bc) NUM_PROMPTS=${NUM_PROMPTS%.*} # Remove fractional part echo "Running benchmark with $NUM_PROMPTS prompts" # Run the benchmark - python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \ - --request-rate $qps \ + python "$SCRIPT_DIR/benchmark_serving_structured_output.py" "${COMMON_PARAMS[@]}" \ + --request-rate "$qps" \ --result-filename "$FILENAME" \ - --num-prompts $NUM_PROMPTS + --num-prompts "$NUM_PROMPTS" echo "Completed benchmark with QPS: $qps" echo "----------------------------------------" diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh b/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh index 95a418374..19459acc9 100644 --- a/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh +++ b/examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh @@ -8,7 +8,7 @@ declare -a PIDS=() ############################################################################### MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}" LOG_PATH="${LOG_PATH:-./logs}" -mkdir -p $LOG_PATH +mkdir -p "$LOG_PATH" ENCODE_PORT="${ENCODE_PORT:-19534}" PREFILL_PORT="${PREFILL_PORT:-19535}" @@ -84,10 +84,10 @@ trap cleanup TERM # clear previous cache echo "remove previous ec cache folder" -rm -rf $EC_SHARED_STORAGE_PATH +rm -rf "$EC_SHARED_STORAGE_PATH" echo "make ec 
cache folder" -mkdir -p $EC_SHARED_STORAGE_PATH +mkdir -p "$EC_SHARED_STORAGE_PATH" ############################################################################### # Encoder worker @@ -100,7 +100,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -124,7 +124,7 @@ vllm serve "$MODEL" \ --enforce-eager \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -152,7 +152,7 @@ vllm serve "$MODEL" \ --enforce-eager \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_consumer" @@ -162,9 +162,9 @@ vllm serve "$MODEL" \ PIDS+=($!) # Wait for workers -wait_for_server $ENCODE_PORT -wait_for_server $PREFILL_PORT -wait_for_server $DECODE_PORT +wait_for_server "$ENCODE_PORT" +wait_for_server "$PREFILL_PORT" +wait_for_server "$DECODE_PORT" ############################################################################### # Proxy @@ -179,7 +179,7 @@ python disagg_epd_proxy.py \ PIDS+=($!) -wait_for_server $PROXY_PORT +wait_for_server "$PROXY_PORT" echo "All services are up!" ############################################################################### @@ -187,14 +187,14 @@ echo "All services are up!" 
############################################################################### echo "Running benchmark (stream)..." vllm bench serve \ - --model $MODEL \ + --model "$MODEL" \ --backend openai-chat \ --endpoint /v1/chat/completions \ --dataset-name hf \ --dataset-path lmarena-ai/VisionArena-Chat \ --seed 0 \ - --num-prompts $NUM_PROMPTS \ - --port $PROXY_PORT + --num-prompts "$NUM_PROMPTS" \ + --port "$PROXY_PORT" PIDS+=($!) @@ -202,10 +202,10 @@ PIDS+=($!) # Single request with local image ############################################################################### echo "Running single request with local image (non-stream)..." -curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \ +curl http://127.0.0.1:"${PROXY_PORT}"/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "'${MODEL}'", + "model": "'"${MODEL}"'", "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ diff --git a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh b/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh index c4a591d74..18c278b2a 100644 --- a/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh +++ b/examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh @@ -8,7 +8,7 @@ declare -a PIDS=() ############################################################################### MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}" LOG_PATH="${LOG_PATH:-./logs}" -mkdir -p $LOG_PATH +mkdir -p "$LOG_PATH" ENCODE_PORT="${ENCODE_PORT:-19534}" PREFILL_DECODE_PORT="${PREFILL_DECODE_PORT:-19535}" @@ -78,10 +78,10 @@ trap cleanup TERM # clear previous cache echo "remove previous ec cache folder" -rm -rf $EC_SHARED_STORAGE_PATH +rm -rf "$EC_SHARED_STORAGE_PATH" echo "make ec cache folder" -mkdir -p $EC_SHARED_STORAGE_PATH +mkdir -p "$EC_SHARED_STORAGE_PATH" ############################################################################### # Encoder worker @@ -94,7 
+94,7 @@ CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -115,7 +115,7 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \ --enforce-eager \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -128,8 +128,8 @@ CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \ PIDS+=($!) # Wait for workers -wait_for_server $ENCODE_PORT -wait_for_server $PREFILL_DECODE_PORT +wait_for_server "$ENCODE_PORT" +wait_for_server "$PREFILL_DECODE_PORT" ############################################################################### # Proxy @@ -144,7 +144,7 @@ python disagg_epd_proxy.py \ PIDS+=($!) -wait_for_server $PROXY_PORT +wait_for_server "$PROXY_PORT" echo "All services are up!" ############################################################################### @@ -152,14 +152,14 @@ echo "All services are up!" ############################################################################### echo "Running benchmark (stream)..." vllm bench serve \ - --model $MODEL \ + --model "$MODEL" \ --backend openai-chat \ --endpoint /v1/chat/completions \ --dataset-name hf \ --dataset-path lmarena-ai/VisionArena-Chat \ --seed 0 \ - --num-prompts $NUM_PROMPTS \ - --port $PROXY_PORT + --num-prompts "$NUM_PROMPTS" \ + --port "$PROXY_PORT" PIDS+=($!) @@ -167,10 +167,10 @@ PIDS+=($!) 
# Single request with local image ############################################################################### echo "Running single request with local image (non-stream)..." -curl http://127.0.0.1:${PROXY_PORT}/v1/chat/completions \ +curl http://127.0.0.1:"${PROXY_PORT}"/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ - "model": "'${MODEL}'", + "model": "'"${MODEL}"'", "messages": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": [ diff --git a/examples/online_serving/disaggregated_prefill.sh b/examples/online_serving/disaggregated_prefill.sh index cd2f2e44a..3022711d7 100644 --- a/examples/online_serving/disaggregated_prefill.sh +++ b/examples/online_serving/disaggregated_prefill.sh @@ -54,7 +54,7 @@ wait_for_server() { # You can also adjust --kv-ip and --kv-port for distributed inference. # prefilling instance, which is the KV producer -CUDA_VISIBLE_DEVICES=0 vllm serve $MODEL_NAME \ +CUDA_VISIBLE_DEVICES=0 vllm serve "$MODEL_NAME" \ --host 0.0.0.0 \ --port 8100 \ --max-model-len 100 \ @@ -64,7 +64,7 @@ CUDA_VISIBLE_DEVICES=0 vllm serve $MODEL_NAME \ '{"kv_connector":"P2pNcclConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2,"kv_buffer_size":"1e9","kv_port":"14579","kv_connector_extra_config":{"proxy_ip":"'"$VLLM_HOST_IP"'","proxy_port":"30001","http_ip":"'"$VLLM_HOST_IP"'","http_port":"8100","send_type":"PUT_ASYNC"}}' & # decoding instance, which is the KV consumer -CUDA_VISIBLE_DEVICES=1 vllm serve $MODEL_NAME \ +CUDA_VISIBLE_DEVICES=1 vllm serve "$MODEL_NAME" \ --host 0.0.0.0 \ --port 8200 \ --max-model-len 100 \ diff --git a/examples/online_serving/disaggregated_serving/kv_events.sh b/examples/online_serving/disaggregated_serving/kv_events.sh index a111db217..533a12cb0 100644 --- a/examples/online_serving/disaggregated_serving/kv_events.sh +++ b/examples/online_serving/disaggregated_serving/kv_events.sh @@ -34,7 +34,7 @@ wait_for_server() { done" && return 0 || return 1 } -vllm 
serve $MODEL_NAME \ +vllm serve "$MODEL_NAME" \ --port 8100 \ --max-model-len 100 \ --enforce-eager \ diff --git a/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh b/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh index e38d377c3..5a3b939a9 100644 --- a/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh +++ b/examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh @@ -143,7 +143,7 @@ main() { IFS=',' read -ra BOOTSTRAP_PORT_ARRAY <<< "$BOOTSTRAP_PORTS" IFS=',' read -ra DECODE_PORT_ARRAY <<< "$DECODE_PORTS" - proxy_param="" + proxy_args=() # ============================================================================= # Launch Prefill Servers (X Producers) @@ -156,12 +156,12 @@ main() { local bootstrap_port=${BOOTSTRAP_PORT_ARRAY[$i]} echo " Prefill server $((i+1)): GPU $gpu_id, Port $port, Bootstrap Port $bootstrap_port" - VLLM_MOONCAKE_BOOTSTRAP_PORT=$bootstrap_port CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ - --port $port \ + VLLM_MOONCAKE_BOOTSTRAP_PORT=$bootstrap_port CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ + --port "$port" \ --kv-transfer-config \ "{\"kv_connector\":\"MooncakeConnector\",\"kv_role\":\"kv_producer\"}" > prefill$((i+1)).log 2>&1 & PIDS+=($!) - proxy_param="${proxy_param} --prefill http://0.0.0.0:${port} $bootstrap_port" + proxy_args+=(--prefill "http://0.0.0.0:${port}" "$bootstrap_port") done # ============================================================================= @@ -174,12 +174,12 @@ main() { local port=${DECODE_PORT_ARRAY[$i]} echo " Decode server $((i+1)): GPU $gpu_id, Port $port" - CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ - --port $port \ + CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ + --port "$port" \ --kv-transfer-config \ "{\"kv_connector\":\"MooncakeConnector\",\"kv_role\":\"kv_consumer\"}" > decode$((i+1)).log 2>&1 & PIDS+=($!) 
- proxy_param="${proxy_param} --decode http://0.0.0.0:${port}" + proxy_args+=(--decode "http://0.0.0.0:${port}") done # ============================================================================= @@ -187,7 +187,7 @@ main() { # ============================================================================= echo "" echo "Starting proxy server on port $PROXY_PORT..." - python3 mooncake_connector_proxy.py $proxy_param --port $PROXY_PORT > proxy.log 2>&1 & + python3 mooncake_connector_proxy.py "${proxy_args[@]}" --port "$PROXY_PORT" > proxy.log 2>&1 & PIDS+=($!) # ============================================================================= @@ -196,9 +196,10 @@ main() { echo "" echo "Waiting for all servers to start..." for port in "${PREFILL_PORT_ARRAY[@]}" "${DECODE_PORT_ARRAY[@]}"; do - if ! wait_for_server $port; then + if ! wait_for_server "$port"; then echo "Failed to start server on port $port" cleanup + # shellcheck disable=SC2317 exit 1 fi done @@ -209,8 +210,8 @@ main() { # ============================================================================= # Run Benchmark # ============================================================================= - vllm bench serve --port $PROXY_PORT --seed $(date +%s) \ - --backend vllm --model $MODEL \ + vllm bench serve --port "$PROXY_PORT" --seed "$(date +%s)" \ + --backend vllm --model "$MODEL" \ --dataset-name random --random-input-len 7500 --random-output-len 200 \ --num-prompts 200 --burstiness 100 --request-rate 2 | tee benchmark.log diff --git a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh index 1e7acccb4..603f9eb91 100644 --- a/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh +++ b/examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh @@ -166,10 +166,10 @@ main() { local kv_port=$((21001 + i)) echo " 
Prefill server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port" - CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ + CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ --enforce-eager \ --host 0.0.0.0 \ - --port $port \ + --port "$port" \ --tensor-parallel-size 1 \ --seed 1024 \ --dtype float16 \ @@ -194,10 +194,10 @@ main() { local kv_port=$((22001 + i)) echo " Decode server $((i+1)): GPU $gpu_id, Port $port, KV Port $kv_port" - CUDA_VISIBLE_DEVICES=$gpu_id vllm serve $MODEL \ + CUDA_VISIBLE_DEVICES=$gpu_id vllm serve "$MODEL" \ --enforce-eager \ --host 0.0.0.0 \ - --port $port \ + --port "$port" \ --tensor-parallel-size 1 \ --seed 1024 \ --dtype float16 \ @@ -217,9 +217,10 @@ main() { echo "" echo "Waiting for all servers to start..." for port in "${PREFILL_PORT_ARRAY[@]}" "${DECODE_PORT_ARRAY[@]}"; do - if ! wait_for_server $port; then + if ! wait_for_server "$port"; then echo "Failed to start server on port $port" cleanup + # shellcheck disable=SC2317 exit 1 fi done @@ -231,8 +232,8 @@ main() { # Run Benchmark # ============================================================================= cd ../../../benchmarks/ - vllm bench serve --port 10001 --seed $(date +%s) \ - --model $MODEL \ + vllm bench serve --port 10001 --seed "$(date +%s)" \ + --model "$MODEL" \ --dataset-name random --random-input-len 7500 --random-output-len 200 \ --num-prompts 200 --burstiness 100 --request-rate 2 | tee benchmark.log diff --git a/examples/online_serving/elastic_ep/bench.sh b/examples/online_serving/elastic_ep/bench.sh index e47631465..4f5dede43 100644 --- a/examples/online_serving/elastic_ep/bench.sh +++ b/examples/online_serving/elastic_ep/bench.sh @@ -50,8 +50,8 @@ while [[ $# -gt 0 ]]; do done vllm bench serve \ - --model $MODEL_NAME \ - --host $HOST \ - --port $PORT \ - --num-prompts $NUM_PROMPTS \ - --request-rate $REQUEST_RATE + --model "$MODEL_NAME" \ + --host "$HOST" \ + --port "$PORT" \ + --num-prompts "$NUM_PROMPTS" \ + --request-rate "$REQUEST_RATE" diff --git 
a/examples/online_serving/elastic_ep/serve_deepseek_v2.sh b/examples/online_serving/elastic_ep/serve_deepseek_v2.sh index 20bf598c0..b4e922099 100644 --- a/examples/online_serving/elastic_ep/serve_deepseek_v2.sh +++ b/examples/online_serving/elastic_ep/serve_deepseek_v2.sh @@ -57,15 +57,15 @@ echo "Starting vLLM server for $MODEL_NAME with data parallel size: $DATA_PARALL export RAY_DEDUP_LOGS=0 export VLLM_USE_DEEP_GEMM=1 -vllm serve $MODEL_NAME \ - --data-parallel-size $DATA_PARALLEL_SIZE \ - --data-parallel-size-local $DATA_PARALLEL_SIZE \ +vllm serve "$MODEL_NAME" \ + --data-parallel-size "$DATA_PARALLEL_SIZE" \ + --data-parallel-size-local "$DATA_PARALLEL_SIZE" \ --data-parallel-backend ray \ --enforce-eager \ --enable-expert-parallel \ --enable-eplb \ --all2all-backend pplx \ - --num-redundant-experts $REDUNDANT_EXPERTS \ + --num-redundant-experts "$REDUNDANT_EXPERTS" \ --trust-remote-code \ - --host $HOST \ - --port $PORT + --host "$HOST" \ + --port "$PORT" diff --git a/examples/online_serving/multi-node-serving.sh b/examples/online_serving/multi-node-serving.sh index 3fc5502fb..d2823bb8f 100644 --- a/examples/online_serving/multi-node-serving.sh +++ b/examples/online_serving/multi-node-serving.sh @@ -57,8 +57,7 @@ case "$subcommand" in # Retry until the worker node connects to the head node or the timeout expires. for (( i=0; i < $ray_init_timeout; i+=5 )); do - ray start --address=$ray_address:$ray_port --block "${start_params[@]}" - if [ $? -eq 0 ]; then + if ray start --address="$ray_address":"$ray_port" --block "${start_params[@]}"; then echo "Worker: Ray runtime started with head address $ray_address:$ray_port" exit 0 fi @@ -95,12 +94,12 @@ case "$subcommand" in fi # Start the Ray head node. - ray start --head --port=$ray_port "${start_params[@]}" + ray start --head --port="$ray_port" "${start_params[@]}" # Poll Ray until every worker node is active. 
for (( i=0; i < $ray_init_timeout; i+=5 )); do - active_nodes=`python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))'` - if [ $active_nodes -eq $ray_cluster_size ]; then + active_nodes=$(python3 -c 'import ray; ray.init(); print(sum(node["Alive"] for node in ray.nodes()))') + if [ "$active_nodes" -eq "$ray_cluster_size" ]; then echo "All ray workers are active and the ray cluster is initialized successfully." exit 0 fi diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh index a409c49b5..3636d7e99 100644 --- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh +++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh @@ -22,11 +22,10 @@ check_hf_token() { check_num_gpus() { # can you check if the number of GPUs are >=2 via nvidia-smi/rocm-smi? - which rocm-smi > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if ! which rocm-smi > /dev/null 2>&1; then num_gpus=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) else - num_gpus=$(rocm-smi --showid | grep Instinct | wc -l) + num_gpus=$(rocm-smi --showid | grep -c Instinct) fi if [ "$num_gpus" -lt 2 ]; then @@ -39,8 +38,7 @@ check_num_gpus() { ensure_python_library_installed() { echo "Checking if $1 is installed..." - python3 -c "import $1" > /dev/null 2>&1 - if [ $? -ne 0 ]; then + if ! python3 -c "import $1" > /dev/null 2>&1; then if [ "$1" == "nixl" ]; then echo "$1 is not installed. Please refer to https://github.com/ai-dynamo/nixl for installation." else @@ -102,12 +100,12 @@ main() { bash disagg_vllm_launcher.sh prefiller \ > >(tee prefiller.log) 2>&1 & prefiller_pid=$! - PIDS+=($prefiller_pid) + PIDS+=("$prefiller_pid") bash disagg_vllm_launcher.sh decoder \ > >(tee decoder.log) 2>&1 & decoder_pid=$! 
- PIDS+=($decoder_pid) + PIDS+=("$decoder_pid") python3 disagg_proxy_server.py \ --host localhost \ @@ -118,7 +116,7 @@ main() { --decoder-port 8200 \ > >(tee proxy.log) 2>&1 & proxy_pid=$! - PIDS+=($proxy_pid) + PIDS+=("$proxy_pid") wait_for_server 8100 wait_for_server 8200 @@ -128,7 +126,7 @@ main() { # begin benchmark cd ../../../../benchmarks/ - vllm bench serve --port 9000 --seed $(date +%s) \ + vllm bench serve --port 9000 --seed "$(date +%s)" \ --model meta-llama/Llama-3.1-8B-Instruct \ --dataset-name random --random-input-len 7500 --random-output-len 200 \ --num-prompts 200 --burstiness 100 --request-rate 3.6 | tee benchmark.log diff --git a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh index 682df45d9..363c35028 100644 --- a/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh +++ b/examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh @@ -34,7 +34,7 @@ if [[ $1 == "prefiller" ]]; then VLLM_ENABLE_V1_MULTIPROCESSING=1 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ CUDA_VISIBLE_DEVICES=0 \ - vllm serve $MODEL \ + vllm serve "$MODEL" \ --port 8100 \ --enforce-eager \ --kv-transfer-config \ @@ -51,7 +51,7 @@ elif [[ $1 == "decoder" ]]; then VLLM_ENABLE_V1_MULTIPROCESSING=1 \ VLLM_WORKER_MULTIPROC_METHOD=spawn \ CUDA_VISIBLE_DEVICES=1 \ - vllm serve $MODEL \ + vllm serve "$MODEL" \ --port 8200 \ --enforce-eager \ --kv-transfer-config \ diff --git a/examples/pooling/embed/openai_embedding_long_text/service.sh b/examples/pooling/embed/openai_embedding_long_text/service.sh index 0353b8f5a..37a8b625b 100644 --- a/examples/pooling/embed/openai_embedding_long_text/service.sh +++ b/examples/pooling/embed/openai_embedding_long_text/service.sh @@ -103,7 +103,7 @@ vllm serve "$MODEL_NAME" \ --tensor-parallel-size "$GPU_COUNT" \ --enforce-eager \ --pooler-config "$POOLER_CONFIG" \ - --served-model-name ${MODEL_CODE} \ + 
--served-model-name "${MODEL_CODE}" \ --api-key "$API_KEY" \ --trust-remote-code \ --port "$PORT" \ diff --git a/tests/standalone_tests/python_only_compile.sh b/tests/standalone_tests/python_only_compile.sh index ebf199a50..adfab1139 100644 --- a/tests/standalone_tests/python_only_compile.sh +++ b/tests/standalone_tests/python_only_compile.sh @@ -6,7 +6,7 @@ set -e merge_base_commit=$(git merge-base HEAD origin/main) echo "INFO: current merge base commit with main: $merge_base_commit" -git show --oneline -s $merge_base_commit +git show --oneline -s "$merge_base_commit" # test whether the metadata.json url is valid, retry each 3 minutes up to 5 times # this avoids cumbersome error messages & manual retries in case the precompiled wheel @@ -40,7 +40,7 @@ for i in {1..5}; do fi fi # failure handling & retry logic - if [ $i -eq 5 ]; then + if [ "$i" -eq 5 ]; then echo "ERROR: metadata is still not available after 5 attempts." echo "ERROR: Please check whether the precompiled wheel for commit $merge_base_commit is available." echo " NOTE: If $merge_base_commit is a new commit on main, maybe try again after its release pipeline finishes." 
diff --git a/tests/v1/ec_connector/integration/run_epd_correctness_test.sh b/tests/v1/ec_connector/integration/run_epd_correctness_test.sh index 0c2666306..ffe9cac38 100644 --- a/tests/v1/ec_connector/integration/run_epd_correctness_test.sh +++ b/tests/v1/ec_connector/integration/run_epd_correctness_test.sh @@ -24,7 +24,7 @@ MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}" # Set 1 to use multimodal prompts; else to use text-only USE_MM_PROMPTS="${USE_MM_PROMPTS:-1}" MM_FLAG="" -if [ $USE_MM_PROMPTS = "1" ]; then +if [ "$USE_MM_PROMPTS" = "1" ]; then MM_FLAG="--use_mm_prompts" fi @@ -51,7 +51,7 @@ LOG_PATH="${LOG_PATH:-/tmp}" BASELINE_FILE="${BASELINE_FILE:-/tmp/vllm_baseline.txt}" BASELINE_PD_FILE="${BASELINE_PD_FILE:-/tmp/vllm_epd_baseline.txt}" -mkdir -p $LOG_PATH +mkdir -p "$LOG_PATH" # Trap the SIGINT signal (triggered by Ctrl+C) trap 'kill $(jobs -pr)' SIGINT SIGTERM EXIT @@ -87,20 +87,20 @@ run_baseline() { # Start baseline instance echo "Starting baseline instance on GPU $GPU_SINGLE, port $PORT" CUDA_VISIBLE_DEVICES="$GPU_SINGLE" vllm serve "$MODEL" \ - --port $PORT \ + --port "$PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ - > $LOG_PATH/baseline.log 2>&1 & + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ + > "$LOG_PATH"/baseline.log 2>&1 & local BASELINE_PID=$! # Wait for baseline to start echo "Waiting for baseline instance to start..." 
- wait_for_server $PORT + wait_for_server "$PORT" - curl http://127.0.0.1:$PORT/v1/models + curl http://127.0.0.1:"$PORT"/v1/models echo "" # Run test in baseline mode @@ -139,14 +139,14 @@ run_epd_1e_1pd() { # Start encoder instance echo "Starting encoder instance on GPU $GPU_E, port $ENCODE_PORT" CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ - --port $ENCODE_PORT \ + --port "$ENCODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.01 \ --enable-request-id-headers \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -154,18 +154,18 @@ run_epd_1e_1pd() { "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'" } }' \ - > $LOG_PATH/1e1pd_encoder.log 2>&1 & + > "$LOG_PATH"/1e1pd_encoder.log 2>&1 & PIDS+=($!) # Start prefill+decode instance echo "Starting PD instance on GPU $GPU_PD, port $PREFILL_DECODE_PORT" CUDA_VISIBLE_DEVICES="$GPU_PD" vllm serve "$MODEL" \ - --port $PREFILL_DECODE_PORT \ + --port "$PREFILL_DECODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -173,32 +173,32 @@ run_epd_1e_1pd() { "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'" } }' \ - > $LOG_PATH/1e1pd_pd.log 2>&1 & + > "$LOG_PATH"/1e1pd_pd.log 2>&1 & PIDS+=($!) # Wait for instances to start echo "Waiting for encoder instance..." - wait_for_server $ENCODE_PORT + wait_for_server "$ENCODE_PORT" echo "Waiting for PD instance..." 
- wait_for_server $PREFILL_DECODE_PORT + wait_for_server "$PREFILL_DECODE_PORT" # Start proxy echo "Starting EPD proxy on port $PROXY_PORT" python "${GIT_ROOT}/examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py" \ --host "0.0.0.0" \ - --port $PROXY_PORT \ + --port "$PROXY_PORT" \ --encode-servers-urls "http://localhost:$ENCODE_PORT" \ --prefill-servers-urls "disable" \ --decode-servers-urls "http://localhost:$PREFILL_DECODE_PORT" \ - > $LOG_PATH/1e1pd_proxy.log 2>&1 & + > "$LOG_PATH"/1e1pd_proxy.log 2>&1 & PIDS+=($!) # Wait for proxy echo "Waiting for proxy..." - wait_for_server $PROXY_PORT + wait_for_server "$PROXY_PORT" - curl http://127.0.0.1:$PROXY_PORT/v1/models - curl http://127.0.0.1:$PROXY_PORT/health + curl http://127.0.0.1:"$PROXY_PORT"/v1/models + curl http://127.0.0.1:"$PROXY_PORT"/health echo "" echo "All EPD (1E+1PD) services are up!" @@ -217,7 +217,7 @@ run_epd_1e_1pd() { echo "✓✓ 1E+1PD Correctness Test finished" echo "Stopping EPD (1E+1PD) instances..." for pid in "${PIDS[@]}"; do - kill $pid 2>/dev/null || true + kill "$pid" 2>/dev/null || true done sleep 2 cleanup_instances @@ -244,17 +244,17 @@ run_baseline_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_P" \ VLLM_NIXL_SIDE_CHANNEL_PORT=5559 \ vllm serve "$MODEL" \ - --port $PREFILL_PORT \ + --port "$PREFILL_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_producer" }' \ - > $LOG_PATH/1p1d_prefill.log 2>&1 & + > "$LOG_PATH"/1p1d_prefill.log 2>&1 & PIDS+=($!) 
# Start decode instance @@ -262,40 +262,40 @@ run_baseline_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_D" \ VLLM_NIXL_SIDE_CHANNEL_PORT=6000 \ vllm serve "$MODEL" \ - --port $DECODE_PORT \ + --port "$DECODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_consumer" }' \ - > $LOG_PATH/1p1d_decode.log 2>&1 & + > "$LOG_PATH"/1p1d_decode.log 2>&1 & PIDS+=($!) # Wait for instances to start echo "Waiting for prefill instance..." - wait_for_server $PREFILL_PORT + wait_for_server "$PREFILL_PORT" echo "Waiting for decode instance..." - wait_for_server $DECODE_PORT + wait_for_server "$DECODE_PORT" # Start proxy echo "Starting EPD proxy on port $PROXY_PORT" python "${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py" \ --host "0.0.0.0" \ - --port $PROXY_PORT \ - --prefiller-ports $PREFILL_PORT \ - --decoder-ports $DECODE_PORT \ - > $LOG_PATH/1p1d_proxy.log 2>&1 & + --port "$PROXY_PORT" \ + --prefiller-ports "$PREFILL_PORT" \ + --decoder-ports "$DECODE_PORT" \ + > "$LOG_PATH"/1p1d_proxy.log 2>&1 & PIDS+=($!) # Wait for proxy echo "Waiting for proxy..." - wait_for_server $PROXY_PORT + wait_for_server "$PROXY_PORT" - curl http://127.0.0.1:$PROXY_PORT/healthcheck + curl http://127.0.0.1:"$PROXY_PORT"/healthcheck echo "" echo "All PD (1P+1D) services are up!" @@ -313,7 +313,7 @@ run_baseline_1p_1d() { # Cleanup echo "Stopping PD (1P+1D) instances..." 
for pid in "${PIDS[@]}"; do - kill $pid 2>/dev/null || true + kill "$pid" 2>/dev/null || true done sleep 2 cleanup_instances @@ -339,14 +339,14 @@ run_epd_1e_1p_1d() { # Start encoder instance echo "Starting encoder instance on GPU $GPU_E, port $ENCODE_PORT" CUDA_VISIBLE_DEVICES="$GPU_E" vllm serve "$MODEL" \ - --port $ENCODE_PORT \ + --port "$ENCODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.01 \ --enable-request-id-headers \ --no-enable-prefix-caching \ --max-num-batched-tokens 114688 \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_producer", @@ -354,7 +354,7 @@ run_epd_1e_1p_1d() { "shared_storage_path": "'"$EC_SHARED_STORAGE_PATH"'" } }' \ - > $LOG_PATH/1e1p1d_encoder.log 2>&1 & + > "$LOG_PATH"/1e1p1d_encoder.log 2>&1 & PIDS+=($!) # Start prefill instance @@ -362,12 +362,12 @@ run_epd_1e_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_P" \ VLLM_NIXL_SIDE_CHANNEL_PORT=5559 \ vllm serve "$MODEL" \ - --port $PREFILL_PORT \ + --port "$PREFILL_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --ec-transfer-config '{ "ec_connector": "ECExampleConnector", "ec_role": "ec_consumer", @@ -379,7 +379,7 @@ run_epd_1e_1p_1d() { "kv_connector": "NixlConnector", "kv_role": "kv_producer" }' \ - > $LOG_PATH/1e1p1d_prefill.log 2>&1 & + > "$LOG_PATH"/1e1p1d_prefill.log 2>&1 & PIDS+=($!) 
# Start decode instance @@ -387,44 +387,44 @@ run_epd_1e_1p_1d() { CUDA_VISIBLE_DEVICES="$GPU_D" \ VLLM_NIXL_SIDE_CHANNEL_PORT=6000 \ vllm serve "$MODEL" \ - --port $DECODE_PORT \ + --port "$DECODE_PORT" \ --enforce-eager \ --gpu-memory-utilization 0.7 \ --enable-request-id-headers \ --max-num-seqs 128 \ - --allowed-local-media-path ${GIT_ROOT}/tests/v1/ec_connector/integration \ + --allowed-local-media-path "${GIT_ROOT}"/tests/v1/ec_connector/integration \ --kv-transfer-config '{ "kv_connector": "NixlConnector", "kv_role": "kv_consumer" }' \ - > $LOG_PATH/1e1p1d_decode.log 2>&1 & + > "$LOG_PATH"/1e1p1d_decode.log 2>&1 & PIDS+=($!) # Wait for instances to start echo "Waiting for encoder instance..." - wait_for_server $ENCODE_PORT + wait_for_server "$ENCODE_PORT" echo "Waiting for prefill instance..." - wait_for_server $PREFILL_PORT + wait_for_server "$PREFILL_PORT" echo "Waiting for decode instance..." - wait_for_server $DECODE_PORT + wait_for_server "$DECODE_PORT" # Start proxy echo "Starting EPD proxy on port $PROXY_PORT" python "${GIT_ROOT}/examples/online_serving/disaggregated_encoder/disagg_epd_proxy.py" \ --host "0.0.0.0" \ - --port $PROXY_PORT \ + --port "$PROXY_PORT" \ --encode-servers-urls "http://localhost:$ENCODE_PORT" \ --prefill-servers-urls "http://localhost:$PREFILL_PORT" \ --decode-servers-urls "http://localhost:$DECODE_PORT" \ - > $LOG_PATH/1e1p1d_proxy.log 2>&1 & + > "$LOG_PATH"/1e1p1d_proxy.log 2>&1 & PIDS+=($!) # Wait for proxy echo "Waiting for proxy..." - wait_for_server $PROXY_PORT + wait_for_server "$PROXY_PORT" - curl http://127.0.0.1:$PROXY_PORT/v1/models - curl http://127.0.0.1:$PROXY_PORT/health + curl http://127.0.0.1:"$PROXY_PORT"/v1/models + curl http://127.0.0.1:"$PROXY_PORT"/health echo "" echo "All EPD (1E+1P+1D) services are up!" @@ -443,7 +443,7 @@ run_epd_1e_1p_1d() { echo "✓✓ 1E+1P+1D Correctness Test finished" echo "Stopping EPD (1E+1P+1D) instances..." 
for pid in "${PIDS[@]}"; do - kill $pid 2>/dev/null || true + kill "$pid" 2>/dev/null || true done sleep 2 cleanup_instances diff --git a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh index cdbcdca54..abdf88ad6 100755 --- a/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh @@ -32,9 +32,14 @@ run_tests() { echo "=== Running tests (${label}) ===" for cfg in "${configs[@]}"; do + local -a cfg_parts extra_args_parts + read -r -a cfg_parts <<< "$cfg" + read -r -a extra_args_parts <<< "$extra_args" + echo "-> Running with ${cfg} ${extra_args:+and ${extra_args}}" # Use 'env' to safely set variables without eval - if ! env ${cfg} bash "${SCRIPT}" ${extra_args}; then + # keep argv splitting safe and SC2086-clean via arrays. + if ! env "${cfg_parts[@]}" bash "${SCRIPT}" "${extra_args_parts[@]}"; then echo "❌ Test failed for config: ${cfg} ${extra_args:+(${extra_args})}" exit 1 fi diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh index 560ce4407..58ae42126 100755 --- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh @@ -109,9 +109,9 @@ get_model_args() { get_num_gpus() { if [[ "$SMI_BIN" == *"nvidia"* ]]; then - echo "$($SMI_BIN --query-gpu=name --format=csv,noheader | wc -l)" + $SMI_BIN --query-gpu=name --format=csv,noheader | wc -l elif [[ "$SMI_BIN" == *"rocm"* ]]; then - echo "$($SMI_BIN -l | grep GPU | wc -l)" + $SMI_BIN -l | grep -c GPU else # works for non-cuda platforms, # assuming at least 1 device and @@ -182,7 +182,7 @@ run_tests_for_model() { # Store host and port for proxy configuration PREFILL_HOSTS+=("localhost") - PREFILL_PORTS+=($PORT) + PREFILL_PORTS+=("$PORT") done # Start decode instances @@ -237,30 +237,30 @@ 
run_tests_for_model() { # Store host and port for proxy configuration DECODE_HOSTS+=("localhost") - DECODE_PORTS+=($PORT) + DECODE_PORTS+=("$PORT") done # Wait for all instances to start for PORT in "${PREFILL_PORTS[@]}"; do echo "Waiting for prefill instance on port $PORT to start..." - wait_for_server $PORT + wait_for_server "$PORT" done for PORT in "${DECODE_PORTS[@]}"; do echo "Waiting for decode instance on port $PORT to start..." - wait_for_server $PORT + wait_for_server "$PORT" done # Build the command for the proxy server with all the hosts and ports PROXY_CMD="python3 ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/toy_proxy_server.py --port 8192" # Add all prefill hosts and ports - PROXY_CMD+=" --prefiller-hosts ${PREFILL_HOSTS[@]}" - PROXY_CMD+=" --prefiller-ports ${PREFILL_PORTS[@]}" + PROXY_CMD+=" --prefiller-hosts ${PREFILL_HOSTS[*]}" + PROXY_CMD+=" --prefiller-ports ${PREFILL_PORTS[*]}" # Add all decode hosts and ports - PROXY_CMD+=" --decoder-hosts ${DECODE_HOSTS[@]}" - PROXY_CMD+=" --decoder-ports ${DECODE_PORTS[@]}" + PROXY_CMD+=" --decoder-hosts ${DECODE_HOSTS[*]}" + PROXY_CMD+=" --decoder-ports ${DECODE_PORTS[*]}" # Start the proxy server echo "Starting proxy server with command: $PROXY_CMD" @@ -271,7 +271,7 @@ run_tests_for_model() { # Run lm eval for this model echo "Running tests for $model_name" - TEST_MODEL=$model_name python3 -m pytest -s -x ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_accuracy.py + TEST_MODEL=$model_name python3 -m pytest -s -x "${GIT_ROOT}"/tests/v1/kv_connector/nixl_integration/test_accuracy.py # Clean up before running next model cleanup_instances diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh index c48b452e2..23b2a0b1c 100755 --- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh @@ -114,10 +114,10 @@ run_tests_for_model() { eval 
"$FULL_CMD &" # Wait for all instances to start - echo "Waiting for prefill instance on port $PORT to start..." - wait_for_server $PREFILL_PORT - echo "Waiting for decode instance on port $PORT to start..." - wait_for_server $DECODE_PORT + echo "Waiting for prefill instance on port $PREFILL_PORT to start..." + wait_for_server "$PREFILL_PORT" + echo "Waiting for decode instance on port $DECODE_PORT to start..." + wait_for_server "$DECODE_PORT" # Build the command for the proxy server with all the hosts and ports PROXY_PORT=8192 @@ -133,7 +133,7 @@ run_tests_for_model() { # Run lm eval for this model echo "Running tests for $model_name" - PREFILL_PORT=$PREFILL_PORT DECODE_PORT=$DECODE_PORT PROXY_PORT=$PROXY_PORT python -m pytest -s -v ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_edge_cases.py + PREFILL_PORT=$PREFILL_PORT DECODE_PORT=$DECODE_PORT PROXY_PORT=$PROXY_PORT python -m pytest -s -v "${GIT_ROOT}"/tests/v1/kv_connector/nixl_integration/test_edge_cases.py # Clean up before running next model cleanup_instances diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh index fa1738bb3..407542eb8 100644 --- a/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh @@ -63,8 +63,8 @@ launch_baseline() { --block-size ${BLOCK_SIZE} \ --gpu-memory-utilization 0.5 \ --enforce-eager" - echo ${BASELINE_BASE_CMD} - ssh -tt ${BASELINE_HOST} "${BASELINE_BASE_CMD}" & + echo "${BASELINE_BASE_CMD}" + ssh -tt "${BASELINE_HOST}" "${BASELINE_BASE_CMD}" & } launch_pd() { @@ -103,17 +103,17 @@ launch_pd() { --gpu-memory-utilization 0.5 \ --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'" - echo ${PREFILL_BASE_CMD} - echo ${DECODE_BASE_CMD} + echo "${PREFILL_BASE_CMD}" + echo "${DECODE_BASE_CMD}" sleep 2 # execute on hosts - 
ssh -tt ${PREFILL_HOST} "${PREFILL_BASE_CMD}" & - ssh -tt ${DECODE_HOST} "${DECODE_BASE_CMD}" & + ssh -tt "${PREFILL_HOST}" "${PREFILL_BASE_CMD}" & + ssh -tt "${DECODE_HOST}" "${DECODE_BASE_CMD}" & sleep 1 - wait_for_server ${PREFILL_HOST} ${PREFILL_PORT} + wait_for_server "${PREFILL_HOST}" "${PREFILL_PORT}" sleep 1 - wait_for_server ${DECODE_HOST} ${DECODE_PORT} + wait_for_server "${DECODE_HOST}" "${DECODE_PORT}" sleep 1 } @@ -123,21 +123,21 @@ launch_pd_proxy(){ --prefiller-host ${PREFILL_HOST} --prefiller-port ${PREFILL_PORT} \ --decoder-host ${DECODE_HOST} --decoder-port ${DECODE_PORT} \ --host=${PROXY_HOST} --port ${PROXY_PORT}" - echo ${PROXY_BASE_CMD} - ssh -tt ${PROXY_HOST} "${PROXY_BASE_CMD}" & + echo "${PROXY_BASE_CMD}" + ssh -tt "${PROXY_HOST}" "${PROXY_BASE_CMD}" & } run_tests(){ local service_url=$1 local mode=$2 - python3 ${EXP_ROOT}/test_disagg_accuracy.py --service_url=${service_url} --model_name=${MODEL_NAME} --mode=${mode} --file_name=${OUTPUT_FILE} + python3 "${EXP_ROOT}"/test_disagg_accuracy.py --service_url="${service_url}" --model_name="${MODEL_NAME}" --mode="${mode}" --file_name="${OUTPUT_FILE}" } # run non-disagg. 
baseline & save outputs launch_baseline sleep 2 -wait_for_server ${BASELINE_HOST} ${BASELINE_PORT} +wait_for_server "${BASELINE_HOST}" "${BASELINE_PORT}" run_tests "http://${BASELINE_HOST}:${BASELINE_PORT}" "baseline" cleanup sleep 10 @@ -150,7 +150,7 @@ sleep 10 run_tests "http://${PROXY_HOST}:${PROXY_PORT}" "disagg" echo "-----P/D success----" -rm ${OUTPUT_FILE} +rm "${OUTPUT_FILE}" cleanup exit 0 \ No newline at end of file diff --git a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh index 3d6382237..f32ef5e76 100644 --- a/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh @@ -86,17 +86,17 @@ launch_pd() { --gpu-memory-utilization 0.5 \ --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"cpu\"}'" - echo ${PREFILL_BASE_CMD} - echo ${DECODE_BASE_CMD} + echo "${PREFILL_BASE_CMD}" + echo "${DECODE_BASE_CMD}" sleep 2 # execute on hosts - ssh -tt ${PREFILL_HOST} "${PREFILL_BASE_CMD}" & - ssh -tt ${DECODE_HOST} "${DECODE_BASE_CMD}" & + ssh -tt "${PREFILL_HOST}" "${PREFILL_BASE_CMD}" & + ssh -tt "${DECODE_HOST}" "${DECODE_BASE_CMD}" & sleep 1 - wait_for_server ${PREFILL_HOST} ${PREFILL_PORT} + wait_for_server "${PREFILL_HOST}" "${PREFILL_PORT}" sleep 1 - wait_for_server ${DECODE_HOST} ${DECODE_PORT} + wait_for_server "${DECODE_HOST}" "${DECODE_PORT}" sleep 1 } @@ -106,8 +106,8 @@ launch_pd_proxy(){ --prefiller-host ${PREFILL_HOST} --prefiller-port ${PREFILL_PORT} \ --decoder-host ${DECODE_HOST} --decoder-port ${DECODE_PORT} \ --host=${PROXY_HOST} --port ${PROXY_PORT}" - echo ${PROXY_BASE_CMD} - ssh -tt ${PROXY_HOST} "${PROXY_BASE_CMD}" & + echo "${PROXY_BASE_CMD}" + ssh -tt "${PROXY_HOST}" "${PROXY_BASE_CMD}" & } @@ -121,4 +121,4 @@ PREFILL_PORT=${PREFILL_PORT} \ DECODE_HOST=${DECODE_HOST} \ DECODE_PORT=${DECODE_PORT} \ PROXY_HOST=${PROXY_HOST} \ 
-PROXY_PORT=${PROXY_PORT} python -m pytest -s -v ${GIT_ROOT}/tests/v1/kv_connector/nixl_integration/test_edge_cases.py \ No newline at end of file +PROXY_PORT=${PROXY_PORT} python -m pytest -s -v "${GIT_ROOT}"/tests/v1/kv_connector/nixl_integration/test_edge_cases.py diff --git a/tools/ep_kernels/elastic_ep/install_eep_libraries.sh b/tools/ep_kernels/elastic_ep/install_eep_libraries.sh index 9d7dc1032..fe7b86215 100755 --- a/tools/ep_kernels/elastic_ep/install_eep_libraries.sh +++ b/tools/ep_kernels/elastic_ep/install_eep_libraries.sh @@ -23,7 +23,7 @@ while getopts "w:n" opt; do done if [ ! -d "$WORKSPACE" ]; then - mkdir -p $WORKSPACE + mkdir -p "$WORKSPACE" fi @@ -31,7 +31,7 @@ fi pip3 install cmake torch ninja # build nvshmem -pushd $WORKSPACE +pushd "$WORKSPACE" # Reset NVSHMEM build if requested if [ "$INSTALL_NVSHMEM" = true ]; then mkdir -p nvshmem_src @@ -69,15 +69,15 @@ export NVSHMEM_BUILD_HYDRA_LAUNCHER=0 export NVSHMEM_BUILD_TXZ_PACKAGE=0 export NVSHMEM_TIMEOUT_DEVICE_POLLING=0 -cmake -G Ninja -S . -B $WORKSPACE/nvshmem_build/ -DCMAKE_INSTALL_PREFIX=$WORKSPACE/nvshmem_install -cmake --build $WORKSPACE/nvshmem_build/ --target install +cmake -G Ninja -S . 
-B "$WORKSPACE"/nvshmem_build/ -DCMAKE_INSTALL_PREFIX="$WORKSPACE"/nvshmem_install +cmake --build "$WORKSPACE"/nvshmem_build/ --target install popd export CMAKE_PREFIX_PATH=$WORKSPACE/nvshmem_install:$CMAKE_PREFIX_PATH # build and install pplx, require pytorch installed -pushd $WORKSPACE +pushd "$WORKSPACE" git clone https://github.com/ppl-ai/pplx-kernels cd pplx-kernels # see https://github.com/pypa/pip/issues/9955#issuecomment-838065925 diff --git a/tools/ep_kernels/install_python_libraries.sh b/tools/ep_kernels/install_python_libraries.sh index 89da24f95..148cb6e18 100755 --- a/tools/ep_kernels/install_python_libraries.sh +++ b/tools/ep_kernels/install_python_libraries.sh @@ -14,7 +14,7 @@ DEEPEP_COMMIT_HASH=${DEEPEP_COMMIT_HASH:-"73b6ea4"} NVSHMEM_VER=${NVSHMEM_VER:-"3.3.24"} # Default supports both CUDA 12 and 13 WORKSPACE=${WORKSPACE:-$(pwd)/ep_kernels_workspace} MODE=${MODE:-install} -CUDA_VERSION_MAJOR=$(${CUDA_HOME}/bin/nvcc --version | egrep -o "release [0-9]+" | cut -d ' ' -f 2) +CUDA_VERSION_MAJOR=$("${CUDA_HOME}"/bin/nvcc --version | grep -E -o "release [0-9]+" | cut -d ' ' -f 2) # Parse arguments while [[ $# -gt 0 ]]; do diff --git a/tools/flashinfer-build.sh b/tools/flashinfer-build.sh index b3cc6c308..8bb630070 100755 --- a/tools/flashinfer-build.sh +++ b/tools/flashinfer-build.sh @@ -5,8 +5,6 @@ set -ex # FlashInfer configuration FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" -FLASHINFER_GIT_REF="${FLASHINFER_GIT_REF}" -CUDA_VERSION="${CUDA_VERSION}" BUILD_WHEEL="${BUILD_WHEEL:-true}" if [[ -z "${FLASHINFER_GIT_REF}" ]]; then @@ -23,7 +21,7 @@ echo "🏗️ Building FlashInfer ${FLASHINFER_GIT_REF} for CUDA ${CUDA_VERSION # Clone FlashInfer git clone --depth 1 --recursive --shallow-submodules \ - --branch ${FLASHINFER_GIT_REF} \ + --branch "${FLASHINFER_GIT_REF}" \ ${FLASHINFER_GIT_REPO} flashinfer # Set CUDA arch list based on CUDA version @@ -44,7 +42,7 @@ echo "🏗️ Building FlashInfer AOT for arches: ${FI_TORCH_CUDA_ARCH_LIST}" 
pushd flashinfer # Make sure the wheel is built for the correct CUDA version - export UV_TORCH_BACKEND=cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + export UV_TORCH_BACKEND=cu$(echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.') # Build AOT kernels export TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" @@ -63,4 +61,4 @@ pushd flashinfer popd # Cleanup -rm -rf flashinfer \ No newline at end of file +rm -rf flashinfer diff --git a/tools/install_deepgemm.sh b/tools/install_deepgemm.sh index 1c316ee78..0e1adda97 100755 --- a/tools/install_deepgemm.sh +++ b/tools/install_deepgemm.sh @@ -65,7 +65,7 @@ fi # Extract major and minor version numbers CUDA_MAJOR="${CUDA_VERSION%%.*}" -CUDA_MINOR="${CUDA_VERSION#${CUDA_MAJOR}.}" +CUDA_MINOR="${CUDA_VERSION#"${CUDA_MAJOR}".}" CUDA_MINOR="${CUDA_MINOR%%.*}" echo "CUDA version: $CUDA_VERSION (major: $CUDA_MAJOR, minor: $CUDA_MINOR)" @@ -92,7 +92,7 @@ git checkout "$DEEPGEMM_GIT_REF" # Clean previous build artifacts # (Based on https://github.com/deepseek-ai/DeepGEMM/blob/main/install.sh) -rm -rf build dist *.egg-info +rm -rf -- build dist *.egg-info 2>/dev/null || true # Build wheel echo "🏗️ Building DeepGEMM wheel..." 
diff --git a/tools/pre_commit/shellcheck.baseline b/tools/pre_commit/shellcheck.baseline deleted file mode 100644 index 7433bb331..000000000 --- a/tools/pre_commit/shellcheck.baseline +++ /dev/null @@ -1,89 +0,0 @@ -benchmarks/auto_tune/auto_tune.sh:SC2034 -benchmarks/auto_tune/auto_tune.sh:SC2086 -benchmarks/auto_tune/batch_auto_tune.sh:SC2086 -benchmarks/run_structured_output_benchmark.sh:SC2028 -benchmarks/run_structured_output_benchmark.sh:SC2034 -benchmarks/run_structured_output_benchmark.sh:SC2086 -.buildkite/image_build/image_build_cpu_arm64.sh:SC2086 -.buildkite/image_build/image_build_cpu.sh:SC2086 -.buildkite/image_build/image_build_hpu.sh:SC2086 -.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh:SC2086 -.buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh:SC2034 -.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh:SC2027 -.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh:SC2086 -.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh:SC2126 -.buildkite/scripts/annotate-rocm-release.sh:SC2086 -.buildkite/scripts/cache-rocm-base-wheels.sh:SC2012 -.buildkite/scripts/cherry-pick-from-milestone.sh:SC2064 -.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh:SC2086 -.buildkite/scripts/hardware_ci/run-cpu-test.sh:SC2086 -.buildkite/scripts/hardware_ci/run-hpu-test.sh:SC2086 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC1090 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC2006 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC2086 -.buildkite/scripts/hardware_ci/run-npu-test.sh:SC2181 -.buildkite/scripts/hardware_ci/run-xpu-test.sh:SC2086 -.buildkite/scripts/push-nightly-builds.sh:SC2086 -.buildkite/scripts/run-multi-node-test.sh:SC2086 -.buildkite/scripts/run-multi-node-test.sh:SC2089 -.buildkite/scripts/run-multi-node-test.sh:SC2090 -.buildkite/scripts/run-prime-rl-test.sh:SC2086 -.buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh:SC2086 
-.buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh:SC2086 -.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh:SC2086 -.buildkite/scripts/tpu/docker_run_bm.sh:SC1090 -.buildkite/scripts/tpu/docker_run_bm.sh:SC2086 -.buildkite/scripts/tpu/run_bm.sh:SC2034 -.buildkite/scripts/tpu/run_bm.sh:SC2086 -.buildkite/scripts/upload-nightly-wheels.sh:SC2086 -.buildkite/scripts/upload-nightly-wheels.sh:SC2115 -.buildkite/scripts/upload-nightly-wheels.sh:SC2236 -.buildkite/scripts/upload-release-wheels-pypi.sh:SC2086 -.buildkite/scripts/upload-rocm-wheels.sh:SC2012 -examples/online_serving/disaggregated_encoder/disagg_1e1p1d_example.sh:SC2086 -examples/online_serving/disaggregated_encoder/disagg_1e1pd_example.sh:SC2086 -examples/online_serving/disaggregated_prefill.sh:SC2086 -examples/online_serving/disaggregated_serving/kv_events.sh:SC2086 -examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh:SC2046 -examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh:SC2086 -examples/online_serving/disaggregated_serving/mooncake_connector/run_mooncake_connector.sh:SC2317 -examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh:SC2046 -examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh:SC2086 -examples/online_serving/disaggregated_serving_p2p_nccl_xpyd/disagg_example_p2p_nccl_xpyd.sh:SC2317 -examples/online_serving/elastic_ep/bench.sh:SC2086 -examples/online_serving/elastic_ep/serve_deepseek_v2.sh:SC2086 -examples/online_serving/multi-node-serving.sh:SC2006 -examples/online_serving/multi-node-serving.sh:SC2086 -examples/online_serving/multi-node-serving.sh:SC2181 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2046 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2126 
-examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2181 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_example_nixl.sh:SC2206 -examples/others/lmcache/disagg_prefill_lmcache_v1/disagg_vllm_launcher.sh:SC2086 -examples/pooling/embed/openai_embedding_long_text/service.sh:SC2086 -tests/standalone_tests/python_only_compile.sh:SC2086 -tests/v1/ec_connector/integration/run_epd_correctness_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/config_sweep_accuracy_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2005 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2124 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2126 -tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh:SC2206 -tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh:SC2153 -tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh:SC2089 -tests/v1/kv_connector/nixl_integration/run_tpu_disagg_accuracy_test.sh:SC2090 -tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh:SC2086 -tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh:SC2089 -tests/v1/kv_connector/nixl_integration/run_tpu_edge_case_test.sh:SC2090 -tools/ep_kernels/elastic_ep/install_eep_libraries.sh:SC2086 -tools/ep_kernels/install_python_libraries.sh:SC2086 -tools/ep_kernels/install_python_libraries.sh:SC2196 -tools/flashinfer-build.sh:SC2086 -tools/flashinfer-build.sh:SC2269 -tools/install_deepgemm.sh:SC2035 -tools/install_deepgemm.sh:SC2295 -tools/pre_commit/shellcheck.sh:SC2016 -tools/vllm-rocm/generate-rocm-wheels-root-index.sh:SC2295 -tools/vllm-tpu/build.sh:SC2145 diff --git a/tools/pre_commit/shellcheck.sh b/tools/pre_commit/shellcheck.sh index 4adee5d57..557f41f29 100755 --- 
a/tools/pre_commit/shellcheck.sh +++ b/tools/pre_commit/shellcheck.sh @@ -2,7 +2,6 @@ set -euo pipefail scversion="stable" -baseline="tools/pre_commit/shellcheck.baseline" if [ -d "shellcheck-${scversion}" ]; then export PATH="$PATH:$(pwd)/shellcheck-${scversion}" @@ -20,38 +19,6 @@ if ! [ -x "$(command -v shellcheck)" ]; then fi # TODO - fix warnings in .buildkite/scripts/hardware_ci/run-amd-test.sh -# collects warnings as "file:SCcode" pairs for baseline comparison. -collect() { - find . -path ./.git -prune -o -name "*.sh" \ - -not -path "./.buildkite/scripts/hardware_ci/run-amd-test.sh" -print0 | \ - xargs -0 sh -c 'for f in "$@"; do git check-ignore -q "$f" || shellcheck -s bash -f gcc "$f" || true; done' -- | \ - sed -nE 's|^\./||; s|^([^:]+):[0-9]+:[0-9]+:.*\[(SC[0-9]+)\]$|\1:\2|p' | \ - sort -u -} - -if [[ "${1:-}" == "--generate-baseline" ]]; then - collect > "$baseline" - echo "Wrote baseline to $baseline" - exit 0 -fi - -if [[ ! -f "$baseline" ]]; then - echo "Baseline not found: $baseline (run: $0 --generate-baseline)" - exit 1 -fi - -current="$(mktemp)" -trap 'rm -f "$current"' EXIT -collect > "$current" - -# finds new warnings not in baseline -new_errors="$(comm -23 "$current" <(sort -u "$baseline") || true)" -if [ -n "$new_errors" ]; then - echo "$new_errors" | cut -d: -f1 | sort -u | while IFS= read -r file; do - if [[ -f "$file" ]]; then - codes=$(echo "$new_errors" | awk -F: -v f="$file" '$1==f {print $2}' | paste -sd ',' -) - shellcheck -s bash --include="$codes" "$file" 2>&1 || true - fi - done - exit 1 -fi +find . 
-path ./.git -prune -o -name "*.sh" \ + -not -path "./.buildkite/scripts/hardware_ci/run-amd-test.sh" -print0 | \ + xargs -0 sh -c "for f in \"\$@\"; do git check-ignore -q \"\$f\" || shellcheck -s bash \"\$f\"; done" -- diff --git a/tools/vllm-rocm/generate-rocm-wheels-root-index.sh b/tools/vllm-rocm/generate-rocm-wheels-root-index.sh index 02b4fbdd0..87b5c3228 100755 --- a/tools/vllm-rocm/generate-rocm-wheels-root-index.sh +++ b/tools/vllm-rocm/generate-rocm-wheels-root-index.sh @@ -190,7 +190,7 @@ echo "" # List what would be uploaded echo "Files to upload:" find "$WORK_DIR/output" -name "*.html" -type f | while read -r file; do - rel_path="${file#$WORK_DIR/output/}" + rel_path="${file#"$WORK_DIR"/output/}" echo " rocm/$rel_path" done echo "" diff --git a/tools/vllm-tpu/build.sh b/tools/vllm-tpu/build.sh index 45ef8dfcb..aa46a5298 100755 --- a/tools/vllm-tpu/build.sh +++ b/tools/vllm-tpu/build.sh @@ -38,7 +38,7 @@ if ! grep -q "name = \"vllm-tpu\"" "$PYPROJECT_FILE"; then cp "$PYPROJECT_FILE" "${PYPROJECT_FILE}.bak" sed -i '0,/^name = "vllm"/s//name = "vllm-tpu"/' "$PYPROJECT_FILE" - echo "Patching ${CHANGE_FILE_LIST[@]} vllm to vllm-tpu..." + echo "Patching ${CHANGE_FILE_LIST[*]} vllm to vllm-tpu..." # patching # importlib.metadata.version('vllm') -> importlib.metadata.version('vllm-tpu') # importlib.metadata.version("vllm") -> importlib.metadata.version("vllm-tpu")