[CI/Build] Add shell script linting using shellcheck (#7925)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
2024-11-07 13:17:29 -05:00
parent de0e61a323
commit 3be5b26a76
28 changed files with 204 additions and 129 deletions
--- a/.buildkite/nightly-benchmarks/scripts/launch-server.sh
+++ b/.buildkite/nightly-benchmarks/scripts/launch-server.sh
@@ -50,31 +50,30 @@ launch_trt_server() {
  git clone https://github.com/triton-inference-server/tensorrtllm_backend.git
  git lfs install
  cd tensorrtllm_backend
-  git checkout $trt_llm_version
-  tensorrtllm_backend_dir=$(pwd)
+  git checkout "$trt_llm_version"
  git submodule update --init --recursive

  # build trtllm engine
  cd /tensorrtllm_backend
-  cd ./tensorrt_llm/examples/${model_type}
+  cd "./tensorrt_llm/examples/${model_type}"
  python3 convert_checkpoint.py \
-    --model_dir ${model_path} \
-    --dtype ${model_dtype} \
-    --tp_size ${model_tp_size} \
-    --output_dir ${trt_model_path}
+    --model_dir "${model_path}" \
+    --dtype "${model_dtype}" \
+    --tp_size "${model_tp_size}" \
+    --output_dir "${trt_model_path}"
  trtllm-build \
-    --checkpoint_dir ${trt_model_path} \
+    --checkpoint_dir "${trt_model_path}" \
    --use_fused_mlp \
    --reduce_fusion disable \
    --workers 8 \
-    --gpt_attention_plugin ${model_dtype} \
-    --gemm_plugin ${model_dtype} \
-    --tp_size ${model_tp_size} \
-    --max_batch_size ${max_batch_size} \
-    --max_input_len ${max_input_len} \
-    --max_seq_len ${max_seq_len} \
-    --max_num_tokens ${max_num_tokens} \
-    --output_dir ${trt_engine_path}
+    --gpt_attention_plugin "${model_dtype}" \
+    --gemm_plugin "${model_dtype}" \
+    --tp_size "${model_tp_size}" \
+    --max_batch_size "${max_batch_size}" \
+    --max_input_len "${max_input_len}" \
+    --max_seq_len "${max_seq_len}" \
+    --max_num_tokens "${max_num_tokens}" \
+    --output_dir "${trt_engine_path}"

  # handle triton protobuf files and launch triton server
  cd /tensorrtllm_backend
@@ -82,15 +81,15 @@ launch_trt_server() {
  cp -r all_models/inflight_batcher_llm/* triton_model_repo/
  cd triton_model_repo
  rm -rf ./tensorrt_llm/1/*
-  cp -r ${trt_engine_path}/* ./tensorrt_llm/1
+  cp -r "${trt_engine_path}"/* ./tensorrt_llm/1
  python3 ../tools/fill_template.py -i tensorrt_llm/config.pbtxt triton_backend:tensorrtllm,engine_dir:/tensorrtllm_backend/triton_model_repo/tensorrt_llm/1,decoupled_mode:true,batching_strategy:inflight_fused_batching,batch_scheduler_policy:guaranteed_no_evict,exclude_input_in_output:true,triton_max_batch_size:2048,max_queue_delay_microseconds:0,max_beam_width:1,max_queue_size:2048,enable_kv_cache_reuse:false
-  python3 ../tools/fill_template.py -i preprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:$model_path,preprocessing_instance_count:5
-  python3 ../tools/fill_template.py -i postprocessing/config.pbtxt triton_max_batch_size:2048,tokenizer_dir:$model_path,postprocessing_instance_count:5,skip_special_tokens:false
-  python3 ../tools/fill_template.py -i ensemble/config.pbtxt triton_max_batch_size:$max_batch_size
-  python3 ../tools/fill_template.py -i tensorrt_llm_bls/config.pbtxt triton_max_batch_size:$max_batch_size,decoupled_mode:true,accumulate_tokens:"False",bls_instance_count:1
+  python3 ../tools/fill_template.py -i preprocessing/config.pbtxt "triton_max_batch_size:2048,tokenizer_dir:$model_path,preprocessing_instance_count:5"
+  python3 ../tools/fill_template.py -i postprocessing/config.pbtxt "triton_max_batch_size:2048,tokenizer_dir:$model_path,postprocessing_instance_count:5,skip_special_tokens:false"
+  python3 ../tools/fill_template.py -i ensemble/config.pbtxt triton_max_batch_size:"$max_batch_size"
+  python3 ../tools/fill_template.py -i tensorrt_llm_bls/config.pbtxt "triton_max_batch_size:$max_batch_size,decoupled_mode:true,accumulate_tokens:False,bls_instance_count:1"
  cd /tensorrtllm_backend
  python3 scripts/launch_triton_server.py \
-    --world_size=${model_tp_size} \
+    --world_size="${model_tp_size}" \
    --model_repo=/tensorrtllm_backend/triton_model_repo &

 }
@@ -98,10 +97,7 @@ launch_trt_server() {
 launch_tgi_server() {
  model=$(echo "$common_params" | jq -r '.model')
  tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
  port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
  server_args=$(json2args "$server_params")

  if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
@@ -129,10 +125,7 @@ launch_tgi_server() {
 launch_lmdeploy_server() {
  model=$(echo "$common_params" | jq -r '.model')
  tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
  port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
  server_args=$(json2args "$server_params")

  server_command="lmdeploy serve api_server $model \
@@ -149,10 +142,7 @@ launch_sglang_server() {

  model=$(echo "$common_params" | jq -r '.model')
  tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
  port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
  server_args=$(json2args "$server_params")

  if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
@@ -185,10 +175,7 @@ launch_vllm_server() {

  model=$(echo "$common_params" | jq -r '.model')
  tp=$(echo "$common_params" | jq -r '.tp')
-  dataset_name=$(echo "$common_params" | jq -r '.dataset_name')
-  dataset_path=$(echo "$common_params" | jq -r '.dataset_path')
  port=$(echo "$common_params" | jq -r '.port')
-  num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
  server_args=$(json2args "$server_params")

  if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
@@ -217,19 +204,19 @@ launch_vllm_server() {

 main() {

-  if [[ $CURRENT_LLM_SERVING_ENGINE == "trt" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "trt" ]]; then
    launch_trt_server
  fi

-  if [[ $CURRENT_LLM_SERVING_ENGINE == "tgi" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "tgi" ]]; then
    launch_tgi_server
  fi

-  if [[ $CURRENT_LLM_SERVING_ENGINE == "lmdeploy" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "lmdeploy" ]]; then
    launch_lmdeploy_server
  fi

-  if [[ $CURRENT_LLM_SERVING_ENGINE == "sglang" ]]; then
+  if [[ "$CURRENT_LLM_SERVING_ENGINE" == "sglang" ]]; then
    launch_sglang_server
  fi

--- a/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
+++ b/.buildkite/nightly-benchmarks/scripts/nightly-annotate.sh
@@ -16,10 +16,10 @@ main() {
    fi

    # initial annotation
-    description="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/nightly-descriptions.md"
+    #description="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/nightly-descriptions.md"

    # download results
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
    mkdir -p results/
    /workspace/buildkite-agent artifact download 'results/*nightly_results.json' results/
    ls
@@ -30,15 +30,15 @@ main() {
    /workspace/buildkite-agent artifact upload "results.zip"

    # upload benchmarking scripts
-    cd $VLLM_SOURCE_CODE_LOC/
+    cd "$VLLM_SOURCE_CODE_LOC/"
    zip -r nightly-benchmarks.zip .buildkite/ benchmarks/
    /workspace/buildkite-agent artifact upload "nightly-benchmarks.zip"

-    cd $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+    cd "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
    # upload benchmarking pipeline
    /workspace/buildkite-agent artifact upload "nightly-pipeline.yaml"

-    cd $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+    cd "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"
    /workspace/buildkite-agent annotate --style "success" --context "nightly-benchmarks-results" --append < nightly-annotation.md
    

@@ -75,4 +75,4 @@ main() {
    # /workspace/buildkite-agent annotate --style "success" --context "nightly-benchmarks-results" --append < nightly_results.md
 }

-main "$@"
+main "$@"
--- a/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh
@@ -12,7 +12,7 @@ check_gpus() {
    echo "Need at least 1 GPU to run benchmarking."
    exit 1
  fi
-  declare -g gpu_type=$(echo $(nvidia-smi --query-gpu=name --format=csv,noheader) | awk '{print $2}')
+  declare -g gpu_type="$(nvidia-smi --query-gpu=name --format=csv,noheader | awk 'NR==1 {print $2}')"
  echo "GPU type is $gpu_type"
 }

@@ -102,7 +102,7 @@ kill_gpu_processes() {
  pkill -f text-generation
  pkill -f lmdeploy

-  while [ $(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1) -ge 1000 ]; do
+  while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
    sleep 1
  done
 }
@@ -119,8 +119,8 @@ wait_for_server() {
 ensure_installed() {
  # Ensure that the given command is installed by apt-get
  local cmd=$1
-  if ! which $cmd >/dev/null; then
-    apt-get update && apt-get install -y $cmd
+  if ! command -v "$cmd" >/dev/null; then
+    apt-get update && apt-get install -y "$cmd"
  fi
 }

@@ -173,13 +173,11 @@ run_serving_tests() {
      echo "Reuse previous server for test case $test_name"
    else
      kill_gpu_processes
-      bash $VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh \
+      bash "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh" \
        "$server_params" "$common_params"
    fi

-    wait_for_server
-
-    if [ $? -eq 0 ]; then
+    if wait_for_server; then
      echo ""
      echo "$CURRENT_LLM_SERVING_ENGINE server is up and running."
    else
@@ -190,13 +188,13 @@ run_serving_tests() {

    # prepare tokenizer
    # this is required for lmdeploy.
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
    rm -rf /tokenizer_cache
    mkdir /tokenizer_cache
    python3 ../.buildkite/nightly-benchmarks/scripts/download-tokenizer.py \
      --model "$model" \
      --cachedir /tokenizer_cache
-    cd $VLLM_SOURCE_CODE_LOC/benchmarks
+    cd "$VLLM_SOURCE_CODE_LOC/benchmarks"


    # change model name for lmdeploy (it will not follow standard hf name)
@@ -307,11 +305,11 @@ run_serving_tests() {
 prepare_dataset() {

  # download sharegpt dataset
-  cd $VLLM_SOURCE_CODE_LOC/benchmarks
+  cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
  wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

  # duplicate sonnet by 4x, to allow benchmarking with input length 2048
-  cd $VLLM_SOURCE_CODE_LOC/benchmarks
+  cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
  echo "" > sonnet_4x.txt
  for _ in {1..4}
  do
@@ -339,17 +337,17 @@ main() {

  prepare_dataset

-  cd $VLLM_SOURCE_CODE_LOC/benchmarks
+  cd "$VLLM_SOURCE_CODE_LOC/benchmarks"
  declare -g RESULTS_FOLDER=results/
  mkdir -p $RESULTS_FOLDER
-  BENCHMARK_ROOT=$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/
+  BENCHMARK_ROOT="$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/"

  # run the test
-  run_serving_tests $BENCHMARK_ROOT/tests/nightly-tests.json
+  run_serving_tests "$BENCHMARK_ROOT/tests/nightly-tests.json"

  # upload benchmark results to buildkite
  python3 -m pip install tabulate pandas
-  python3 $BENCHMARK_ROOT/scripts/summary-nightly-results.py
+  python3 "$BENCHMARK_ROOT/scripts/summary-nightly-results.py"
  upload_to_buildkite

 }
--- a/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
@@ -17,7 +17,7 @@ check_gpus() {
    echo "Need at least 1 GPU to run benchmarking."
    exit 1
  fi
-  declare -g gpu_type=$(echo $(nvidia-smi --query-gpu=name --format=csv,noheader) | awk '{print $2}')
+  declare -g gpu_type="$(nvidia-smi --query-gpu=name --format=csv,noheader | awk 'NR==1 {print $2}')"
  echo "GPU type is $gpu_type"
 }

@@ -93,7 +93,7 @@ kill_gpu_processes() {


  # wait until GPU memory usage smaller than 1GB
-  while [ $(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1) -ge 1000 ]; do
+  while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
    sleep 1
  done

@@ -117,7 +117,7 @@ upload_to_buildkite() {
  fi

  # Use the determined command to annotate and upload artifacts
-  $BUILDKITE_AGENT_COMMAND annotate --style "info" --context "$BUILDKITE_LABEL-benchmark-results" <$RESULTS_FOLDER/benchmark_results.md
+  $BUILDKITE_AGENT_COMMAND annotate --style "info" --context "$BUILDKITE_LABEL-benchmark-results" < "$RESULTS_FOLDER/benchmark_results.md"
  $BUILDKITE_AGENT_COMMAND artifact upload "$RESULTS_FOLDER/*"
 }

@@ -150,7 +150,7 @@ run_latency_tests() {
    # check if there is enough GPU to run the test
    tp=$(echo "$latency_params" | jq -r '.tensor_parallel_size')
    if [[ $gpu_count -lt $tp ]]; then
-      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
      continue
    fi

@@ -206,9 +206,9 @@ run_throughput_tests() {
    throughput_args=$(json2args "$throughput_params")

    # check if there is enough GPU to run the test
-    tp=$(echo $throughput_params | jq -r '.tensor_parallel_size')
+    tp=$(echo "$throughput_params" | jq -r '.tensor_parallel_size')
    if [[ $gpu_count -lt $tp ]]; then
-      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
      continue
    fi

@@ -270,7 +270,7 @@ run_serving_tests() {
    # check if there is enough GPU to run the test
    tp=$(echo "$server_params" | jq -r '.tensor_parallel_size')
    if [[ $gpu_count -lt $tp ]]; then
-      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $testname."
+      echo "Required tensor-parallel-size $tp but only $gpu_count GPU found. Skip testcase $test_name."
      continue
    fi

@@ -278,7 +278,7 @@ run_serving_tests() {
    server_model=$(echo "$server_params" | jq -r '.model')
    client_model=$(echo "$client_params" | jq -r '.model')
    if [[ $server_model != "$client_model" ]]; then
-      echo "Server model and client model must be the same. Skip testcase $testname."
+      echo "Server model and client model must be the same. Skip testcase $test_name."
      continue
    fi

@@ -293,8 +293,7 @@ run_serving_tests() {
    server_pid=$!

    # wait until the server is alive
-    wait_for_server
-    if [ $? -eq 0 ]; then
+    if wait_for_server; then
      echo ""
      echo "vllm server is up and running."
    else
--- a/.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
+++ b/.buildkite/nightly-benchmarks/scripts/wait-for-image.sh
@@ -6,7 +6,7 @@ TIMEOUT_SECONDS=10

 retries=0
 while [ $retries -lt 1000 ]; do
-    if [ $(curl -s --max-time $TIMEOUT_SECONDS -L -H "Authorization: Bearer $TOKEN" -o /dev/null -w "%{http_code}" $URL) -eq 200 ]; then
+    if [ "$(curl -s --max-time "$TIMEOUT_SECONDS" -L -H "Authorization: Bearer $TOKEN" -o /dev/null -w "%{http_code}" "$URL")" -eq 200 ]; then
        exit 0
    fi

@@ -16,4 +16,4 @@ while [ $retries -lt 1000 ]; do
    sleep 5
 done

-exit 1
+exit 1