# LM Eval correctness pipeline: runs lm-eval-harness / GSM8K accuracy checks
# against quantized models on several GPU generations. All steps re-run when
# kernel sources (csrc/) or quantization layers change.
group: LM Eval
depends_on:
- image-build
steps:
- label: LM Eval Small Models
  timeout_in_minutes: 75
  source_file_dependencies:
  - csrc/
  - vllm/model_executor/layers/quantization
  autorun_on_main: true
  commands:
  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt

- label: LM Eval Large Models (4 GPUs)(A100)
  gpu: a100
  optional: true
  num_gpus: 4
  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
  source_file_dependencies:
  - csrc/
  - vllm/model_executor/layers/quantization
  commands:
  # spawn avoids fork-related CUDA re-initialization issues in multiproc workers
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4

- label: LM Eval Large Models (4 GPUs)(H100)
  gpu: h100
  optional: true
  num_gpus: 4
  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
  source_file_dependencies:
  - csrc/
  - vllm/model_executor/layers/quantization
  commands:
  - export VLLM_USE_DEEP_GEMM=0 # We found Triton is faster than DeepGEMM for H100
  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large-hopper.txt --tp-size=4

- label: LM Eval Small Models (B200)
  timeout_in_minutes: 120
  gpu: b200
  optional: true
  source_file_dependencies:
  - csrc/
  - vllm/model_executor/layers/quantization
  commands:
  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt