diff --git a/.buildkite/test_areas/distributed.yaml b/.buildkite/test_areas/distributed.yaml
index 0a75bc50e..64911983f 100644
--- a/.buildkite/test_areas/distributed.yaml
+++ b/.buildkite/test_areas/distributed.yaml
@@ -146,7 +146,7 @@ steps:
   num_devices: 2
   commands:
   - pytest -v -s tests/distributed/test_context_parallel.py
-  - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py
+  # - VLLM_ALLOW_INSECURE_SERIALIZATION=1 python3 examples/offline_inference/new_weight_syncing/rlhf_async_new_apis.py --- failing, need to re-enable
   - VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
   - pytest -v -s tests/v1/distributed/test_dbo.py
diff --git a/.buildkite/test_areas/lm_eval.yaml b/.buildkite/test_areas/lm_eval.yaml
index f25eae240..3e2610e70 100644
--- a/.buildkite/test_areas/lm_eval.yaml
+++ b/.buildkite/test_areas/lm_eval.yaml
@@ -11,17 +11,17 @@ steps:
   commands:
   - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-small.txt
 
-- label: LM Eval Large Models (4 GPUs)(A100)
-  device: a100
-  optional: true
-  num_devices: 4
-  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
-  source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  commands:
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
+# - label: LM Eval Large Models (4 GPUs)(A100)
+#   device: a100
+#   optional: true
+#   num_devices: 4
+#   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
+#   source_file_dependencies:
+#   - csrc/
+#   - vllm/model_executor/layers/quantization
+#   commands:
+#   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+#   - pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
 
 - label: LM Eval Large Models (4 GPUs)(H100)
   device: h100
diff --git a/.buildkite/test_areas/weight_loading.yaml b/.buildkite/test_areas/weight_loading.yaml
index 3561d5707..8e86374a8 100644
--- a/.buildkite/test_areas/weight_loading.yaml
+++ b/.buildkite/test_areas/weight_loading.yaml
@@ -13,13 +13,13 @@ steps:
   commands:
   - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
 
-- label: Weight Loading Multiple GPU - Large Models # optional
-  working_dir: "/vllm-workspace/tests"
-  num_devices: 2
-  device: a100
-  optional: true
-  source_file_dependencies:
-  - vllm/
-  - tests/weight_loading
-  commands:
-  - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
+# - label: Weight Loading Multiple GPU - Large Models # optional
+#   working_dir: "/vllm-workspace/tests"
+#   num_devices: 2
+#   device: a100
+#   optional: true
+#   source_file_dependencies:
+#   - vllm/
+#   - tests/weight_loading
+#   commands:
+#   - bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
diff --git a/tests/evals/gsm8k/configs/moe-refactor/config-h100.txt b/tests/evals/gsm8k/configs/moe-refactor/config-h100.txt
index 6354deded..563d5d42c 100644
--- a/tests/evals/gsm8k/configs/moe-refactor/config-h100.txt
+++ b/tests/evals/gsm8k/configs/moe-refactor/config-h100.txt
@@ -12,4 +12,4 @@ Llama-4-Scout-Fp8-ModelOpt-fi-cutlass.yaml
 Llama-4-Scout-Fp8-ModelOpt-marlin.yaml
 Llama-4-Scout-Fp8-ModelOpt-triton.yaml
 Qwen3-30B-A3B-BF16-fi-cutlass.yaml
-Qwen3-30B-A3B-BF16-triton.yaml
\ No newline at end of file
+Qwen3-30B-A3B-BF16-triton.yaml