diff --git a/.buildkite/test_areas/distributed.yaml b/.buildkite/test_areas/distributed.yaml index f94f831a4..331103cee 100644 --- a/.buildkite/test_areas/distributed.yaml +++ b/.buildkite/test_areas/distributed.yaml @@ -15,8 +15,29 @@ steps: - pytest -v -s distributed/test_shm_buffer.py - pytest -v -s distributed/test_shm_storage.py -- label: Distributed (2 GPUs) - timeout_in_minutes: 60 +- label: Distributed DP Tests (2 GPUs) + timeout_in_minutes: 20 + working_dir: "/vllm-workspace/tests" + num_devices: 2 + source_file_dependencies: + - vllm/distributed/ + - vllm/engine/ + - vllm/executor/ + - vllm/worker/worker_base.py + - vllm/v1/engine/ + - vllm/v1/worker/ + - tests/v1/distributed + - tests/v1/entrypoints/openai/test_multi_api_servers.py + commands: + # https://github.com/NVIDIA/nccl/issues/1838 + - export NCCL_CUMEM_HOST_ENABLE=0 + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py + - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py + +- label: Distributed Compile + RPC Tests (2 GPUs) + timeout_in_minutes: 20 working_dir: "/vllm-workspace/tests" num_devices: 2 source_file_dependencies: @@ -29,22 +50,31 @@ steps: - vllm/v1/worker/ - tests/compile/fullgraph/test_basic_correctness.py - tests/compile/test_wrapper.py - - tests/distributed/ - tests/entrypoints/llm/test_collective_rpc.py - - tests/v1/distributed - - tests/v1/entrypoints/openai/test_multi_api_servers.py + commands: + # https://github.com/NVIDIA/nccl/issues/1838 + - export NCCL_CUMEM_HOST_ENABLE=0 + - pytest -v -s entrypoints/llm/test_collective_rpc.py + - pytest -v -s ./compile/fullgraph/test_basic_correctness.py + - pytest -v -s ./compile/test_wrapper.py + +- label: Distributed Torchrun + Shutdown Tests (2 GPUs) + timeout_in_minutes: 20 + working_dir: "/vllm-workspace/tests" + num_devices: 2 + source_file_dependencies: + - vllm/distributed/ + - vllm/engine/ + - vllm/executor/ + - vllm/worker/worker_base.py + - vllm/v1/engine/ + - vllm/v1/worker/ + - tests/distributed/ - tests/v1/shutdown - tests/v1/worker/test_worker_memory_snapshot.py commands: # https://github.com/NVIDIA/nccl/issues/1838 - export NCCL_CUMEM_HOST_ENABLE=0 - - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py - - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py - - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py - - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py - - pytest -v -s entrypoints/llm/test_collective_rpc.py - - pytest -v -s ./compile/fullgraph/test_basic_correctness.py - - pytest -v -s ./compile/test_wrapper.py - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' - VLLM_TEST_SAME_HOST=1 VLLM_TEST_WITH_DEFAULT_DEVICE_SET=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown diff --git a/.buildkite/test_areas/kernels.yaml b/.buildkite/test_areas/kernels.yaml index e0be49cf3..8eba8da0b 100644 --- a/.buildkite/test_areas/kernels.yaml +++ b/.buildkite/test_areas/kernels.yaml @@ -35,7 +35,7 @@ steps: parallelism: 2 - label: Kernels MoE Test %N - timeout_in_minutes: 60 + timeout_in_minutes: 25 source_file_dependencies: - csrc/quantization/cutlass_w8a8/moe/ - csrc/moe/ @@ -47,7 +47,7 @@ steps: commands: - pytest -v -s kernels/moe --ignore=kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT - pytest -v -s kernels/moe/test_modular_oai_triton_moe.py --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT - parallelism: 2 + parallelism: 5 - label: Kernels Mamba Test timeout_in_minutes: 45