Signed-off-by: Yongji Wu <wuyongji317@gmail.com> Signed-off-by: Itay Alroy <ialroy@nvidia.com> Signed-off-by: Tyler Michael Smith <tlrmchlsmth@gmail.com> Signed-off-by: Ron Tourgeman <rtourgeman@nvidia.com> Co-authored-by: Yongji Wu <wuyongji317@gmail.com> Co-authored-by: Tyler Michael Smith <tlrmchlsmth@gmail.com> Co-authored-by: Ron Tourgeman <rtourgeman@nvidia.com>
39 lines
960 B
YAML
39 lines
960 B
YAML
# CI test area: Expert Parallelism.
# `depends_on` names `image-build` as the prerequisite for this group.
group: Expert Parallelism
depends_on:
- image-build
# Each list item below is one test step of this group.
steps:
# Runs the EPLB algorithm test file with pytest; 15-minute timeout.
- label: EPLB Algorithm
  timeout_in_minutes: 15
  # Command paths below are relative to this directory.
  working_dir: "/vllm-workspace/tests"
  # NOTE(review): presumably the paths whose changes select this step --
  # confirm against the tooling that consumes this file.
  source_file_dependencies:
  - vllm/distributed/eplb
  - tests/distributed/test_eplb_algo.py
  commands:
  - pytest -v -s distributed/test_eplb_algo.py

# Runs the EPLB execution and EPLB spec-decode test files with pytest;
# 20-minute timeout, `num_devices: 4`.
- label: EPLB Execution
  timeout_in_minutes: 20
  # Command paths below are relative to this directory.
  working_dir: "/vllm-workspace/tests"
  num_devices: 4
  # NOTE(review): presumably the paths whose changes select this step --
  # confirm against the tooling that consumes this file.
  # Every test file run under `commands` is listed here, so the dependency
  # list stays in sync with what the step actually executes.
  source_file_dependencies:
  - vllm/distributed/eplb
  - tests/distributed/test_eplb_execute.py
  - tests/distributed/test_eplb_spec_decode.py
  commands:
  - pytest -v -s distributed/test_eplb_execute.py
  - pytest -v -s distributed/test_eplb_spec_decode.py

# Runs the elastic EP test file with pytest; 20-minute timeout,
# `device: b200`, `num_devices: 4`, and marked `optional: true`.
- label: Elastic EP Scaling Test
  timeout_in_minutes: 20
  device: b200
  optional: true
  # Command paths below are relative to this directory.
  working_dir: "/vllm-workspace/tests"
  num_devices: 4
  # NOTE(review): presumably the paths whose changes select this step --
  # confirm against the tooling that consumes this file.
  source_file_dependencies:
  - vllm/distributed/
  - vllm/engine/
  - vllm/executor/
  - vllm/compilation/
  - tests/distributed/
  commands:
  - pytest -v -s distributed/test_elastic_ep.py