# File-viewer metadata (not part of the pipeline): 101 lines, 3.6 KiB, YAML.
# Test group "Models - Language": language-model test steps.
# The group-level `depends_on` names the `image-build` step; the AMD mirrors
# further down override it with `image-build-amd`.
group: Models - Language
depends_on:
- image-build
steps:
# Core-model coverage: runs everything under models/language that is marked
# `core_model` and is not marked `slow_test`.
- label: Language Models Tests (Standard)
  timeout_in_minutes: 25
  # NOTE(review): presumably also schedules this step against the torch
  # nightly build - confirm against the pipeline generator.
  torch_nightly: true
  # Paths this step depends on (repository-relative).
  source_file_dependencies:
  - vllm/
  - tests/models/language
  commands:
  # Test standard language models, excluding a subset of slow tests
  # List the installed packages matching 'torch' before the tests run.
  - pip freeze | grep -E 'torch'
  - pytest -v -s models/language -m 'core_model and (not slow_test)'
# Slow subset of the core-model tests (`core_model and slow_test`), sharded
# across `parallelism` jobs.
- label: Language Models Tests (Extra Standard) %N
  timeout_in_minutes: 45
  # NOTE(review): presumably also schedules this step against the torch
  # nightly build - confirm against the pipeline generator.
  torch_nightly: true
  # Narrower than the standard step: model sources plus three test files.
  source_file_dependencies:
  - vllm/model_executor/models/
  - tests/models/language/pooling/test_embedding.py
  - tests/models/language/generation/test_common.py
  - tests/models/language/pooling/test_classification.py
  commands:
  # Shard slow subset of standard language models tests. Only run when model
  # source is modified, or when specified test files are modified
  # List the installed packages matching 'torch' before the tests run.
  - pip freeze | grep -E 'torch'
  # `$$` escapes the dollar sign so the variables are expanded when the job
  # runs; each job passes the total job count and its own index as the shard
  # count and shard id.
  - pytest -v -s models/language -m 'core_model and slow_test' --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
  # Number of parallel jobs; this is the shard count seen by pytest above.
  parallelism: 2
# Generation tests marked `hybrid_model`, sharded across `parallelism` jobs.
- label: Language Models Tests (Hybrid) %N
  timeout_in_minutes: 75
  # NOTE(review): presumably also schedules this step against the torch
  # nightly build - confirm against the pipeline generator.
  torch_nightly: true
  # Paths this step depends on (repository-relative).
  source_file_dependencies:
  - vllm/
  - tests/models/language/generation
  commands:
  # Install fast path packages for testing against transformers
  # Note: also needed to run plamo2 model in vLLM
  # Both packages are built from source at a pinned git tag.
  - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0'
  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
  # Shard hybrid language model tests
  # `$$` escapes the dollar sign so the variables are expanded when the job
  # runs; each job passes the total job count and its own index.
  - pytest -v -s models/language/generation -m hybrid_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB
  # Number of parallel jobs; this is the shard count seen by pytest above.
  parallelism: 2
# Remaining generation tests: everything under models/language/generation
# that is neither `core_model` nor `hybrid_model`.
- label: Language Models Test (Extended Generation) # 80min
  timeout_in_minutes: 110
  # NOTE(review): presumably excludes the step from the default run unless
  # explicitly requested - confirm against the pipeline generator.
  optional: true
  # Paths this step depends on (repository-relative).
  source_file_dependencies:
  - vllm/
  - tests/models/language/generation
  commands:
  # Install fast path packages for testing against transformers
  # Note: also needed to run plamo2 model in vLLM
  - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.3.0'
  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.6.0'
  - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
  # AMD variant of this step: its own device, image dependency and commands.
  mirror:
    amd:
      device: mi325_1
      depends_on:
      - image-build-amd
      commands:
      # NOTE(review): mamba is installed from a fork's branch here (a mutable
      # ref, unlike the tags above) and causal-conv1d is pinned to v1.5.2
      # rather than v1.6.0 - confirm both are intentional for ROCm.
      - uv pip install --system --no-build-isolation 'git+https://github.com/AndreasKaratzas/mamba@fix-rocm-7.0-warp-size-constexpr'
      - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
      - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)'
# Runs the whole models/language/generation_ppl_test directory (no marker
# filter) on the `h200_18gb` device.
- label: Language Models Test (PPL)
  timeout_in_minutes: 110
  device: h200_18gb
  # NOTE(review): presumably excludes the step from the default run unless
  # explicitly requested - confirm against the pipeline generator.
  optional: true
  # Paths this step depends on (repository-relative).
  source_file_dependencies:
  - vllm/
  - tests/models/language/generation_ppl_test
  commands:
  - pytest -v -s models/language/generation_ppl_test
# Pooling tests under models/language/pooling that are not marked
# `core_model`.
- label: Language Models Test (Extended Pooling) # 36min
  timeout_in_minutes: 50
  # NOTE(review): presumably excludes the step from the default run unless
  # explicitly requested - confirm against the pipeline generator.
  optional: true
  # Paths this step depends on (repository-relative).
  source_file_dependencies:
  - vllm/
  - tests/models/language/pooling
  commands:
  - pytest -v -s models/language/pooling -m 'not core_model'
  # AMD variant of this step: overrides only the device and image dependency;
  # no `commands` are listed under it.
  mirror:
    amd:
      device: mi325_1
      depends_on:
      - image-build-amd
# Runs the whole models/language/pooling_mteb_test directory (no marker
# filter) on the `h200_18gb` device.
- label: Language Models Test (MTEB)
  timeout_in_minutes: 110
  device: h200_18gb
  # NOTE(review): presumably excludes the step from the default run unless
  # explicitly requested - confirm against the pipeline generator.
  optional: true
  # Paths this step depends on (repository-relative).
  source_file_dependencies:
  - vllm/
  - tests/models/language/pooling_mteb_test
  commands:
  - pytest -v -s models/language/pooling_mteb_test