diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 7f1841b1c..a13e2cb78 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -536,6 +536,17 @@ steps: - pip freeze | grep -E 'torch' - pytest -v -s models/language -m core_model +- label: Language Models Test (Hybrid) # 35 min + mirror_hardwares: [amdexperimental] + torch_nightly: true + source_file_dependencies: + - vllm/ + - tests/models/language/generation + commands: + # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile. + - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8' + - pytest -v -s models/language/generation -m hybrid_model + - label: Language Models Test (Extended Generation) # 1hr20min mirror_hardwares: [amdexperimental] optional: true @@ -545,7 +556,7 @@ steps: commands: # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile. - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8' - - pytest -v -s models/language/generation -m 'not core_model' + - pytest -v -s models/language/generation -m '(not core_model) and (not hybrid_model)' - label: Language Models Test (Extended Pooling) # 36min mirror_hardwares: [amdexperimental] diff --git a/pyproject.toml b/pyproject.toml index e8c2403af..fb45572d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,6 +150,7 @@ skip_gitignore = true markers = [ "skip_global_cleanup", "core_model: enable this model test in each PR instead of only nightly", + "hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)", "cpu_model: enable this model test in CPU tests", "split: run this test as part of a split", "distributed: run this test only in distributed GPU tests", diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py index 90c4cd968..b2348e644 100644 --- a/tests/models/language/generation/test_hybrid.py +++ b/tests/models/language/generation/test_hybrid.py @@ -9,6 +9,9 @@ from vllm.sampling_params import SamplingParams from ...utils import check_logprobs_close, check_outputs_equal +# Mark all tests as hybrid +pytestmark = pytest.mark.hybrid_model + # NOTE: The first model in each list is taken as the primary model, # meaning that it will be used in all tests in this file # The rest of the models will only be tested by test_models