diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 3f6b67e45..adbd08b13 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -642,6 +642,16 @@ steps: commands: - pytest -v -s models/language/pooling -m 'not core_model' +- label: Language Models Test (MTEB) + timeout_in_minutes: 110 + mirror_hardwares: [amdexperimental] + optional: true + source_file_dependencies: + - vllm/ + - tests/models/language/pooling_mteb_test + commands: + - pytest -v -s models/language/pooling_mteb_test + - label: Multi-Modal Processor Test # 44min timeout_in_minutes: 60 source_file_dependencies: diff --git a/tests/entrypoints/pooling/correctness/test_mteb_embed.py b/tests/entrypoints/pooling/correctness/test_mteb_embed.py index 1601c18d9..12a4875bd 100644 --- a/tests/entrypoints/pooling/correctness/test_mteb_embed.py +++ b/tests/entrypoints/pooling/correctness/test_mteb_embed.py @@ -4,10 +4,9 @@ import os import pytest -from tests.models.language.pooling.mteb_utils import (MTEB_EMBED_TASKS, - MTEB_EMBED_TOL, - OpenAIClientMtebEncoder, - run_mteb_embed_task) +from tests.models.language.pooling_mteb_test.mteb_utils import ( + MTEB_EMBED_TASKS, MTEB_EMBED_TOL, OpenAIClientMtebEncoder, + run_mteb_embed_task) from tests.utils import RemoteOpenAIServer os.environ["VLLM_LOGGING_LEVEL"] = "WARNING" diff --git a/tests/entrypoints/pooling/correctness/test_mteb_score.py b/tests/entrypoints/pooling/correctness/test_mteb_score.py index 417f85adc..7c059d16b 100644 --- a/tests/entrypoints/pooling/correctness/test_mteb_score.py +++ b/tests/entrypoints/pooling/correctness/test_mteb_score.py @@ -4,15 +4,9 @@ import os import pytest -# yapf conflicts with isort for this block -# yapf: disable -from tests.models.language.pooling.mteb_utils import (MTEB_RERANK_LANGS, - MTEB_RERANK_TASKS, - MTEB_RERANK_TOL, - RerankClientMtebEncoder, - ScoreClientMtebEncoder, - run_mteb_rerank) -# yapf: enable +from tests.models.language.pooling_mteb_test.mteb_utils import ( + MTEB_RERANK_LANGS, MTEB_RERANK_TASKS, MTEB_RERANK_TOL, + RerankClientMtebEncoder, ScoreClientMtebEncoder, run_mteb_rerank) from tests.utils import RemoteOpenAIServer os.environ["VLLM_LOGGING_LEVEL"] = "WARNING" diff --git a/tests/models/language/pooling_mteb_test/__init__.py b/tests/models/language/pooling_mteb_test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/models/language/pooling/mteb_utils.py b/tests/models/language/pooling_mteb_test/mteb_utils.py similarity index 100% rename from tests/models/language/pooling/mteb_utils.py rename to tests/models/language/pooling_mteb_test/mteb_utils.py diff --git a/tests/models/language/pooling/test_baai.py b/tests/models/language/pooling_mteb_test/test_baai.py similarity index 93% rename from tests/models/language/pooling/test_baai.py rename to tests/models/language/pooling_mteb_test/test_baai.py index be8cb6fa7..e131c9b10 100644 --- a/tests/models/language/pooling/test_baai.py +++ b/tests/models/language/pooling_mteb_test/test_baai.py @@ -2,10 +2,12 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import pytest -from ...utils import (CLSPoolingEmbedModelInfo, CLSPoolingRerankModelInfo, - EmbedModelInfo, LASTPoolingEmbedModelInfo, - RerankModelInfo) -from .embed_utils import correctness_test_embed_models +from tests.models.language.pooling.embed_utils import ( + correctness_test_embed_models) +from tests.models.utils import (CLSPoolingEmbedModelInfo, + CLSPoolingRerankModelInfo, EmbedModelInfo, + LASTPoolingEmbedModelInfo, RerankModelInfo) + from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models MODELS = [ diff --git a/tests/models/language/pooling/test_bge_reranker_v2_gemma.py b/tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py similarity index 96% rename from tests/models/language/pooling/test_bge_reranker_v2_gemma.py rename to tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py index fc888157b..1eca2a2c0 100644 --- a/tests/models/language/pooling/test_bge_reranker_v2_gemma.py +++ b/tests/models/language/pooling_mteb_test/test_bge_reranker_v2_gemma.py @@ -7,9 +7,9 @@ import pytest import torch from tests.conftest import HfRunner - -from ...utils import LASTPoolingRerankModelInfo, RerankModelInfo -from .mteb_utils import VllmMtebEncoder, mteb_test_rerank_models +from tests.models.language.pooling_mteb_test.mteb_utils import ( + VllmMtebEncoder, mteb_test_rerank_models) +from tests.models.utils import LASTPoolingRerankModelInfo, RerankModelInfo RERANK_MODELS = [ LASTPoolingRerankModelInfo("BAAI/bge-reranker-v2-gemma", diff --git a/tests/models/language/pooling/test_cross_encoder.py b/tests/models/language/pooling_mteb_test/test_cross_encoder.py similarity index 85% rename from tests/models/language/pooling/test_cross_encoder.py rename to tests/models/language/pooling_mteb_test/test_cross_encoder.py index b49908c9c..ad320fae0 100644 --- a/tests/models/language/pooling/test_cross_encoder.py +++ b/tests/models/language/pooling_mteb_test/test_cross_encoder.py @@ -2,8 +2,9 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import pytest -from ...utils import (CLSPoolingRerankModelInfo, LASTPoolingRerankModelInfo, - RerankModelInfo) +from tests.models.utils import (CLSPoolingRerankModelInfo, + LASTPoolingRerankModelInfo, RerankModelInfo) + from .mteb_utils import mteb_test_rerank_models RERANK_MODELS = [ diff --git a/tests/models/language/pooling/test_gte.py b/tests/models/language/pooling_mteb_test/test_gte.py similarity index 94% rename from tests/models/language/pooling/test_gte.py rename to tests/models/language/pooling_mteb_test/test_gte.py index 98d215b0a..9ae43fd05 100644 --- a/tests/models/language/pooling/test_gte.py +++ b/tests/models/language/pooling_mteb_test/test_gte.py @@ -3,10 +3,12 @@ import pytest -from ...utils import (CLSPoolingEmbedModelInfo, CLSPoolingRerankModelInfo, - EmbedModelInfo, LASTPoolingEmbedModelInfo, - RerankModelInfo) -from .embed_utils import correctness_test_embed_models +from tests.models.language.pooling.embed_utils import ( + correctness_test_embed_models) +from tests.models.utils import (CLSPoolingEmbedModelInfo, + CLSPoolingRerankModelInfo, EmbedModelInfo, + LASTPoolingEmbedModelInfo, RerankModelInfo) + from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models MODELS = [ diff --git a/tests/models/language/pooling/test_intfloat.py b/tests/models/language/pooling_mteb_test/test_intfloat.py similarity index 92% rename from tests/models/language/pooling/test_intfloat.py rename to tests/models/language/pooling_mteb_test/test_intfloat.py index bc9547583..0d6026898 100644 --- a/tests/models/language/pooling/test_intfloat.py +++ b/tests/models/language/pooling_mteb_test/test_intfloat.py @@ -2,8 +2,10 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import pytest -from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo -from .embed_utils import correctness_test_embed_models +from tests.models.language.pooling.embed_utils import ( + correctness_test_embed_models) +from tests.models.utils import CLSPoolingEmbedModelInfo, EmbedModelInfo + from .mteb_utils import mteb_test_embed_models MODELS = [ diff --git a/tests/models/language/pooling/test_jina.py b/tests/models/language/pooling_mteb_test/test_jina.py similarity index 92% rename from tests/models/language/pooling/test_jina.py rename to tests/models/language/pooling_mteb_test/test_jina.py index c4e483555..0a77a78bb 100644 --- a/tests/models/language/pooling/test_jina.py +++ b/tests/models/language/pooling_mteb_test/test_jina.py @@ -4,12 +4,13 @@ from functools import partial import pytest +from tests.models.language.pooling.embed_utils import ( + check_embeddings_close, correctness_test_embed_models, matryoshka_fy) +from tests.models.utils import (CLSPoolingEmbedModelInfo, + CLSPoolingRerankModelInfo, EmbedModelInfo, + RerankModelInfo) from vllm import PoolingParams -from ...utils import (CLSPoolingEmbedModelInfo, CLSPoolingRerankModelInfo, - EmbedModelInfo, RerankModelInfo) -from .embed_utils import (check_embeddings_close, - correctness_test_embed_models, matryoshka_fy) from .mteb_utils import mteb_test_embed_models, mteb_test_rerank_models EMBEDDING_MODELS = [ diff --git a/tests/models/language/pooling/test_mxbai_rerank.py b/tests/models/language/pooling_mteb_test/test_mxbai_rerank.py similarity index 97% rename from tests/models/language/pooling/test_mxbai_rerank.py rename to tests/models/language/pooling_mteb_test/test_mxbai_rerank.py index 1731c6ae6..05ebb4ec4 100644 --- a/tests/models/language/pooling/test_mxbai_rerank.py +++ b/tests/models/language/pooling_mteb_test/test_mxbai_rerank.py @@ -6,8 +6,8 @@ import pytest import torch from tests.conftest import HfRunner +from tests.models.utils import LASTPoolingRerankModelInfo, RerankModelInfo -from ...utils import LASTPoolingRerankModelInfo, RerankModelInfo from .mteb_utils import mteb_test_rerank_models mxbai_rerank_hf_overrides = { diff --git a/tests/models/language/pooling/test_nomic.py b/tests/models/language/pooling_mteb_test/test_nomic.py similarity index 90% rename from tests/models/language/pooling/test_nomic.py rename to tests/models/language/pooling_mteb_test/test_nomic.py index 52a8ce6e6..61512fd0d 100644 --- a/tests/models/language/pooling/test_nomic.py +++ b/tests/models/language/pooling_mteb_test/test_nomic.py @@ -3,8 +3,10 @@ import pytest -from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo -from .embed_utils import correctness_test_embed_models +from tests.models.language.pooling.embed_utils import ( + correctness_test_embed_models) +from tests.models.utils import CLSPoolingEmbedModelInfo, EmbedModelInfo + from .mteb_utils import mteb_test_embed_models MODELS = [ diff --git a/tests/models/language/pooling/test_qwen3_reranker.py b/tests/models/language/pooling_mteb_test/test_qwen3_reranker.py similarity index 98% rename from tests/models/language/pooling/test_qwen3_reranker.py rename to tests/models/language/pooling_mteb_test/test_qwen3_reranker.py index ebdacf9d0..65403081d 100644 --- a/tests/models/language/pooling/test_qwen3_reranker.py +++ b/tests/models/language/pooling_mteb_test/test_qwen3_reranker.py @@ -6,9 +6,9 @@ import pytest import torch from tests.conftest import HfRunner +from tests.models.utils import LASTPoolingRerankModelInfo, RerankModelInfo from tests.utils import multi_gpu_test -from ...utils import LASTPoolingRerankModelInfo, RerankModelInfo from .mteb_utils import mteb_test_rerank_models qwen3_reranker_hf_overrides = { diff --git a/tests/models/language/pooling/test_snowflake_arctic_embed.py b/tests/models/language/pooling_mteb_test/test_snowflake_arctic_embed.py similarity index 94% rename from tests/models/language/pooling/test_snowflake_arctic_embed.py rename to tests/models/language/pooling_mteb_test/test_snowflake_arctic_embed.py index 864f3d75e..91bad2c4e 100644 --- a/tests/models/language/pooling/test_snowflake_arctic_embed.py +++ b/tests/models/language/pooling_mteb_test/test_snowflake_arctic_embed.py @@ -3,8 +3,10 @@ import pytest -from ...utils import CLSPoolingEmbedModelInfo, EmbedModelInfo -from .embed_utils import correctness_test_embed_models +from tests.models.language.pooling.embed_utils import ( + correctness_test_embed_models) +from tests.models.utils import CLSPoolingEmbedModelInfo, EmbedModelInfo + from .mteb_utils import mteb_test_embed_models MODELS = [ diff --git a/tests/models/language/pooling/test_st_projector.py b/tests/models/language/pooling_mteb_test/test_st_projector.py similarity index 86% rename from tests/models/language/pooling/test_st_projector.py rename to tests/models/language/pooling_mteb_test/test_st_projector.py index 9301e705c..bd493e7e2 100644 --- a/tests/models/language/pooling/test_st_projector.py +++ b/tests/models/language/pooling_mteb_test/test_st_projector.py @@ -2,8 +2,9 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import pytest -from ...utils import (CLSPoolingEmbedModelInfo, EmbedModelInfo, - LASTPoolingEmbedModelInfo) +from tests.models.utils import (CLSPoolingEmbedModelInfo, EmbedModelInfo, + LASTPoolingEmbedModelInfo) + from .mteb_utils import mteb_test_embed_models # ST models with projector (Dense) layers