[Model] Support Qwen2 embeddings and use tags to select model tests (#10184)

This commit is contained in:
Cyrus Leung
2024-11-15 12:23:09 +08:00
committed by GitHub
parent 2885ba0e24
commit b40cf6402e
19 changed files with 252 additions and 178 deletions

View File

@@ -4,25 +4,25 @@ Run `pytest tests/models/embedding/language/test_embedding.py`.
"""
import pytest
from vllm.utils import current_platform
from ..utils import check_embeddings_close
# Model, Guard
MODELS = [
"intfloat/e5-mistral-7b-instruct",
"BAAI/bge-base-en-v1.5",
"BAAI/bge-multilingual-gemma2",
"intfloat/multilingual-e5-large",
]
ENCODER_ONLY = [
"BAAI/bge-base-en-v1.5",
"intfloat/multilingual-e5-large",
]
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize(
"model",
[
# [Encoder-only]
pytest.param("BAAI/bge-base-en-v1.5",
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
pytest.param("intfloat/multilingual-e5-large"),
# [Decoder-only]
pytest.param("intfloat/e5-mistral-7b-instruct",
marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
pytest.param("BAAI/bge-multilingual-gemma2",
marks=[pytest.mark.core_model]),
pytest.param("ssmits/Qwen2-7B-Instruct-embed-base"),
pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
],
)
@pytest.mark.parametrize("dtype", ["half"])
def test_models(
hf_runner,
@@ -31,9 +31,6 @@ def test_models(
model,
dtype: str,
) -> None:
if model not in ENCODER_ONLY and current_platform.is_cpu():
pytest.skip("Skip large embedding models test on CPU.")
# The example_prompts has ending "\n", for example:
# "Write a short story about a robot that dreams for the first time.\n"
# sentence_transformers will strip the input texts, see:
@@ -46,8 +43,13 @@ def test_models(
is_sentence_transformer=True) as hf_model:
hf_outputs = hf_model.encode(example_prompts)
with vllm_runner(model, dtype=dtype, max_model_len=None) as vllm_model:
with vllm_runner(model, task="embedding", dtype=dtype,
max_model_len=None) as vllm_model:
vllm_outputs = vllm_model.encode(example_prompts)
# This test is for verifying whether the model's extra_repr
# can be printed correctly.
print(vllm_model.model.llm_engine.model_executor.driver_worker.
model_runner.model)
check_embeddings_close(
embeddings_0_lst=hf_outputs,