[Model] Add user-configurable task for models that support both generation and embedding (#9424)
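In short: where vLLM previously inferred whether a model should run generation or embedding, this change adds an explicit, user-configurable task setting. A minimal sketch of the resulting usage, inferred from the tests in this diff (passing task directly to LLM is an assumption; the hunks below only show the --task CLI flag and ModelConfig(task=...)):

from vllm import LLM

# Assumption: the new task option is also exposed on the LLM entrypoint,
# mirroring the "--task generate" flag added to the server fixture below.
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct", task="generate")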
tests/entrypoints/llm/test_chat.py (new file, 92 lines added)
@@ -0,0 +1,92 @@
from typing import List

import pytest

from vllm import LLM

from ..openai.test_vision import TEST_IMAGE_URLS


def test_chat():
    llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")

    prompt1 = "Explain the concept of entropy."
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant"
        },
        {
            "role": "user",
            "content": prompt1
        },
    ]
    outputs = llm.chat(messages)
    assert len(outputs) == 1


def test_multi_chat():
    llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")

    prompt1 = "Explain the concept of entropy."
    prompt2 = "Explain what among us is."

    conversation1 = [
        {
            "role": "system",
            "content": "You are a helpful assistant"
        },
        {
            "role": "user",
            "content": prompt1
        },
    ]

    conversation2 = [
        {
            "role": "system",
            "content": "You are a helpful assistant"
        },
        {
            "role": "user",
            "content": prompt2
        },
    ]

    messages = [conversation1, conversation2]

    outputs = llm.chat(messages)
    assert len(outputs) == 2


@pytest.mark.parametrize("image_urls",
                         [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
def test_chat_multi_image(image_urls: List[str]):
    llm = LLM(
        model="microsoft/Phi-3.5-vision-instruct",
        dtype="bfloat16",
        max_model_len=4096,
        max_num_seqs=5,
        enforce_eager=True,
        trust_remote_code=True,
        limit_mm_per_prompt={"image": 2},
    )

    messages = [{
        "role":
        "user",
        "content": [
            *({
                "type": "image_url",
                "image_url": {
                    "url": image_url
                }
            } for image_url in image_urls),
            {
                "type": "text",
                "text": "What's in this image?"
            },
        ],
    }]
    outputs = llm.chat(messages)
    assert len(outputs) >= 0
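A note on the new tests above: llm.chat returns one RequestOutput per conversation, so the assertions count conversations rather than generated tokens. A short sketch of reading the generated text back out (API as used elsewhere in vLLM's tests):

from vllm import LLM, SamplingParams

llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
conversation = [{"role": "user", "content": "Explain the concept of entropy."}]

# One RequestOutput per conversation; each holds candidate completions.
outputs = llm.chat(conversation, sampling_params=SamplingParams(max_tokens=64))
for output in outputs:
    print(output.outputs[0].text)  # first candidate's generated text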
@@ -6,7 +6,6 @@ import pytest
 from vllm import LLM, RequestOutput, SamplingParams
 
 from ...conftest import cleanup
-from ..openai.test_vision import TEST_IMAGE_URLS
 
 MODEL_NAME = "facebook/opt-125m"
 
@@ -104,90 +103,3 @@ def test_multiple_sampling_params(llm: LLM):
     # sampling_params is None, default params should be applied
     outputs = llm.generate(PROMPTS, sampling_params=None)
     assert len(PROMPTS) == len(outputs)
-
-
-def test_chat():
-
-    llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
-
-    prompt1 = "Explain the concept of entropy."
-    messages = [
-        {
-            "role": "system",
-            "content": "You are a helpful assistant"
-        },
-        {
-            "role": "user",
-            "content": prompt1
-        },
-    ]
-    outputs = llm.chat(messages)
-    assert len(outputs) == 1
-
-
-def test_multi_chat():
-
-    llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
-
-    prompt1 = "Explain the concept of entropy."
-    prompt2 = "Explain what among us is."
-
-    conversation1 = [
-        {
-            "role": "system",
-            "content": "You are a helpful assistant"
-        },
-        {
-            "role": "user",
-            "content": prompt1
-        },
-    ]
-
-    conversation2 = [
-        {
-            "role": "system",
-            "content": "You are a helpful assistant"
-        },
-        {
-            "role": "user",
-            "content": prompt2
-        },
-    ]
-
-    messages = [conversation1, conversation2]
-
-    outputs = llm.chat(messages)
-    assert len(outputs) == 2
-
-
-@pytest.mark.parametrize("image_urls",
-                         [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
-def test_chat_multi_image(image_urls: List[str]):
-    llm = LLM(
-        model="microsoft/Phi-3.5-vision-instruct",
-        dtype="bfloat16",
-        max_model_len=4096,
-        max_num_seqs=5,
-        enforce_eager=True,
-        trust_remote_code=True,
-        limit_mm_per_prompt={"image": 2},
-    )
-
-    messages = [{
-        "role":
-        "user",
-        "content": [
-            *({
-                "type": "image_url",
-                "image_url": {
-                    "url": image_url
-                }
-            } for image_url in image_urls),
-            {
-                "type": "text",
-                "text": "What's in this image?"
-            },
-        ],
-    }]
-    outputs = llm.chat(messages)
-    assert len(outputs) >= 0
tests/entrypoints/llm/test_init.py (new file, 22 lines added)
@@ -0,0 +1,22 @@
import pytest

from vllm import LLM

from ...utils import error_on_warning

MODEL_NAME = "facebook/opt-125m"


def test_pos_args_deprecated():
    with error_on_warning(DeprecationWarning):
        LLM(model=MODEL_NAME, tokenizer=MODEL_NAME)

    with error_on_warning(DeprecationWarning):
        LLM(MODEL_NAME, tokenizer=MODEL_NAME)

    with pytest.warns(DeprecationWarning, match="'tokenizer'"):
        LLM(MODEL_NAME, MODEL_NAME)

    with pytest.warns(DeprecationWarning,
                      match="'tokenizer', 'tokenizer_mode'"):
        LLM(MODEL_NAME, MODEL_NAME, "auto")
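The error_on_warning helper imported above lives in tests/utils.py. A hypothetical sketch of what such a context manager could look like, matching the error_on_warning(DeprecationWarning) usage in the test (the real implementation may differ):

import warnings
from contextlib import contextmanager
from typing import Type


@contextmanager
def error_on_warning(category: Type[Warning] = Warning):
    """Turn any warning of ``category`` raised inside the block into an error."""
    with warnings.catch_warnings():
        warnings.filterwarnings("error", category=category)
        yield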
@@ -22,12 +22,12 @@ class MockHFConfig:
 
 @dataclass
 class MockModelConfig:
+    task = "generate"
     tokenizer = MODEL_NAME
     trust_remote_code = False
     tokenizer_mode = "auto"
     max_model_len = 100
     tokenizer_revision = None
-    embedding_mode = False
     multimodal_config = MultiModalConfig()
     hf_config = MockHFConfig()
 
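This hunk captures the shape of the whole change: the boolean embedding_mode field gives way to a string-valued task. A hypothetical sketch of how code can branch on it (the function and return values are illustrative, not vLLM's internals):

def resolve_runner(model_config) -> str:
    # Hypothetical: dispatch on the new string-valued ``task`` field rather
    # than the old ``embedding_mode`` boolean. Accepted values are assumed.
    if model_config.task == "generate":
        return "generation"
    if model_config.task == "embedding":
        return "embedding"
    raise ValueError(f"unsupported task: {model_config.task!r}")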
@@ -23,6 +23,8 @@ TEST_IMAGE_URLS = [
 @pytest.fixture(scope="module")
 def server():
     args = [
+        "--task",
+        "generate",
         "--dtype",
         "bfloat16",
         "--max-model-len",
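The two added lines pin the OpenAI-compatible test server to the generation task. For context, a hedged sketch of how vLLM's entrypoint tests typically consume such an args list (helper names are assumed from tests/utils.py; the hunk above is truncated, so values past --max-model-len are placeholders):

from tests.utils import RemoteOpenAIServer  # assumed test helper

args = [
    "--task", "generate",
    "--dtype", "bfloat16",
    "--max-model-len", "4096",  # placeholder: the real value is cut off above
    "--trust-remote-code",
]
with RemoteOpenAIServer("microsoft/Phi-3.5-vision-instruct", args) as server:
    client = server.get_client()  # OpenAI-compatible client for assertions
    print(client.models.list().data[0].id)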
@@ -18,7 +18,8 @@ PHI3V_MODEL_ID = "microsoft/Phi-3.5-vision-instruct"
 @pytest.fixture(scope="module")
 def phi3v_model_config():
     return ModelConfig(PHI3V_MODEL_ID,
-                       PHI3V_MODEL_ID,
+                       task="generate",
+                       tokenizer=PHI3V_MODEL_ID,
                        tokenizer_mode="auto",
                        trust_remote_code=True,
                        dtype="bfloat16",
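Finally, the other half of the feature named in the commit title: presumably the same knob can pin a dual-capability model to embedding. A hedged sketch (the model choice and the exact value "embedding" are assumptions; these hunks only show "generate"):

from vllm import LLM

# Assumption: "embedding" selects the embedding path of a model that
# supports both generation and embedding.
llm = LLM(model="intfloat/e5-mistral-7b-instruct", task="embedding")
outputs = llm.encode(["What is the capital of France?"])
print(len(outputs[0].outputs.embedding))  # embedding dimensionality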