[Model] Add user-configurable task for models that support both generation and embedding (#9424)

2024-10-19 02:31:58 +08:00
parent 7dbe738d65
commit 051eaf6db3
33 changed files with 451 additions and 201 deletions
--- a/tests/models/decoder_only/vision_language/test_phi3v.py
+++ b/tests/models/decoder_only/vision_language/test_phi3v.py
@@ -89,6 +89,7 @@ def run_test(

    # max_model_len should be greater than image_feature_size
    with vllm_runner(model,
+                     task="generate",
                     max_model_len=4096,
                     max_num_seqs=2,
                     dtype=dtype,
--- a/tests/models/embedding/vision_language/test_phi3v.py
+++ b/tests/models/embedding/vision_language/test_phi3v.py
@@ -28,6 +28,7 @@ def test_models(
    # if we run HF first, the cuda initialization will be done and it
    # will hurt multiprocessing backend with fork method (the default method).
    with vllm_runner(model,
+                     task="embedding",
                     max_model_len=4096,
                     max_num_seqs=2,
                     dtype=dtype,
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union

 import torch

-from vllm.config import ModelConfig
+from vllm.config import ModelConfig, TaskOption
 from vllm.inputs import InputContext
 from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs
 from vllm.utils import is_cpu
@@ -248,6 +248,7 @@ def check_logprobs_close(


 def build_model_context(model_name: str,
+                        task: TaskOption = "auto",
                        tokenizer_name: Optional[str] = None,
                        trust_remote_code: bool = False,
                        dtype: Optional[Union[str, torch.dtype]] = None,
@@ -273,7 +274,8 @@ def build_model_context(model_name: str,

    model_config = ModelConfig(
        model_name,
-        tokenizer_name,
+        task=task,
+        tokenizer=tokenizer_name,
        tokenizer_mode="auto",
        trust_remote_code=trust_remote_code,
        dtype=dtype,