[Model] Add user-configurable task for models that support both generation and embedding (#9424)
This commit is contained in:
@@ -89,6 +89,7 @@ def run_test(
|
||||
|
||||
# max_model_len should be greater than image_feature_size
|
||||
with vllm_runner(model,
|
||||
+task="generate",
|
||||
max_model_len=4096,
|
||||
max_num_seqs=2,
|
||||
dtype=dtype,
|
||||
|
||||
@@ -28,6 +28,7 @@ def test_models(
|
||||
# if we run HF first, the cuda initialization will be done and it
|
||||
# will hurt multiprocessing backend with fork method (the default method).
|
||||
with vllm_runner(model,
|
||||
+task="embedding",
|
||||
max_model_len=4096,
|
||||
max_num_seqs=2,
|
||||
dtype=dtype,
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
import torch
|
||||
|
||||
-from vllm.config import ModelConfig
|
||||
+from vllm.config import ModelConfig, TaskOption
|
||||
from vllm.inputs import InputContext
|
||||
from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs
|
||||
from vllm.utils import is_cpu
|
||||
@@ -248,6 +248,7 @@ def check_logprobs_close(
|
||||
|
||||
|
||||
def build_model_context(model_name: str,
|
||||
+task: TaskOption = "auto",
|
||||
tokenizer_name: Optional[str] = None,
|
||||
trust_remote_code: bool = False,
|
||||
dtype: Optional[Union[str, torch.dtype]] = None,
|
||||
@@ -273,7 +274,8 @@ def build_model_context(model_name: str,
|
||||
|
||||
model_config = ModelConfig(
|
||||
model_name,
|
||||
-tokenizer_name,
|
||||
+task=task,
|
||||
+tokenizer=tokenizer_name,
|
||||
tokenizer_mode="auto",
|
||||
trust_remote_code=trust_remote_code,
|
||||
dtype=dtype,
|
||||
|
||||
Reference in New Issue
Block a user