[Model] Add user-configurable task for models that support both generation and embedding (#9424)

This commit is contained in:
Cyrus Leung
2024-10-19 02:31:58 +08:00
committed by GitHub
parent 7dbe738d65
commit 051eaf6db3
33 changed files with 451 additions and 201 deletions

View File

@@ -25,7 +25,7 @@ from tests.models.utils import (TokensTextLogprobs,
from vllm import LLM, SamplingParams
from vllm.assets.image import ImageAsset
from vllm.assets.video import VideoAsset
from vllm.config import TokenizerPoolConfig
from vllm.config import TaskOption, TokenizerPoolConfig
from vllm.connections import global_http_connection
from vllm.distributed import (destroy_distributed_environment,
destroy_model_parallel,
@@ -619,6 +619,7 @@ class VllmRunner:
def __init__(
self,
model_name: str,
task: TaskOption = "auto",
tokenizer_name: Optional[str] = None,
# Use smaller max model length, otherwise bigger model cannot run due
# to kv cache size limit.
@@ -634,6 +635,7 @@ class VllmRunner:
) -> None:
self.model = LLM(
model=model_name,
task=task,
tokenizer=tokenizer_name,
trust_remote_code=True,
dtype=dtype,