[Deprecation][2/N] Replace --task with --runner and --convert (#21470)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
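In user-facing terms, the single `task` option is split in two: `runner` selects the execution mode, and (per the PR title) a separate `convert` option handles adapting a model to a different output type. A minimal sketch of the rename at the Python API surface, assuming `LLM` accepts the same keyword that the test helpers below forward:

from vllm import LLM

# Before this series (now deprecated):
#   llm = LLM(model="OpenGVLab/InternVL3-1B-hf", task="generate")
# After: `runner` picks the execution mode explicitly.
llm = LLM(model="OpenGVLab/InternVL3-1B-hf", runner="generate")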
@@ -222,7 +222,6 @@ VLM_TEST_SETTINGS = {
         },
         marks=[large_gpu_mark(min_gb=32)],
     ),
-    # Check "auto" with fallback to transformers
     "internvl-transformers": VLMTestInfo(
         models=["OpenGVLab/InternVL3-1B-hf"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
@@ -232,7 +231,7 @@ VLM_TEST_SETTINGS = {
         use_tokenizer_eos=True,
         image_size_factors=[(0.25, 0.5, 1.0)],
         vllm_runner_kwargs={
-            "model_impl": "auto",
+            "model_impl": "transformers",
         },
         auto_cls=AutoModelForImageTextToText,
         marks=[pytest.mark.core_model],
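The hunk above also pins `model_impl` rather than relying on automatic fallback to the Transformers implementation. A minimal sketch of the same override through the public API (treat the exact accepted values as an assumption):

from vllm import LLM

# Hypothetical example: force the Transformers-backend model implementation
# instead of letting vLLM auto-select one ("auto" is the default).
llm = LLM(model="OpenGVLab/InternVL3-1B-hf", model_impl="transformers")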
@@ -638,7 +637,7 @@ VLM_TEST_SETTINGS = {
         img_idx_to_prompt=lambda idx: f"<|image_{idx}|>\n",
         max_model_len=4096,
         max_num_seqs=2,
-        task="generate",
+        runner="generate",
         # use sdpa mode for hf runner since phi3v didn't work with flash_attn
         hf_model_kwargs={"_attn_implementation": "sdpa"},
         use_tokenizer_eos=True,

@@ -65,7 +65,7 @@ def run_test(
     # max_model_len should be greater than image_feature_size
     with vllm_runner(
             model,
-            task="generate",
+            runner="generate",
             max_model_len=max_model_len,
             max_num_seqs=1,
             dtype=dtype,

@@ -48,7 +48,7 @@ def test_models(vllm_runner, model, dtype: str, max_tokens: int) -> None:
     ]

     with vllm_runner(model,
-                     task="generate",
+                     runner="generate",
                      dtype=dtype,
                      limit_mm_per_prompt={"image": 2},
                      max_model_len=32768,

@@ -99,7 +99,7 @@ def run_test(
     # max_model_len should be greater than image_feature_size
     with vllm_runner(
             model,
-            task="generate",
+            runner="generate",
             max_model_len=max_model_len,
             max_num_seqs=2,
             dtype=dtype,

@@ -267,7 +267,7 @@ def run_embedding_input_test(

     # max_model_len should be greater than image_feature_size
     with vllm_runner(model,
-                     task="generate",
+                     runner="generate",
                      max_model_len=4000,
                      max_num_seqs=3,
                      dtype=dtype,

@@ -6,7 +6,7 @@ from typing import Any, Callable, Optional
 import torch
 from transformers.models.auto.auto_factory import _BaseAutoModelClass

-from vllm.config import TaskOption
+from vllm.config import RunnerOption
 from vllm.transformers_utils.tokenizer import AnyTokenizer

 from .....conftest import HfRunner, VllmRunner
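For reference, `RunnerOption` replaces `TaskOption` as the type of this setting. A sketch of its likely shape as a string-literal alias; the exact value set is an assumption based on this diff and vLLM's documented runner modes:

from typing import Literal

# Assumed definition: "auto" defers the choice to vLLM,
# the remaining values name concrete runner modes.
RunnerOption = Literal["auto", "generate", "pooling", "draft"]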
@@ -37,7 +37,7 @@ def run_test(
     vllm_runner_kwargs: Optional[dict[str, Any]],
     hf_model_kwargs: Optional[dict[str, Any]],
     patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
-    task: TaskOption = "auto",
+    runner: RunnerOption = "auto",
    distributed_executor_backend: Optional[str] = None,
     tensor_parallel_size: int = 1,
     vllm_embeddings: Optional[torch.Tensor] = None,
@@ -83,7 +83,7 @@ def run_test(
             tensor_parallel_size=tensor_parallel_size,
             distributed_executor_backend=distributed_executor_backend,
             enforce_eager=enforce_eager,
-            task=task,
+            runner=runner,
             **vllm_runner_kwargs_) as vllm_model:
         tokenizer = vllm_model.llm.get_tokenizer()

@@ -11,7 +11,7 @@ from pytest import MarkDecorator
 from transformers import AutoModelForCausalLM
 from transformers.models.auto.auto_factory import _BaseAutoModelClass

-from vllm.config import TaskOption
+from vllm.config import RunnerOption
 from vllm.sequence import SampleLogprobs
 from vllm.transformers_utils.tokenizer import AnyTokenizer

@@ -109,7 +109,7 @@ class VLMTestInfo(NamedTuple):
     enforce_eager: bool = True
     max_model_len: int = 1024
     max_num_seqs: int = 256
-    task: TaskOption = "auto"
+    runner: RunnerOption = "auto"
     tensor_parallel_size: int = 1
     vllm_runner_kwargs: Optional[dict[str, Any]] = None

@@ -173,7 +173,7 @@ class VLMTestInfo(NamedTuple):
         "enforce_eager": self.enforce_eager,
         "max_model_len": self.max_model_len,
         "max_num_seqs": self.max_num_seqs,
-        "task": self.task,
+        "runner": self.runner,
         "tensor_parallel_size": self.tensor_parallel_size,
         "vllm_runner_kwargs": self.vllm_runner_kwargs,
         "hf_output_post_proc": self.hf_output_post_proc,
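Putting the pieces together, each VLM test entry now pins its runner through the renamed field. A self-contained stand-in for the harness's `VLMTestInfo`, trimmed to the field this commit touches (the real class carries many more fields):

from typing import NamedTuple

class VLMTestInfo(NamedTuple):
    models: list[str]
    runner: str = "auto"  # was `task: TaskOption = "auto"` before this change

info = VLMTestInfo(models=["OpenGVLab/InternVL3-1B-hf"], runner="generate")
print(info.runner)  # -> "generate"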