[Deprecation][2/N] Replace --task with --runner and --convert (#21470)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
commit 86ae693f20 (parent 8f605ee309)
Author: Cyrus Leung
Date:   2025-07-28 10:42:40 +08:00
Committed by: GitHub

94 changed files with 1117 additions and 1083 deletions
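At the user level, this series splits the old single task option into two orthogonal ones: runner picks the engine loop and convert picks how the model is adapted to it. A minimal sketch of the intended migration, assuming the option values implied by the PR title ("pooling", "embed"); only the rename itself appears in the diffs below:

    from vllm import LLM

    # Before this series: one option chose both the engine loop and the head.
    #   llm = LLM(model="intfloat/e5-mistral-7b-instruct", task="embed")

    # After: the runner and the conversion are selected independently.
    llm = LLM(
        model="intfloat/e5-mistral-7b-instruct",  # illustrative model
        runner="pooling",   # which engine loop to run
        convert="embed",    # how to adapt the model for that runner
    )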


@@ -222,7 +222,6 @@ VLM_TEST_SETTINGS = {
         },
         marks=[large_gpu_mark(min_gb=32)],
     ),
-    # Check "auto" with fallback to transformers
     "internvl-transformers": VLMTestInfo(
         models=["OpenGVLab/InternVL3-1B-hf"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
@@ -232,7 +231,7 @@ VLM_TEST_SETTINGS = {
         use_tokenizer_eos=True,
         image_size_factors=[(0.25, 0.5, 1.0)],
         vllm_runner_kwargs={
-            "model_impl": "auto",
+            "model_impl": "transformers",
         },
         auto_cls=AutoModelForImageTextToText,
         marks=[pytest.mark.core_model],
@@ -638,7 +637,7 @@ VLM_TEST_SETTINGS = {
         img_idx_to_prompt=lambda idx: f"<|image_{idx}|>\n",
         max_model_len=4096,
         max_num_seqs=2,
-        task="generate",
+        runner="generate",
         # use sdpa mode for hf runner since phi3v didn't work with flash_attn
         hf_model_kwargs={"_attn_implementation": "sdpa"},
         use_tokenizer_eos=True,
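The hunks above rename the test helpers' task keyword to runner outright; user-facing entry points presumably keep --task working behind a deprecation warning during the transition. A hypothetical shim illustrating that pattern (resolve_runner and its task-to-runner mapping are assumptions, not code from this PR):

    import warnings
    from typing import Optional

    def resolve_runner(runner: str = "auto", task: Optional[str] = None) -> str:
        """Map a deprecated task= value onto the new runner= option."""
        if task is not None:
            warnings.warn(
                "task is deprecated; use runner (and convert) instead",
                DeprecationWarning,
                stacklevel=2,
            )
            # "generate" carries over directly; pooling-style tasks such as
            # "embed" or "classify" select the pooling runner (assumed mapping).
            return task if task == "generate" else "pooling"
        return runner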


@@ -65,7 +65,7 @@ def run_test(
     # max_model_len should be greater than image_feature_size
     with vllm_runner(
             model,
-            task="generate",
+            runner="generate",
             max_model_len=max_model_len,
             max_num_seqs=1,
             dtype=dtype,


@@ -48,7 +48,7 @@ def test_models(vllm_runner, model, dtype: str, max_tokens: int) -> None:
     ]
     with vllm_runner(model,
-                     task="generate",
+                     runner="generate",
                      dtype=dtype,
                      limit_mm_per_prompt={"image": 2},
                      max_model_len=32768,


@@ -99,7 +99,7 @@ def run_test(
     # max_model_len should be greater than image_feature_size
     with vllm_runner(
             model,
-            task="generate",
+            runner="generate",
             max_model_len=max_model_len,
             max_num_seqs=2,
             dtype=dtype,


@@ -267,7 +267,7 @@ def run_embedding_input_test(
     # max_model_len should be greater than image_feature_size
     with vllm_runner(model,
-                     task="generate",
+                     runner="generate",
                      max_model_len=4000,
                      max_num_seqs=3,
                      dtype=dtype,


@@ -6,7 +6,7 @@ from typing import Any, Callable, Optional
 import torch
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
-from vllm.config import TaskOption
+from vllm.config import RunnerOption
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from .....conftest import HfRunner, VllmRunner
@@ -37,7 +37,7 @@ def run_test(
     vllm_runner_kwargs: Optional[dict[str, Any]],
     hf_model_kwargs: Optional[dict[str, Any]],
     patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
-    task: TaskOption = "auto",
+    runner: RunnerOption = "auto",
     distributed_executor_backend: Optional[str] = None,
     tensor_parallel_size: int = 1,
     vllm_embeddings: Optional[torch.Tensor] = None,
@@ -83,7 +83,7 @@ def run_test(
             tensor_parallel_size=tensor_parallel_size,
             distributed_executor_backend=distributed_executor_backend,
             enforce_eager=enforce_eager,
-            task=task,
+            runner=runner,
             **vllm_runner_kwargs_) as vllm_model:
         tokenizer = vllm_model.llm.get_tokenizer()
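Both files in this hunk group swap the TaskOption import for RunnerOption and thread it through run_test into vllm_runner. A sketch of the shape such an alias plausibly has (the real definition lives in vllm.config and may list different members):

    from typing import Literal

    # Assumed definition: the values used by these tests ("auto", "generate")
    # plus the pooling and draft runners implied by the runner/convert split.
    RunnerOption = Literal["auto", "generate", "pooling", "draft"]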


@@ -11,7 +11,7 @@ from pytest import MarkDecorator
 from transformers import AutoModelForCausalLM
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
-from vllm.config import TaskOption
+from vllm.config import RunnerOption
 from vllm.sequence import SampleLogprobs
 from vllm.transformers_utils.tokenizer import AnyTokenizer
@@ -109,7 +109,7 @@ class VLMTestInfo(NamedTuple):
     enforce_eager: bool = True
     max_model_len: int = 1024
     max_num_seqs: int = 256
-    task: TaskOption = "auto"
+    runner: RunnerOption = "auto"
     tensor_parallel_size: int = 1
     vllm_runner_kwargs: Optional[dict[str, Any]] = None
@@ -173,7 +173,7 @@ class VLMTestInfo(NamedTuple):
             "enforce_eager": self.enforce_eager,
             "max_model_len": self.max_model_len,
             "max_num_seqs": self.max_num_seqs,
-            "task": self.task,
+            "runner": self.runner,
             "tensor_parallel_size": self.tensor_parallel_size,
             "vllm_runner_kwargs": self.vllm_runner_kwargs,
             "hf_output_post_proc": self.hf_output_post_proc,