[Mypy] Better fixes for the mypy issues in vllm/config (#37902)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2026-03-25 13:14:43 +00:00
committed by GitHub
parent 34d317dcec
commit d215d1efca
35 changed files with 153 additions and 182 deletions

View File

@@ -9,7 +9,6 @@ on HuggingFace model repository.
"""
import os
from dataclasses import asdict
from typing import Any, NamedTuple
from huggingface_hub import snapshot_download
@@ -633,7 +632,7 @@ def main(args):
req_data.engine_args.limit_mm_per_prompt or {}
)
engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
engine_args = vars(req_data.engine_args) | {"seed": args.seed}
if args.tensor_parallel_size is not None:
engine_args["tensor_parallel_size"] = args.tensor_parallel_size
llm = LLM(**engine_args)

View File

@@ -8,7 +8,6 @@ the explicit/implicit prompt format on enc-dec LMMs for text generation.
import os
import time
from collections.abc import Sequence
from dataclasses import asdict
from typing import NamedTuple
from vllm import LLM, EngineArgs, PromptType, SamplingParams
@@ -91,13 +90,12 @@ def main(args):
req_data = model_example_map[model]()
# Disable other modalities to save memory
engine_args = req_data.engine_args
default_limits = {"image": 0, "video": 0, "audio": 0}
req_data.engine_args.limit_mm_per_prompt = default_limits | dict(
req_data.engine_args.limit_mm_per_prompt or {}
)
engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
llm = LLM(**engine_args)
limit_mm_per_prompt = default_limits | (engine_args.limit_mm_per_prompt or {})
engine_args.limit_mm_per_prompt = limit_mm_per_prompt
engine_args.seed = args.seed
llm = LLM.from_engine_args(engine_args)
prompts = req_data.prompts

View File

@@ -20,8 +20,6 @@ python load_sharded_state.py \
--max-tokens 50
"""
import dataclasses
from vllm import LLM, EngineArgs, SamplingParams
from vllm.utils.argparse_utils import FlexibleArgumentParser
@@ -64,7 +62,7 @@ def main():
print(f"Tensor parallel size: {engine_args.tensor_parallel_size}")
# Load the model using engine args
llm = LLM(**dataclasses.asdict(engine_args))
llm = LLM.from_engine_args(engine_args)
# Prepare sampling parameters
sampling_params = SamplingParams(

View File

@@ -21,7 +21,6 @@ llm = LLM(
)
"""
import dataclasses
import os
import shutil
from pathlib import Path
@@ -60,7 +59,7 @@ def main(args):
if not Path(model_path).is_dir():
raise ValueError("model path must be a local directory")
# Create LLM instance from arguments
llm = LLM(**dataclasses.asdict(engine_args))
llm = LLM.from_engine_args(engine_args)
# Prepare output directory
Path(args.output).mkdir(exist_ok=True)
# Dump worker states to output directory

View File

@@ -11,7 +11,6 @@ on HuggingFace model repository.
import os
import random
from contextlib import contextmanager
from dataclasses import asdict
from typing import NamedTuple
from huggingface_hub import snapshot_download
@@ -2434,13 +2433,13 @@ def main(args):
req_data.engine_args.limit_mm_per_prompt or {}
)
engine_args = asdict(req_data.engine_args) | {
"seed": args.seed,
"mm_processor_cache_gb": 0 if args.disable_mm_processor_cache else 4,
}
engine_args = req_data.engine_args
engine_args.seed = args.seed
mm_processor_cache_gb = 0 if args.disable_mm_processor_cache else 4
engine_args.mm_processor_cache_gb = mm_processor_cache_gb
if args.tensor_parallel_size is not None:
engine_args["tensor_parallel_size"] = args.tensor_parallel_size
llm = LLM(**engine_args)
engine_args.tensor_parallel_size = args.tensor_parallel_size
llm = LLM.from_engine_args(engine_args)
# Don't want to check the flag multiple times, so just hijack `prompts`.
prompts = (

View File

@@ -8,7 +8,6 @@ using the chat template defined by the model.
import os
from argparse import Namespace
from dataclasses import asdict
from typing import NamedTuple
from huggingface_hub import snapshot_download
@@ -1481,10 +1480,11 @@ def run_generate(
):
req_data = model_example_map[model](question, image_urls)
engine_args = asdict(req_data.engine_args) | {"seed": seed}
engine_args = req_data.engine_args
engine_args.seed = seed
if tensor_parallel_size is not None:
engine_args["tensor_parallel_size"] = tensor_parallel_size
llm = LLM(**engine_args)
engine_args.tensor_parallel_size = tensor_parallel_size
llm = LLM.from_engine_args(engine_args)
sampling_params = SamplingParams(
temperature=0.0, max_tokens=256, stop_token_ids=req_data.stop_token_ids
@@ -1521,10 +1521,11 @@ def run_chat(
req_data.engine_args.limit_mm_per_prompt or {}
)
engine_args = asdict(req_data.engine_args) | {"seed": seed}
engine_args = req_data.engine_args
engine_args.seed = seed
if tensor_parallel_size is not None:
engine_args["tensor_parallel_size"] = tensor_parallel_size
llm = LLM(**engine_args)
engine_args.tensor_parallel_size = tensor_parallel_size
llm = LLM.from_engine_args(engine_args)
sampling_params = (
SamplingParams(

View File

@@ -10,12 +10,11 @@ on HuggingFace model repository.
"""
import argparse
from dataclasses import asdict
from pathlib import Path
from PIL.Image import Image
from vllm import LLM, EngineArgs
from vllm import LLM
from vllm.multimodal.utils import fetch_image
from vllm.utils.print_utils import print_embeddings
@@ -28,14 +27,13 @@ multi_modal_data = {"image": fetch_image(image_url)}
def run_clip(seed: int):
engine_args = EngineArgs(
llm = LLM(
model="openai/clip-vit-base-patch32",
runner="pooling",
limit_mm_per_prompt={"image": 1},
seed=seed,
)
llm = LLM(**asdict(engine_args) | {"seed": seed})
print("Text embedding output:")
outputs = llm.embed(text, use_tqdm=False)
print_embeddings(outputs[0].outputs.embedding)
@@ -53,15 +51,14 @@ def run_clip(seed: int):
def run_e5_v(seed: int):
engine_args = EngineArgs(
llm = LLM(
model="royokong/e5-v",
runner="pooling",
max_model_len=4096,
limit_mm_per_prompt={"image": 1},
seed=seed,
)
llm = LLM(**asdict(engine_args) | {"seed": seed})
llama3_template = "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" # noqa: E501
print("Text embedding output:")
@@ -108,20 +105,20 @@ def run_qwen3_vl(seed: int):
multi_modal_data["image"] = post_process_image(multi_modal_data["image"])
engine_args = EngineArgs(
model="Qwen/Qwen3-VL-Embedding-2B",
runner="pooling",
max_model_len=8192,
limit_mm_per_prompt={"image": 1},
mm_processor_kwargs={"do_resize": False} if smart_resize is not None else None,
)
default_instruction = "Represent the user's input."
image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
prompt_text = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n"
prompt_image = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{image_placeholder}<|im_end|>\n<|im_start|>assistant\n"
prompt_image_text = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{image_placeholder}{text}<|im_end|>\n<|im_start|>assistant\n"
llm = LLM(**asdict(engine_args) | {"seed": seed})
llm = LLM(
model="Qwen/Qwen3-VL-Embedding-2B",
runner="pooling",
max_model_len=8192,
limit_mm_per_prompt={"image": 1},
mm_processor_kwargs={"do_resize": False} if smart_resize is not None else None,
seed=seed,
)
print("Text embedding output:")
outputs = llm.embed(prompt_text, use_tqdm=False)
@@ -149,14 +146,13 @@ def run_qwen3_vl(seed: int):
def run_siglip(seed: int):
engine_args = EngineArgs(
llm = LLM(
model="google/siglip-base-patch16-224",
runner="pooling",
limit_mm_per_prompt={"image": 1},
seed=seed,
)
llm = LLM(**asdict(engine_args) | {"seed": seed})
print("Text embedding output:")
outputs = llm.embed(text, use_tqdm=False)
print_embeddings(outputs[0].outputs.embedding)
@@ -174,16 +170,15 @@ def run_siglip(seed: int):
def run_vlm2vec_phi3v(seed: int):
engine_args = EngineArgs(
llm = LLM(
model="TIGER-Lab/VLM2Vec-Full",
runner="pooling",
max_model_len=4096,
trust_remote_code=True,
mm_processor_kwargs={"num_crops": 4},
limit_mm_per_prompt={"image": 1},
seed=seed,
)
llm = LLM(**asdict(engine_args) | {"seed": seed})
image_token = "<|image_1|>"
print("Text embedding output:")
@@ -259,7 +254,7 @@ def run_vlm2vec_qwen2vl(seed: int):
processor.save_pretrained(merged_path)
print("Done!")
engine_args = EngineArgs(
llm = LLM(
model=merged_path,
runner="pooling",
max_model_len=4096,
@@ -268,9 +263,8 @@ def run_vlm2vec_qwen2vl(seed: int):
"max_pixels": 12845056,
},
limit_mm_per_prompt={"image": 1},
seed=seed,
)
llm = LLM(**asdict(engine_args) | {"seed": seed})
image_token = "<|image_pad|>"
print("Text embedding output:")

View File

@@ -10,7 +10,6 @@ multimodal documents (text + images/videos).
from argparse import Namespace
from collections.abc import Callable
from dataclasses import asdict
from pathlib import Path
from typing import NamedTuple
@@ -125,7 +124,7 @@ def main(args: Namespace):
model_request = model_example_map[args.model_name]()
engine_args = model_request.engine_args
llm = LLM(**asdict(engine_args))
llm = LLM.from_engine_args(engine_args)
print("Query: string & Document: string")
outputs = llm.score(query, document)