[Mypy] Better fixes for the mypy issues in vllm/config (#37902)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -9,7 +9,6 @@ on HuggingFace model repository.
|
||||
"""
|
||||
|
||||
import os
|
||||
from dataclasses import asdict
|
||||
from typing import Any, NamedTuple
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
@@ -633,7 +632,7 @@ def main(args):
|
||||
req_data.engine_args.limit_mm_per_prompt or {}
|
||||
)
|
||||
|
||||
engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
|
||||
engine_args = vars(req_data.engine_args) | {"seed": args.seed}
|
||||
if args.tensor_parallel_size is not None:
|
||||
engine_args["tensor_parallel_size"] = args.tensor_parallel_size
|
||||
llm = LLM(**engine_args)
|
||||
|
||||
@@ -8,7 +8,6 @@ the explicit/implicit prompt format on enc-dec LMMs for text generation.
|
||||
import os
|
||||
import time
|
||||
from collections.abc import Sequence
|
||||
from dataclasses import asdict
|
||||
from typing import NamedTuple
|
||||
|
||||
from vllm import LLM, EngineArgs, PromptType, SamplingParams
|
||||
@@ -91,13 +90,12 @@ def main(args):
|
||||
req_data = model_example_map[model]()
|
||||
|
||||
# Disable other modalities to save memory
|
||||
engine_args = req_data.engine_args
|
||||
default_limits = {"image": 0, "video": 0, "audio": 0}
|
||||
req_data.engine_args.limit_mm_per_prompt = default_limits | dict(
|
||||
req_data.engine_args.limit_mm_per_prompt or {}
|
||||
)
|
||||
|
||||
engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
|
||||
llm = LLM(**engine_args)
|
||||
limit_mm_per_prompt = default_limits | (engine_args.limit_mm_per_prompt or {})
|
||||
engine_args.limit_mm_per_prompt = limit_mm_per_prompt
|
||||
engine_args.seed = args.seed
|
||||
llm = LLM.from_engine_args(engine_args)
|
||||
|
||||
prompts = req_data.prompts
|
||||
|
||||
|
||||
@@ -20,8 +20,6 @@ python load_sharded_state.py \
|
||||
--max-tokens 50
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
|
||||
from vllm import LLM, EngineArgs, SamplingParams
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
|
||||
@@ -64,7 +62,7 @@ def main():
|
||||
print(f"Tensor parallel size: {engine_args.tensor_parallel_size}")
|
||||
|
||||
# Load the model using engine args
|
||||
llm = LLM(**dataclasses.asdict(engine_args))
|
||||
llm = LLM.from_engine_args(engine_args)
|
||||
|
||||
# Prepare sampling parameters
|
||||
sampling_params = SamplingParams(
|
||||
|
||||
@@ -21,7 +21,6 @@ llm = LLM(
|
||||
)
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
@@ -60,7 +59,7 @@ def main(args):
|
||||
if not Path(model_path).is_dir():
|
||||
raise ValueError("model path must be a local directory")
|
||||
# Create LLM instance from arguments
|
||||
llm = LLM(**dataclasses.asdict(engine_args))
|
||||
llm = LLM.from_engine_args(engine_args)
|
||||
# Prepare output directory
|
||||
Path(args.output).mkdir(exist_ok=True)
|
||||
# Dump worker states to output directory
|
||||
|
||||
@@ -11,7 +11,6 @@ on HuggingFace model repository.
|
||||
import os
|
||||
import random
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import asdict
|
||||
from typing import NamedTuple
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
@@ -2434,13 +2433,13 @@ def main(args):
|
||||
req_data.engine_args.limit_mm_per_prompt or {}
|
||||
)
|
||||
|
||||
engine_args = asdict(req_data.engine_args) | {
|
||||
"seed": args.seed,
|
||||
"mm_processor_cache_gb": 0 if args.disable_mm_processor_cache else 4,
|
||||
}
|
||||
engine_args = req_data.engine_args
|
||||
engine_args.seed = args.seed
|
||||
mm_processor_cache_gb = 0 if args.disable_mm_processor_cache else 4
|
||||
engine_args.mm_processor_cache_gb = mm_processor_cache_gb
|
||||
if args.tensor_parallel_size is not None:
|
||||
engine_args["tensor_parallel_size"] = args.tensor_parallel_size
|
||||
llm = LLM(**engine_args)
|
||||
engine_args.tensor_parallel_size = args.tensor_parallel_size
|
||||
llm = LLM.from_engine_args(engine_args)
|
||||
|
||||
# Don't want to check the flag multiple times, so just hijack `prompts`.
|
||||
prompts = (
|
||||
|
||||
@@ -8,7 +8,6 @@ using the chat template defined by the model.
|
||||
|
||||
import os
|
||||
from argparse import Namespace
|
||||
from dataclasses import asdict
|
||||
from typing import NamedTuple
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
@@ -1481,10 +1480,11 @@ def run_generate(
|
||||
):
|
||||
req_data = model_example_map[model](question, image_urls)
|
||||
|
||||
engine_args = asdict(req_data.engine_args) | {"seed": seed}
|
||||
engine_args = req_data.engine_args
|
||||
engine_args.seed = seed
|
||||
if tensor_parallel_size is not None:
|
||||
engine_args["tensor_parallel_size"] = tensor_parallel_size
|
||||
llm = LLM(**engine_args)
|
||||
engine_args.tensor_parallel_size = tensor_parallel_size
|
||||
llm = LLM.from_engine_args(engine_args)
|
||||
|
||||
sampling_params = SamplingParams(
|
||||
temperature=0.0, max_tokens=256, stop_token_ids=req_data.stop_token_ids
|
||||
@@ -1521,10 +1521,11 @@ def run_chat(
|
||||
req_data.engine_args.limit_mm_per_prompt or {}
|
||||
)
|
||||
|
||||
engine_args = asdict(req_data.engine_args) | {"seed": seed}
|
||||
engine_args = req_data.engine_args
|
||||
engine_args.seed = seed
|
||||
if tensor_parallel_size is not None:
|
||||
engine_args["tensor_parallel_size"] = tensor_parallel_size
|
||||
llm = LLM(**engine_args)
|
||||
engine_args.tensor_parallel_size = tensor_parallel_size
|
||||
llm = LLM.from_engine_args(engine_args)
|
||||
|
||||
sampling_params = (
|
||||
SamplingParams(
|
||||
|
||||
@@ -10,12 +10,11 @@ on HuggingFace model repository.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
|
||||
from PIL.Image import Image
|
||||
|
||||
from vllm import LLM, EngineArgs
|
||||
from vllm import LLM
|
||||
from vllm.multimodal.utils import fetch_image
|
||||
from vllm.utils.print_utils import print_embeddings
|
||||
|
||||
@@ -28,14 +27,13 @@ multi_modal_data = {"image": fetch_image(image_url)}
|
||||
|
||||
|
||||
def run_clip(seed: int):
|
||||
engine_args = EngineArgs(
|
||||
llm = LLM(
|
||||
model="openai/clip-vit-base-patch32",
|
||||
runner="pooling",
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
llm = LLM(**asdict(engine_args) | {"seed": seed})
|
||||
|
||||
print("Text embedding output:")
|
||||
outputs = llm.embed(text, use_tqdm=False)
|
||||
print_embeddings(outputs[0].outputs.embedding)
|
||||
@@ -53,15 +51,14 @@ def run_clip(seed: int):
|
||||
|
||||
|
||||
def run_e5_v(seed: int):
|
||||
engine_args = EngineArgs(
|
||||
llm = LLM(
|
||||
model="royokong/e5-v",
|
||||
runner="pooling",
|
||||
max_model_len=4096,
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
llm = LLM(**asdict(engine_args) | {"seed": seed})
|
||||
|
||||
llama3_template = "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" # noqa: E501
|
||||
|
||||
print("Text embedding output:")
|
||||
@@ -108,20 +105,20 @@ def run_qwen3_vl(seed: int):
|
||||
|
||||
multi_modal_data["image"] = post_process_image(multi_modal_data["image"])
|
||||
|
||||
engine_args = EngineArgs(
|
||||
model="Qwen/Qwen3-VL-Embedding-2B",
|
||||
runner="pooling",
|
||||
max_model_len=8192,
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
mm_processor_kwargs={"do_resize": False} if smart_resize is not None else None,
|
||||
)
|
||||
default_instruction = "Represent the user's input."
|
||||
image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
|
||||
prompt_text = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n"
|
||||
prompt_image = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{image_placeholder}<|im_end|>\n<|im_start|>assistant\n"
|
||||
prompt_image_text = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{image_placeholder}{text}<|im_end|>\n<|im_start|>assistant\n"
|
||||
|
||||
llm = LLM(**asdict(engine_args) | {"seed": seed})
|
||||
llm = LLM(
|
||||
model="Qwen/Qwen3-VL-Embedding-2B",
|
||||
runner="pooling",
|
||||
max_model_len=8192,
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
mm_processor_kwargs={"do_resize": False} if smart_resize is not None else None,
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
print("Text embedding output:")
|
||||
outputs = llm.embed(prompt_text, use_tqdm=False)
|
||||
@@ -149,14 +146,13 @@ def run_qwen3_vl(seed: int):
|
||||
|
||||
|
||||
def run_siglip(seed: int):
|
||||
engine_args = EngineArgs(
|
||||
llm = LLM(
|
||||
model="google/siglip-base-patch16-224",
|
||||
runner="pooling",
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
llm = LLM(**asdict(engine_args) | {"seed": seed})
|
||||
|
||||
print("Text embedding output:")
|
||||
outputs = llm.embed(text, use_tqdm=False)
|
||||
print_embeddings(outputs[0].outputs.embedding)
|
||||
@@ -174,16 +170,15 @@ def run_siglip(seed: int):
|
||||
|
||||
|
||||
def run_vlm2vec_phi3v(seed: int):
|
||||
engine_args = EngineArgs(
|
||||
llm = LLM(
|
||||
model="TIGER-Lab/VLM2Vec-Full",
|
||||
runner="pooling",
|
||||
max_model_len=4096,
|
||||
trust_remote_code=True,
|
||||
mm_processor_kwargs={"num_crops": 4},
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
llm = LLM(**asdict(engine_args) | {"seed": seed})
|
||||
image_token = "<|image_1|>"
|
||||
|
||||
print("Text embedding output:")
|
||||
@@ -259,7 +254,7 @@ def run_vlm2vec_qwen2vl(seed: int):
|
||||
processor.save_pretrained(merged_path)
|
||||
print("Done!")
|
||||
|
||||
engine_args = EngineArgs(
|
||||
llm = LLM(
|
||||
model=merged_path,
|
||||
runner="pooling",
|
||||
max_model_len=4096,
|
||||
@@ -268,9 +263,8 @@ def run_vlm2vec_qwen2vl(seed: int):
|
||||
"max_pixels": 12845056,
|
||||
},
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
llm = LLM(**asdict(engine_args) | {"seed": seed})
|
||||
image_token = "<|image_pad|>"
|
||||
|
||||
print("Text embedding output:")
|
||||
|
||||
@@ -10,7 +10,6 @@ multimodal documents (text + images/videos).
|
||||
|
||||
from argparse import Namespace
|
||||
from collections.abc import Callable
|
||||
from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple
|
||||
|
||||
@@ -125,7 +124,7 @@ def main(args: Namespace):
|
||||
model_request = model_example_map[args.model_name]()
|
||||
engine_args = model_request.engine_args
|
||||
|
||||
llm = LLM(**asdict(engine_args))
|
||||
llm = LLM.from_engine_args(engine_args)
|
||||
|
||||
print("Query: string & Document: string")
|
||||
outputs = llm.score(query, document)
|
||||
|
||||
Reference in New Issue
Block a user