diff --git a/benchmarks/benchmark_long_document_qa_throughput.py b/benchmarks/benchmark_long_document_qa_throughput.py index f64fd09ba..b50b310fd 100644 --- a/benchmarks/benchmark_long_document_qa_throughput.py +++ b/benchmarks/benchmark_long_document_qa_throughput.py @@ -40,9 +40,9 @@ LLM engine. You can refer to the `vllm.engine.arg_utils.EngineArgs` for more details. """ -import dataclasses import random import time +from dataclasses import fields from vllm import LLM, SamplingParams from vllm.engine.arg_utils import EngineArgs @@ -124,7 +124,7 @@ def main(args): # Create the LLM engine engine_args = EngineArgs.from_cli_args(args) - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) sampling_params = SamplingParams(temperature=0, max_tokens=args.output_len) print("------warm up------") diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py index e6391134f..e7759616e 100644 --- a/benchmarks/benchmark_prefix_caching.py +++ b/benchmarks/benchmark_prefix_caching.py @@ -32,6 +32,7 @@ import dataclasses import json import random import time +from dataclasses import fields from transformers import PreTrainedTokenizerBase @@ -196,7 +197,7 @@ def main(args): engine_args = EngineArgs.from_cli_args(args) - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) sampling_params = SamplingParams( temperature=0, diff --git a/benchmarks/benchmark_prioritization.py b/benchmarks/benchmark_prioritization.py index a35db0063..d83bb7e17 100644 --- a/benchmarks/benchmark_prioritization.py +++ b/benchmarks/benchmark_prioritization.py @@ -3,10 +3,10 @@ """Benchmark offline prioritization.""" import argparse -import dataclasses import json import random import time +from dataclasses import fields from transformers import AutoTokenizer, PreTrainedTokenizerBase @@ -79,7 +79,7 @@ def run_vllm( ) -> float: from vllm import LLM, SamplingParams - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) assert all( llm.llm_engine.model_config.max_model_len >= (request[1] + request[2]) diff --git a/vllm/benchmarks/latency.py b/vllm/benchmarks/latency.py index a9d149666..758e5efed 100644 --- a/vllm/benchmarks/latency.py +++ b/vllm/benchmarks/latency.py @@ -3,10 +3,10 @@ """Benchmark the latency of processing a single batch of requests.""" import argparse -import dataclasses import json import os import time +from dataclasses import fields from typing import Any import numpy as np @@ -85,7 +85,7 @@ def main(args: argparse.Namespace): # NOTE(woosuk): If the request cannot be processed in a single batch, # the engine will automatically process the request in multiple batches. - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) assert llm.llm_engine.model_config.max_model_len >= ( args.input_len + args.output_len ), ( diff --git a/vllm/benchmarks/mm_processor.py b/vllm/benchmarks/mm_processor.py index 5900bbf99..4f31af0e0 100644 --- a/vllm/benchmarks/mm_processor.py +++ b/vllm/benchmarks/mm_processor.py @@ -14,10 +14,10 @@ Run: """ import argparse -import dataclasses import json import time from collections import defaultdict +from dataclasses import fields from datetime import datetime from typing import TYPE_CHECKING, Any, Literal @@ -225,7 +225,7 @@ def benchmark_multimodal_processor( args.seed = 0 engine_args = EngineArgs.from_cli_args(args) - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) tokenizer = llm.get_tokenizer() requests = get_requests(args, tokenizer) diff --git a/vllm/benchmarks/startup.py b/vllm/benchmarks/startup.py index 005625f61..405299938 100644 --- a/vllm/benchmarks/startup.py +++ b/vllm/benchmarks/startup.py @@ -9,7 +9,6 @@ and cache operations) for both cold and warm scenarios: """ import argparse -import dataclasses import json import multiprocessing import os @@ -17,6 +16,7 @@ import shutil import tempfile import time from contextlib import contextmanager +from dataclasses import fields from typing import Any import numpy as np @@ -67,7 +67,7 @@ def run_startup_in_subprocess(engine_args, result_queue): # Measure total startup time start_time = time.perf_counter() - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) total_startup_time = time.perf_counter() - start_time diff --git a/vllm/benchmarks/sweep/serve_workload.py b/vllm/benchmarks/sweep/serve_workload.py index ca7ba09a5..a47668ff1 100644 --- a/vllm/benchmarks/sweep/serve_workload.py +++ b/vllm/benchmarks/sweep/serve_workload.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import argparse import math -from dataclasses import asdict, dataclass +from dataclasses import dataclass, fields from pathlib import Path from typing import ClassVar, Literal, get_args @@ -267,7 +267,7 @@ class SweepServeWorkloadArgs(SweepServeArgs): base_args = SweepServeArgs.from_cli_args(args) return cls( - **asdict(base_args), + **{f.name: getattr(base_args, f.name) for f in fields(base_args)}, workload_var=args.workload_var, workload_iters=args.workload_iters, ) diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py index 1af8cf900..4c6379d67 100644 --- a/vllm/benchmarks/throughput.py +++ b/vllm/benchmarks/throughput.py @@ -3,12 +3,12 @@ """Benchmark offline inference throughput.""" import argparse -import dataclasses import json import os import random import time import warnings +from dataclasses import fields from typing import Any import torch @@ -53,7 +53,7 @@ def run_vllm( ) -> tuple[float, list[RequestOutput] | None]: from vllm import LLM, SamplingParams - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) assert all( llm.llm_engine.model_config.max_model_len >= (request.prompt_len + request.expected_output_len) @@ -141,7 +141,7 @@ def run_vllm_chat( """ from vllm import LLM, SamplingParams - llm = LLM(**dataclasses.asdict(engine_args)) + llm = LLM(**{f.name: getattr(engine_args, f.name) for f in fields(engine_args)}) assert all( llm.llm_engine.model_config.max_model_len