Improve configs - ModelConfig (#17130)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-04-30 03:38:22 +01:00
committed by GitHub
parent 2c4f59afc3
commit 13698db634
36 changed files with 490 additions and 648 deletions

View File

@@ -13,7 +13,7 @@ from typing_extensions import TypeVar, deprecated
from vllm.beam_search import (BeamSearchInstance, BeamSearchOutput,
BeamSearchSequence, get_beam_search_score)
from vllm.config import CompilationConfig
from vllm.config import CompilationConfig, ModelDType, TokenizerMode
from vllm.engine.arg_utils import (EngineArgs, HfOverrides, PoolerConfig,
TaskOption)
from vllm.engine.llm_engine import LLMEngine
@@ -32,6 +32,7 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor.guided_decoding.guided_fields import (
GuidedDecodingRequest, LLMGuidedOptions)
from vllm.model_executor.layers.quantization import QuantizationMethods
from vllm.outputs import (ClassificationRequestOutput, EmbeddingRequestOutput,
PoolingRequestOutput, RequestOutput,
ScoringRequestOutput)
@@ -163,20 +164,20 @@ class LLM:
self,
model: str,
tokenizer: Optional[str] = None,
tokenizer_mode: str = "auto",
tokenizer_mode: TokenizerMode = "auto",
skip_tokenizer_init: bool = False,
trust_remote_code: bool = False,
allowed_local_media_path: str = "",
tensor_parallel_size: int = 1,
dtype: str = "auto",
quantization: Optional[str] = None,
dtype: ModelDType = "auto",
quantization: Optional[QuantizationMethods] = None,
revision: Optional[str] = None,
tokenizer_revision: Optional[str] = None,
seed: Optional[int] = None,
gpu_memory_utilization: float = 0.9,
swap_space: float = 4,
cpu_offload_gb: float = 0,
enforce_eager: Optional[bool] = None,
enforce_eager: bool = False,
max_seq_len_to_capture: int = 8192,
disable_custom_all_reduce: bool = False,
disable_async_output_proc: bool = False,
@@ -189,12 +190,7 @@ class LLM:
compilation_config: Optional[Union[int, dict[str, Any]]] = None,
**kwargs,
) -> None:
'''
LLM constructor.
Note: if enforce_eager is unset (enforce_eager is None)
it defaults to False.
'''
"""LLM constructor."""
if "disable_log_stats" not in kwargs:
kwargs["disable_log_stats"] = True