# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any

from pydantic import ConfigDict
from pydantic.dataclasses import dataclass

from vllm.logger import init_logger

logger = init_logger(__name__)


@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
class ModelArchitectureConfig:
    """
    Configuration for the model architecture required by the vLLM runtime.
    """

    architectures: list[str] | None
    """List of model architecture class names (e.g., ['LlamaForCausalLM']).
    It can be None upon calling `vllm_config.with_hf_config(config.text_config)`."""

    model_type: str
    """Model type identifier (e.g., 'llama', 'gpt_oss')."""

    text_model_type: str | None
    """Text model type identifier (e.g., 'llama4_text')."""

    hidden_size: int
    """Hidden size of the model."""

    total_num_hidden_layers: int
    """Total number of hidden layers in the model."""

    total_num_attention_heads: int
    """Total number of attention heads in the model."""

    head_size: int
    """Head dimension of the model."""

    vocab_size: int
    """Vocabulary size of the model."""

    total_num_kv_heads: int
    """Total number of key-value heads in the model."""

    num_experts: int
    """Number of experts in the model."""

    quantization_config: dict[str, Any] | None
    """Quantization configuration dictionary containing the quantization parameters."""

    is_deepseek_mla: bool
    """Whether the model is a DeepSeek MLA model."""

    derived_max_model_len_and_key: tuple[float, str | None]
    """Maximum model length derived from the HF config, paired with the name of
    the config key it was derived from."""
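

if __name__ == "__main__":
    # Minimal usage sketch, not part of the vLLM API: the field values below
    # are hypothetical and do not correspond to any real checkpoint. Because
    # this is a pydantic dataclass, field types are validated at construction
    # time, so passing e.g. a string for `hidden_size` would raise a
    # pydantic ValidationError.
    example = ModelArchitectureConfig(
        architectures=["LlamaForCausalLM"],
        model_type="llama",
        text_model_type=None,
        hidden_size=4096,
        total_num_hidden_layers=32,
        total_num_attention_heads=32,
        head_size=128,
        vocab_size=32000,
        total_num_kv_heads=8,
        num_experts=0,
        quantization_config=None,
        is_deepseek_mla=False,
        derived_max_model_len_and_key=(4096.0, "max_position_embeddings"),
    )
    logger.info("Example ModelArchitectureConfig: %s", example)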