[Misc] Remove redundant config definitions (#21891)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

Author:    Cyrus Leung
Date:      2025-07-30 14:54:18 +08:00
Committed: GitHub
Commit:    2ca5f82c2a (parent 6f8d261882)

23 changed files with 54 additions and 1910 deletions
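
Every file below follows the same pattern: a model-specific config class that vLLM previously defined under vllm.transformers_utils.configs is dropped, and the type annotation widens to transformers.PretrainedConfig. This is safe because the model code only reads attributes off the config object, and PretrainedConfig stores arbitrary keyword arguments as attributes. A minimal sketch of the idea (the ffn_dims helper is hypothetical, for illustration only):

from transformers import PretrainedConfig

# PretrainedConfig keeps unknown kwargs as plain attributes, so code that
# only reads fields such as hidden_size behaves identically whether it is
# given a model-specific subclass or the base class.
config = PretrainedConfig(hidden_size=4096, intermediate_size=11008)

def ffn_dims(config: PretrainedConfig) -> tuple[int, int]:
    # Mirrors the attribute access in AIMv2SwiGLUFFN.__init__ below.
    return config.hidden_size, config.intermediate_size

print(ffn_dims(config))  # (4096, 11008)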

vllm/model_executor/models/aimv2.py

@@ -8,6 +8,7 @@ from typing import Optional
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention.layer import MultiHeadAttention
 from vllm.distributed import get_tensor_model_parallel_world_size
@@ -20,13 +21,12 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
-from vllm.transformers_utils.configs.ovis import AIMv2Config
 
 
 class AIMv2SwiGLUFFN(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         hidden_features = config.intermediate_size
         in_features = config.hidden_size
@@ -57,7 +57,7 @@ class AIMv2SwiGLUFFN(nn.Module):
 
 class AIMv2PatchEmbed(nn.Module):
 
-    def __init__(self, config: AIMv2Config):
+    def __init__(self, config: PretrainedConfig):
         super().__init__()
         self.proj = nn.Conv2d(
             config.num_channels,
@@ -75,7 +75,7 @@ class AIMv2PatchEmbed(nn.Module):
 
 class AIMv2ViTPreprocessor(nn.Module):
 
-    def __init__(self, config: AIMv2Config):
+    def __init__(self, config: PretrainedConfig):
         super().__init__()
         num_patches = (config.image_size // config.patch_size)**2
 
@@ -93,8 +93,8 @@ class AIMv2ViTPreprocessor(nn.Module):
 
 class AIMv2Attention(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         self.config = config
         self.embed_dim = config.hidden_size
@@ -141,8 +141,8 @@ class AIMv2Attention(nn.Module):
 
 class AIMv2Block(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         self.attn = AIMv2Attention(config,
                                    quant_config=quant_config,
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):
 
     def __init__(
         self,
-        config: AIMv2Config,
+        config: PretrainedConfig,
         quant_config: QuantizationConfig,
         *,
         require_post_norm: Optional[bool] = None,
@@ -193,7 +193,7 @@ class AIMv2Transformer(nn.Module):
 class AIMv2Model(torch.nn.Module):
 
     def __init__(self,
-                 config: AIMv2Config,
+                 config: PretrainedConfig,
                  quant_config: QuantizationConfig,
                  *,
                  require_post_norm: Optional[bool] = None,

vllm/model_executor/models/dbrx.py

@@ -6,6 +6,7 @@ from typing import Optional, Union
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.config import CacheConfig, VllmConfig
@@ -24,7 +25,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.dbrx import DbrxConfig
 
 from .interfaces import SupportsPP
 from .utils import (AutoWeightsLoader, is_pp_missing_parameter,
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
        params_dtype: Optional[torch.dtype] = None,
     ):
         super().__init__()
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",

vllm/model_executor/models/exaone.py

@@ -30,6 +30,7 @@ from typing import Any, Optional, Union
 
 import torch
 from torch import nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -49,7 +50,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.exaone import ExaoneConfig
 
 from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter,
@@ -99,7 +99,7 @@ class ExaoneAttention(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -194,7 +194,7 @@ class ExaoneBlockAttention(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -236,7 +236,7 @@ class ExaoneDecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",

vllm/model_executor/models/exaone4.py

@@ -26,6 +26,7 @@ from typing import Any, Optional, Union
 
 import torch
 from torch import nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -45,7 +46,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.exaone4 import Exaone4Config
 
 from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (AutoWeightsLoader, PPMissingLayer, extract_layer_index,
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):
 
     def __init__(
         self,
-        config: Exaone4Config,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: Exaone4Config,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",

vllm/model_executor/models/keye.py

@@ -980,9 +980,6 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
 
 class KeyeProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(PretrainedConfig)
-
     def get_hf_processor(
         self,
         *,
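
The override deleted above duplicated what the base class already does: BaseProcessingInfo.get_hf_config defaults to the generic PretrainedConfig when no narrower config type is requested. A rough sketch of that relationship, using simplified stand-ins (Ctx, BaseInfo) rather than vLLM's actual classes:

from transformers import PretrainedConfig

class Ctx:
    # Simplified stand-in for vLLM's input processing context.
    def __init__(self, hf_config: PretrainedConfig):
        self.hf_config = hf_config

    def get_hf_config(self, typ: type = PretrainedConfig) -> PretrainedConfig:
        # Return the model's HF config after checking it against the
        # requested type; the PretrainedConfig default accepts any config.
        if not isinstance(self.hf_config, typ):
            raise TypeError(f"expected {typ.__name__}")
        return self.hf_config

class BaseInfo:
    def __init__(self, ctx: Ctx):
        self.ctx = ctx

    def get_hf_config(self) -> PretrainedConfig:
        return self.ctx.get_hf_config()

# An override that calls ctx.get_hf_config(PretrainedConfig) returns exactly
# what the base method does, so the Keye override could simply be removed.
# By contrast, MiniMaxVL01 below inherits a narrower override from
# LlavaNextProcessingInfo, so it keeps an override, widened to PretrainedConfig.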

vllm/model_executor/models/minimax_vl_01.py

@@ -5,7 +5,7 @@ from typing import Literal, Optional, TypedDict, Union, cast
 
 import torch
 import torch.nn as nn
-from transformers import BatchFeature
+from transformers import BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.jsontree import json_map_leaves
@@ -17,7 +17,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import MultiModalFieldConfig
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.minimax_vl_01 import MiniMaxVL01Config
 
 from .clip import CLIPVisionModel
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
@@ -90,8 +89,8 @@ class MiniMaxVL01DummyInputsBuilder(LlavaDummyInputsBuilder):
 
 class MiniMaxVL01ProcessingInfo(LlavaNextProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(MiniMaxVL01Config)
+    def get_hf_config(self):  # Need to override the config type
+        return self.ctx.get_hf_config(PretrainedConfig)
 
     def get_hf_processor(self, **kwargs: object):
         hf_processor = self.ctx.get_hf_processor(**kwargs)

vllm/model_executor/models/mpt.py

@@ -8,6 +8,7 @@ from typing import Optional, Union
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -25,7 +26,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.mpt import MPTConfig
 
 from .interfaces import SupportsPP
 from .utils import (AutoWeightsLoader, is_pp_missing_parameter,
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",

vllm/model_executor/models/ovis.py

@@ -25,7 +25,7 @@ import torch
 import torch.nn as nn
 from torch import Tensor
 from torch.nn.functional import gumbel_softmax, pad, softmax
-from transformers import BaseImageProcessor, BatchFeature
+from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.model_executor.layers.linear import ReplicatedLinear
@@ -48,8 +48,6 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                         BaseProcessingInfo, PromptReplacement)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.ovis import (BaseVisualTokenizerConfig,
-                                                  OvisConfig)
 from vllm.transformers_utils.processors.ovis import OvisProcessor
 
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
@@ -83,7 +81,7 @@ class VisualTokenizer(torch.nn.Module):
 
     def __init__(
        self,
-        config: BaseVisualTokenizerConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
@@ -107,7 +105,7 @@ class VisualTokenizer(torch.nn.Module):
 
     def _init_backbone(
         self,
-        config: BaseVisualTokenizerConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ) -> nn.Module:
@@ -247,9 +245,6 @@ class VisualEmbedding(torch.nn.Embedding):
 
 class OvisProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(OvisConfig)
-
     def get_hf_processor(self, **kwargs):
         return self.ctx.get_hf_processor(
             OvisProcessor,
@@ -417,7 +412,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
         config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
 
-        self.config: OvisConfig = config
+        self.config: PretrainedConfig = config
         self.llm = init_vllm_registered_model(
             vllm_config=vllm_config.with_hf_config(config.get_text_config()),
             prefix=maybe_prefix(prefix, "llm"),
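
The Ovis wrapper keeps working after the typed configs are gone because get_text_config() is part of the generic PretrainedConfig API: in recent transformers versions it returns a nested text config (for example, a text_config attribute) when one exists, and falls back to the config itself otherwise. A small sketch, where ToyMultimodalConfig is a hypothetical config for illustration only:

from transformers import PretrainedConfig

class ToyMultimodalConfig(PretrainedConfig):
    # Hypothetical multimodal config with a nested text config.
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.text_config = PretrainedConfig(hidden_size=1024)

cfg = ToyMultimodalConfig()
# get_text_config() looks for nested attributes such as "text_config"; for a
# plain text-only config it returns the config object itself.
assert cfg.get_text_config() is cfg.text_config

plain = PretrainedConfig()
assert plain.get_text_config() is plain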