[Misc] Remove redundant config definitions (#21891)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
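Every hunk in this diff applies the same pattern: model modules were annotated with per-model config classes (AIMv2Config, DbrxConfig, ExaoneConfig, Exaone4Config, MPTConfig, ...) whose definitions under vllm.transformers_utils.configs duplicated configs already available through transformers. These annotations are type hints only; at runtime each module reads generic attributes such as hidden_size, so typing the parameter as the transformers.PretrainedConfig base class behaves identically and lets the duplicated definitions be removed. A minimal sketch of the pattern, assuming nothing beyond transformers itself (ExampleAttention and its attribute names are illustrative, not code from this commit):

from transformers import PretrainedConfig


class ExampleAttention:
    """Illustrative stand-in for the modules changed below."""

    # Before: `config: MyModelConfig`, a class duplicated inside vLLM.
    # After: the generic base type; the object passed at runtime is unchanged.
    def __init__(self, config: PretrainedConfig) -> None:
        # Only generic HF config attributes are read, so any model-specific
        # subclass (or a plain PretrainedConfig) works.
        self.embed_dim = config.hidden_size
        self.num_heads = config.num_attention_heads


# PretrainedConfig stores unrecognized kwargs as attributes, so this runs as-is:
attn = ExampleAttention(PretrainedConfig(hidden_size=1024, num_attention_heads=16))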
vllm/model_executor/models/aimv2.py

@@ -8,6 +8,7 @@ from typing import Optional
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention.layer import MultiHeadAttention
 from vllm.distributed import get_tensor_model_parallel_world_size
@@ -20,13 +21,12 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
-from vllm.transformers_utils.configs.ovis import AIMv2Config
 
 
 class AIMv2SwiGLUFFN(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         hidden_features = config.intermediate_size
         in_features = config.hidden_size
@@ -57,7 +57,7 @@ class AIMv2SwiGLUFFN(nn.Module):
 
 class AIMv2PatchEmbed(nn.Module):
 
-    def __init__(self, config: AIMv2Config):
+    def __init__(self, config: PretrainedConfig):
         super().__init__()
         self.proj = nn.Conv2d(
             config.num_channels,
@@ -75,7 +75,7 @@ class AIMv2PatchEmbed(nn.Module):
 
 class AIMv2ViTPreprocessor(nn.Module):
 
-    def __init__(self, config: AIMv2Config):
+    def __init__(self, config: PretrainedConfig):
         super().__init__()
         num_patches = (config.image_size // config.patch_size)**2
 
@@ -93,8 +93,8 @@ class AIMv2ViTPreprocessor(nn.Module):
 
 class AIMv2Attention(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         self.config = config
         self.embed_dim = config.hidden_size
@@ -141,8 +141,8 @@ class AIMv2Attention(nn.Module):
 
 class AIMv2Block(nn.Module):
 
-    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
-                 prefix: str):
+    def __init__(self, config: PretrainedConfig,
+                 quant_config: QuantizationConfig, prefix: str):
         super().__init__()
         self.attn = AIMv2Attention(config,
                                    quant_config=quant_config,
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):
 
     def __init__(
         self,
-        config: AIMv2Config,
+        config: PretrainedConfig,
         quant_config: QuantizationConfig,
         *,
         require_post_norm: Optional[bool] = None,
@@ -193,7 +193,7 @@ class AIMv2Transformer(nn.Module):
 class AIMv2Model(torch.nn.Module):
 
     def __init__(self,
-                 config: AIMv2Config,
+                 config: PretrainedConfig,
                  quant_config: QuantizationConfig,
                  *,
                  require_post_norm: Optional[bool] = None,
vllm/model_executor/models/dbrx.py

@@ -6,6 +6,7 @@ from typing import Optional, Union
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.config import CacheConfig, VllmConfig
@@ -24,7 +25,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.dbrx import DbrxConfig
 
 from .interfaces import SupportsPP
 from .utils import (AutoWeightsLoader, is_pp_missing_parameter,
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         params_dtype: Optional[torch.dtype] = None,
     ):
         super().__init__()
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
        cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):
 
     def __init__(
         self,
-        config: DbrxConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
vllm/model_executor/models/exaone.py

@@ -30,6 +30,7 @@ from typing import Any, Optional, Union
 
 import torch
 from torch import nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -49,7 +50,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.exaone import ExaoneConfig
 
 from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter,
@@ -99,7 +99,7 @@ class ExaoneAttention(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -194,7 +194,7 @@ class ExaoneBlockAttention(nn.Module):
 
     def __init__(
         self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -236,7 +236,7 @@ class ExaoneDecoderLayer(nn.Module):
 
     def __init__(
        self,
-        config: ExaoneConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
vllm/model_executor/models/exaone4.py

@@ -26,6 +26,7 @@ from typing import Any, Optional, Union
 
 import torch
 from torch import nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -45,7 +46,6 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader, maybe_remap_kv_scale_name)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.exaone4 import Exaone4Config
 
 from .interfaces import SupportsLoRA, SupportsPP
 from .utils import (AutoWeightsLoader, PPMissingLayer, extract_layer_index,
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):
 
     def __init__(
         self,
-        config: Exaone4Config,
+        config: PretrainedConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: Exaone4Config,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
vllm/model_executor/models/keye.py

@@ -980,9 +980,6 @@ class KeyeMultiModalDataParser(MultiModalDataParser):
 
 class KeyeProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(PretrainedConfig)
-
     def get_hf_processor(
         self,
         *,
vllm/model_executor/models/minimax_vl_01.py

@@ -5,7 +5,7 @@ from typing import Literal, Optional, TypedDict, Union, cast
 
 import torch
 import torch.nn as nn
-from transformers import BatchFeature
+from transformers import BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.jsontree import json_map_leaves
@@ -17,7 +17,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import MultiModalFieldConfig
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.minimax_vl_01 import MiniMaxVL01Config
 
 from .clip import CLIPVisionModel
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
@@ -90,8 +89,8 @@ class MiniMaxVL01DummyInputsBuilder(LlavaDummyInputsBuilder):
 
 class MiniMaxVL01ProcessingInfo(LlavaNextProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(MiniMaxVL01Config)
+    def get_hf_config(self):  # Need to override the config type
+        return self.ctx.get_hf_config(PretrainedConfig)
 
     def get_hf_processor(self, **kwargs: object):
         hf_processor = self.ctx.get_hf_processor(**kwargs)
vllm/model_executor/models/mpt.py

@@ -8,6 +8,7 @@ from typing import Optional, Union
 
 import torch
 import torch.nn as nn
+from transformers import PretrainedConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -25,7 +26,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.mpt import MPTConfig
 
 from .interfaces import SupportsPP
 from .utils import (AutoWeightsLoader, is_pp_missing_parameter,
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):
 
     def __init__(
         self,
-        config: MPTConfig,
+        config: PretrainedConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
vllm/model_executor/models/ovis.py

@@ -25,7 +25,7 @@ import torch
 import torch.nn as nn
 from torch import Tensor
 from torch.nn.functional import gumbel_softmax, pad, softmax
-from transformers import BaseImageProcessor, BatchFeature
+from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.model_executor.layers.linear import ReplicatedLinear
@@ -48,8 +48,6 @@ from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                         BaseProcessingInfo, PromptReplacement)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.configs.ovis import (BaseVisualTokenizerConfig,
-                                                  OvisConfig)
 from vllm.transformers_utils.processors.ovis import OvisProcessor
 
 from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
@@ -83,7 +81,7 @@ class VisualTokenizer(torch.nn.Module):
 
     def __init__(
         self,
-        config: BaseVisualTokenizerConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
@@ -107,7 +105,7 @@ class VisualTokenizer(torch.nn.Module):
 
     def _init_backbone(
         self,
-        config: BaseVisualTokenizerConfig,
+        config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ) -> nn.Module:
@@ -247,9 +245,6 @@ class VisualEmbedding(torch.nn.Embedding):
 
 class OvisProcessingInfo(BaseProcessingInfo):
 
-    def get_hf_config(self):
-        return self.ctx.get_hf_config(OvisConfig)
-
     def get_hf_processor(self, **kwargs):
         return self.ctx.get_hf_processor(
             OvisProcessor,
@@ -417,7 +412,7 @@ class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
         config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
 
-        self.config: OvisConfig = config
+        self.config: PretrainedConfig = config
         self.llm = init_vllm_registered_model(
             vllm_config=vllm_config.with_hf_config(config.get_text_config()),
             prefix=maybe_prefix(prefix, "llm"),
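The keye.py and ovis.py hunks additionally delete get_hf_config overrides: the base BaseProcessingInfo already returns the model's Hugging Face config, so an override that merely re-requests the generic PretrainedConfig adds nothing (minimax_vl_01.py keeps its override, with a comment, presumably because its parent class pins a narrower config type). A rough sketch of why the inherited method suffices; the context class here is a simplified, hypothetical stand-in for the real self.ctx:

from transformers import PretrainedConfig


class FakeContext:
    """Hypothetical stand-in for the processing context (`self.ctx`)."""

    def __init__(self, hf_config: PretrainedConfig) -> None:
        self._hf_config = hf_config

    def get_hf_config(self) -> PretrainedConfig:
        return self._hf_config


class BaseInfoSketch:

    def __init__(self, ctx: FakeContext) -> None:
        self.ctx = ctx

    def get_hf_config(self) -> PretrainedConfig:
        # The base method already hands back the model's config object, so
        # subclasses only need an override to narrow the declared type.
        return self.ctx.get_hf_config()


class OvisLikeInfo(BaseInfoSketch):
    # After this commit: no get_hf_config override; the inherited method
    # returns the model-specific config instance unchanged.
    pass


info = OvisLikeInfo(FakeContext(PretrainedConfig(hidden_size=1024)))
assert info.get_hf_config().hidden_size == 1024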