[Misc] Remove _maybe_ignore_quant_config from GLM4.1v (#20432)
Some checks failed
Create Release / Create Release (push) Has been cancelled
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
This commit is contained in:
@@ -55,9 +55,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
|
|||||||
QKVParallelLinear,
|
QKVParallelLinear,
|
||||||
RowParallelLinear)
|
RowParallelLinear)
|
||||||
from vllm.model_executor.layers.quantization import QuantizationConfig
|
from vllm.model_executor.layers.quantization import QuantizationConfig
|
||||||
from vllm.model_executor.layers.quantization.gptq import GPTQConfig
|
|
||||||
from vllm.model_executor.layers.quantization.gptq_marlin import (
|
|
||||||
GPTQMarlinConfig)
|
|
||||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
||||||
from vllm.model_executor.models.module_mapping import MultiModelKeys
|
from vllm.model_executor.models.module_mapping import MultiModelKeys
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
@@ -179,6 +176,7 @@ class Glm4vVisionMLP(nn.Module):
|
|||||||
hidden_features: int,
|
hidden_features: int,
|
||||||
bias: bool = False,
|
bias: bool = False,
|
||||||
quant_config: Optional[QuantizationConfig] = None,
|
quant_config: Optional[QuantizationConfig] = None,
|
||||||
|
prefix: str = "",
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.gate_up_proj = MergedColumnParallelLinear(
|
self.gate_up_proj = MergedColumnParallelLinear(
|
||||||
@@ -186,13 +184,12 @@ class Glm4vVisionMLP(nn.Module):
|
|||||||
output_sizes=[hidden_features] * 2,
|
output_sizes=[hidden_features] * 2,
|
||||||
bias=bias,
|
bias=bias,
|
||||||
quant_config=quant_config,
|
quant_config=quant_config,
|
||||||
)
|
prefix=f"{prefix}.gate_up_proj")
|
||||||
self.down_proj = RowParallelLinear(
|
self.down_proj = RowParallelLinear(hidden_features,
|
||||||
hidden_features,
|
|
||||||
in_features,
|
in_features,
|
||||||
bias=bias,
|
bias=bias,
|
||||||
quant_config=quant_config,
|
quant_config=quant_config,
|
||||||
)
|
prefix=f"{prefix}.down_proj")
|
||||||
self.act_fn = SiluAndMul()
|
self.act_fn = SiluAndMul()
|
||||||
|
|
||||||
def forward(self, x: torch.Tensor):
|
def forward(self, x: torch.Tensor):
|
||||||
@@ -407,6 +404,7 @@ class Glm4vVisionBlock(nn.Module):
|
|||||||
mlp_hidden_dim,
|
mlp_hidden_dim,
|
||||||
bias=False,
|
bias=False,
|
||||||
quant_config=quant_config,
|
quant_config=quant_config,
|
||||||
|
prefix=f"{prefix}.mlp",
|
||||||
)
|
)
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
@@ -1278,7 +1276,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
|
|||||||
self.visual = Glm4vVisionTransformer(
|
self.visual = Glm4vVisionTransformer(
|
||||||
config.vision_config,
|
config.vision_config,
|
||||||
norm_eps=getattr(config, "rms_norm_eps", 1e-5),
|
norm_eps=getattr(config, "rms_norm_eps", 1e-5),
|
||||||
quant_config=self._maybe_ignore_quant_config(quant_config),
|
quant_config=quant_config,
|
||||||
prefix=maybe_prefix(prefix, "visual"),
|
prefix=maybe_prefix(prefix, "visual"),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1291,13 +1289,6 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
|
|||||||
self.make_empty_intermediate_tensors = (
|
self.make_empty_intermediate_tensors = (
|
||||||
self.language_model.make_empty_intermediate_tensors)
|
self.language_model.make_empty_intermediate_tensors)
|
||||||
|
|
||||||
def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
|
|
||||||
# GPTQ configs do not have a list of ignored modules, however AutoGPTQ
|
|
||||||
# seems to avoid vision encoder sections for some models.
|
|
||||||
if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
|
|
||||||
return None
|
|
||||||
return quant_config
|
|
||||||
|
|
||||||
def _validate_and_reshape_mm_tensor(self, mm_input: object,
|
def _validate_and_reshape_mm_tensor(self, mm_input: object,
|
||||||
name: str) -> torch.Tensor:
|
name: str) -> torch.Tensor:
|
||||||
if not isinstance(mm_input, (torch.Tensor, list)):
|
if not isinstance(mm_input, (torch.Tensor, list)):
|
||||||
|
|||||||
Reference in New Issue
Block a user