for glm-4.1V update (#22000)
Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
@@ -37,8 +37,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
 from transformers import BatchFeature
-from transformers.models.glm4v.configuration_glm4v import (Glm4vConfig,
-                                                            Glm4vVisionConfig)
+from transformers.models.glm4v.configuration_glm4v import Glm4vVisionConfig
 from transformers.models.glm4v.image_processing_glm4v import (
     Glm4vImageProcessor, smart_resize)
 from transformers.models.glm4v.video_processing_glm4v import (
@@ -801,7 +800,7 @@ class Glm4vVisionTransformer(nn.Module):
 class Glm4vProcessingInfo(BaseProcessingInfo):

     def get_hf_config(self):
-        return self.ctx.get_hf_config(Glm4vConfig)
+        return self.ctx.get_hf_config()

     def get_tokenizer(self):
         return self.ctx.tokenizer
@@ -1253,7 +1252,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
-        config: Glm4vConfig = vllm_config.model_config.hf_config
+        config = vllm_config.model_config.hf_config
         quant_config = vllm_config.quant_config
         multimodal_config = vllm_config.model_config.multimodal_config

@@ -1267,12 +1266,18 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
             prefix=maybe_prefix(prefix, "visual"),
         )

+        if config.model_type == "glm4v":
+            architectures = ["Glm4ForCausalLM"]
+        elif config.model_type == "glm4v_moe":
+            architectures = ["Glm4MoeForCausalLM"]
+        else:
+            architectures = None
+
         self.language_model = init_vllm_registered_model(
             vllm_config=vllm_config,
-            prefix=maybe_prefix(prefix, ""),
-            architectures=["Glm4ForCausalLM"],
-            hf_config=self.config.get_text_config(),
-        )
+            hf_config=config.text_config,
+            prefix=maybe_prefix(prefix, "language_model"),
+            architectures=architectures)

         self.make_empty_intermediate_tensors = (
             self.language_model.make_empty_intermediate_tensors)
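The change above selects the text backbone from config.model_type, so the same multimodal wrapper serves both the dense GLM-4.1V checkpoint and the MoE variant. Below is a minimal standalone sketch of that dispatch; the helper name is an illustrative assumption, not part of the diff, which inlines this logic in Glm4vForConditionalGeneration.__init__.

# Hypothetical helper mirroring the model_type -> text-architecture dispatch
# added in Glm4vForConditionalGeneration.__init__ above.
from typing import Optional

def _select_text_architectures(model_type: str) -> Optional[list]:
    if model_type == "glm4v":
        return ["Glm4ForCausalLM"]      # dense GLM-4 text backbone
    if model_type == "glm4v_moe":
        return ["Glm4MoeForCausalLM"]   # MoE GLM-4 text backbone
    return None                         # let vLLM infer from the config

assert _select_text_architectures("glm4v") == ["Glm4ForCausalLM"]
assert _select_text_architectures("glm4v_moe") == ["Glm4MoeForCausalLM"]
assert _select_text_architectures("unknown") is None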
@@ -206,6 +206,7 @@ _MULTIMODAL_MODELS = {
     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"), # noqa: E501
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"), # noqa: E501
+    "Glm4v_moeForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"), # noqa: E501
     "GraniteSpeechForConditionalGeneration": ("granite_speech", "GraniteSpeechForConditionalGeneration"), # noqa: E501
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "InternVLChatModel": ("internvl", "InternVLChatModel"),
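For reference, a minimal usage sketch that would exercise the Glm4vForConditionalGeneration registry entry added above; the Hugging Face repo id and the sampling settings are assumptions, not part of this change.

# Smoke-test sketch (repo id assumed; substitute the actual GLM-4.1V checkpoint).
from vllm import LLM, SamplingParams

llm = LLM(model="THUDM/GLM-4.1V-9B-Thinking", max_model_len=8192)
outputs = llm.generate(
    ["Describe the GLM-4.1V architecture in one sentence."],
    SamplingParams(temperature=0.0, max_tokens=64),
)
print(outputs[0].outputs[0].text)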