From 85bda9e7d05371af6bb9d0052b1eb2f85d3cde29 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhang <2448370773@qq.com> Date: Thu, 24 Jul 2025 16:52:43 +0800 Subject: [PATCH] Remove incorrect GLM-4.5 quantization code (#21435) --- vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py | 2 +- vllm/model_executor/models/glm4_moe.py | 1 - vllm/reasoning/glm4_moe_reasoning_parser.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py index c3f9d7923..40cdf7275 100644 --- a/vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py @@ -20,7 +20,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer logger = init_logger(__name__) -@ToolParserManager.register_module("glm4_moe") +@ToolParserManager.register_module("glm45") class Glm4MoeModelToolParser(ToolParser): def __init__(self, tokenizer: AnyTokenizer): diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index bdca293d2..095bfbc40 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -390,7 +390,6 @@ class Glm4MoeModel(nn.Module): self.embed_tokens = VocabParallelEmbedding( config.vocab_size, config.hidden_size, - quant_config=quant_config, prefix=f"{prefix}.embed_tokens") else: self.embed_tokens = PPMissingLayer() diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py index 6511fb49d..460e38d2d 100644 --- a/vllm/reasoning/glm4_moe_reasoning_parser.py +++ b/vllm/reasoning/glm4_moe_reasoning_parser.py @@ -14,7 +14,7 @@ from vllm.reasoning import ReasoningParser, ReasoningParserManager logger = init_logger(__name__) -@ReasoningParserManager.register_module("glm4_moe") +@ReasoningParserManager.register_module("glm45") class Glm4MoeModelReasoningParser(ReasoningParser): """ 
Reasoning parser for the Glm4MoeModel model.