Remove incorrect GLM-4.5 quantization code (#21435)
@@ -390,7 +390,6 @@ class Glm4MoeModel(nn.Module):
             self.embed_tokens = VocabParallelEmbedding(
                 config.vocab_size,
                 config.hidden_size,
-                quant_config=quant_config,
                 prefix=f"{prefix}.embed_tokens")
         else:
             self.embed_tokens = PPMissingLayer()
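For context, a minimal sketch of how the embedding construction reads after this change, as it would sit inside Glm4MoeModel.__init__. The pipeline-parallel guard, the import paths, and the stated rationale are assumptions based on vLLM's usual model layout, not part of this diff:

# Sketch only; assumes this snippet lives inside Glm4MoeModel.__init__,
# where self, config, quant_config, and prefix are in scope.
from vllm.distributed import get_pp_group
from vllm.model_executor.layers.vocab_parallel_embedding import (
    VocabParallelEmbedding)
from vllm.model_executor.models.utils import PPMissingLayer

if get_pp_group().is_first_rank:
    # quant_config is deliberately NOT forwarded to the embedding layer
    # anymore; presumably the GLM-4.5 checkpoint keeps the embedding
    # table unquantized, so quantizing it here mis-loaded the weights.
    self.embed_tokens = VocabParallelEmbedding(
        config.vocab_size,
        config.hidden_size,
        prefix=f"{prefix}.embed_tokens")
else:
    # Ranks that do not own the first pipeline stage carry a placeholder.
    self.embed_tokens = PPMissingLayer()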