Remove incorrect quantization code for GLM-4.5 (#21435)

This commit is contained in:
Yuxuan Zhang
2025-07-24 16:52:43 +08:00
committed by GitHub
parent 610852a423
commit 85bda9e7d0
3 changed files with 2 additions and 3 deletions

View File

@@ -390,7 +390,6 @@ class Glm4MoeModel(nn.Module):
self.embed_tokens = VocabParallelEmbedding(
config.vocab_size,
config.hidden_size,
quant_config=quant_config,
prefix=f"{prefix}.embed_tokens")
else:
self.embed_tokens = PPMissingLayer()