[Bugfix] support tie_word_embeddings for all models (#5724)

2024-08-19 20:00:04 -07:00
parent 0df7ec0b2d
commit f4fc7337bf
30 changed files with 90 additions and 16 deletions
--- a/vllm/model_executor/models/bloom.py
+++ b/vllm/model_executor/models/bloom.py
@@ -36,7 +36,7 @@ from vllm.model_executor.layers.quantization.base_config import (
    QuantizationConfig)
 from vllm.model_executor.layers.sampler import Sampler
 from vllm.model_executor.layers.vocab_parallel_embedding import (
-    VocabParallelEmbedding)
+    ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors, SamplerOutput
@@ -276,7 +276,12 @@ class BloomForCausalLM(nn.Module):
        self.config = config
        self.quant_config = quant_config
        self.transformer = BloomModel(config, cache_config, quant_config)
-        self.lm_head = self.transformer.word_embeddings
+        if self.config.tie_word_embeddings:
+            self.lm_head = self.transformer.word_embeddings
+        else:
+            self.lm_head = ParallelLMHead(self.config.vocab_size,
+                                          self.config.hidden_size)
+
        self.logits_processor = LogitsProcessor(config.vocab_size)
        self.sampler = Sampler()