[Model] Adding Granite MoE. (#8206)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
2024-10-02 21:33:57 -04:00
parent f58d4fccc9
commit 19f0d25796
4 changed files with 492 additions and 3 deletions
--- a/vllm/model_executor/models/granite.py
+++ b/vllm/model_executor/models/granite.py
@@ -404,9 +404,12 @@ class GraniteForCausalLM(nn.Module, SupportsLoRA):
                self.lm_head.weight = self.model.embed_tokens.weight

            logit_scale = getattr(config, "logit_scale", 1.0)
+
+            if hasattr(config, "logits_scaling"):
+                logit_scale /= config.logits_scaling
            self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
                                                    config.vocab_size,
-                                                    logit_scale)
+                                                    scale=logit_scale)
            self.sampler = Sampler()
        else:
            self.lm_head = PPMissingLayer()
@@ -428,8 +431,6 @@ class GraniteForCausalLM(nn.Module, SupportsLoRA):
            sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]:
        logits = self.logits_processor(self.lm_head, hidden_states,
                                       sampling_metadata)
-        if logits is not None:
-            logits /= self.config.logits_scaling
        return logits

    def sample(