[Doc] Explicitly state that PP isn't compatible with speculative decoding yet (#10975)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -400,16 +400,17 @@ class GraniteForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
|
||||
self.lm_head.weight = self.model.embed_tokens.weight
|
||||
|
||||
logit_scale = getattr(config, "logit_scale", 1.0)
|
||||
|
||||
if hasattr(config, "logits_scaling"):
|
||||
logit_scale /= config.logits_scaling
|
||||
|
||||
self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
|
||||
config.vocab_size,
|
||||
scale=logit_scale)
|
||||
self.sampler = get_sampler()
|
||||
else:
|
||||
self.lm_head = PPMissingLayer()
|
||||
|
||||
self.sampler = get_sampler()
|
||||
|
||||
def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
|
||||
return self.model.get_input_embeddings(input_ids)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user