[Model][Speculative Decoding] Expand DeepSeek MTP code to support k > n_predict (#13626)
Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai>
This commit is contained in:
Committed by GitHub. Parent: 2e94b9cfbb. Commit: 9804145cac.
@@ -87,7 +87,7 @@ class DeepSeekMultiTokenPredictorLayer(nn.Module):
|
||||
hidden_states=hidden_states,
|
||||
residual=None)
|
||||
hidden_states = residual + hidden_states
|
||||
- return self.shared_head(hidden_states)
|
||||
+ return hidden_states
|
||||
|
||||
|
||||
class DeepSeekMultiTokenPredictor(nn.Module):
|
||||
@@ -121,12 +121,13 @@ class DeepSeekMultiTokenPredictor(nn.Module):
|
||||
inputs_embeds: Optional[torch.Tensor] = None,
|
||||
spec_step_idx: int = 0,
|
||||
) -> torch.Tensor:
|
||||
- return self.layers[str(self.mtp_start_layer_idx + spec_step_idx)](
|
||||
+ current_step_idx = (spec_step_idx % self.num_mtp_layers)
|
||||
+ return self.layers[str(self.mtp_start_layer_idx + current_step_idx)](
|
||||
input_ids,
|
||||
positions,
|
||||
previous_hidden_states,
|
||||
inputs_embeds,
|
||||
- spec_step_idx,
|
||||
+ current_step_idx,
|
||||
)
|
||||
|
||||
def compute_logits(
|
||||
@@ -135,9 +136,12 @@ class DeepSeekMultiTokenPredictor(nn.Module):
|
||||
sampling_metadata: SamplingMetadata,
|
||||
spec_step_idx: int = 0,
|
||||
) -> torch.Tensor:
|
||||
- mtp_layer = self.layers[str(self.mtp_start_layer_idx + spec_step_idx)]
|
||||
+ current_step_idx = (spec_step_idx % self.num_mtp_layers)
|
||||
+ mtp_layer = self.layers[str(self.mtp_start_layer_idx +
|
||||
+ current_step_idx)]
|
||||
logits = self.logits_processor(mtp_layer.shared_head.head,
|
||||
- hidden_states, sampling_metadata)
|
||||
+ mtp_layer.shared_head(hidden_states),
|
||||
+ sampling_metadata)
|
||||
return logits
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user