[Doc] [SpecDecode] Update MLPSpeculator documentation (#7100)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
2024-08-06 01:29:43 +02:00
parent dfb1a15dcb
commit 789937af2e
2 changed files with 58 additions and 0 deletions
--- a/vllm/model_executor/models/mlp_speculator.py
+++ b/vllm/model_executor/models/mlp_speculator.py
@@ -56,6 +56,15 @@ class MLPSpeculatorLayerNorm(nn.Module):


 class MLPSpeculator(nn.Module):
+    """
+    An implementation of the speculative models introduced in
+    "Accelerating Production LLMs with Combined Token/Embedding
+    Speculators"
+    https://arxiv.org/pdf/2404.19124
+
+    Trained speculators of this type are available on the Hugging Face Hub at:
+    https://huggingface.co/ibm-fms and https://huggingface.co/ibm-granite
+    """

    def __init__(self, config: MLPSpeculatorConfig, **kwargs) -> None:
        super().__init__()