[Speculative Decoding] Medusa Implementation with Top-1 proposer (#4978)

This commit is contained in:
Abhinav Goyal
2024-07-10 07:04:02 +05:30
committed by GitHub
parent d3a245138a
commit 2416b26e11
9 changed files with 587 additions and 4 deletions

View File

@@ -64,6 +64,7 @@ _GENERATION_MODELS = {
"ArcticForCausalLM": ("arctic", "ArcticForCausalLM"),
"XverseForCausalLM": ("xverse", "XverseForCausalLM"),
"Phi3SmallForCausalLM": ("phi3_small", "Phi3SmallForCausalLM"),
"MedusaModel": ("medusa", "Medusa"),
"MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"),
"JambaForCausalLM": ("jamba", "JambaForCausalLM")
}