[Speculative Decoding] Medusa Implementation with Top-1 proposer (#4978)

This commit is contained in:
Abhinav Goyal
2024-07-10 07:04:02 +05:30
committed by GitHub
parent d3a245138a
commit 2416b26e11
9 changed files with 587 additions and 4 deletions

View File

@@ -5,6 +5,7 @@ from vllm.transformers_utils.configs.dbrx import DbrxConfig
# `FalconConfig` class from the official HuggingFace transformers library.
from vllm.transformers_utils.configs.falcon import RWConfig
from vllm.transformers_utils.configs.jais import JAISConfig
from vllm.transformers_utils.configs.medusa import MedusaConfig
from vllm.transformers_utils.configs.mlp_speculator import MLPSpeculatorConfig
from vllm.transformers_utils.configs.mpt import MPTConfig
@@ -14,5 +15,6 @@ __all__ = [
"MPTConfig",
"RWConfig",
"JAISConfig",
"MedusaConfig",
"MLPSpeculatorConfig",
]