[Model] MLPSpeculator speculative decoding support (#4947)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>

Co-authored-by: Thomas Parnell <tpa@zurich.ibm.com>
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
Co-authored-by: Davis Wertheimer <Davis.Wertheimer@ibm.com>
This commit is contained in:
Joshua Rosenkranz
2024-06-20 20:23:12 -04:00
committed by GitHub
parent 6c5b7af152
commit b12518d3cf
18 changed files with 523 additions and 40 deletions

View File

@@ -5,6 +5,7 @@ from vllm.transformers_utils.configs.dbrx import DbrxConfig
# `FalconConfig` class from the official HuggingFace transformers library.
from vllm.transformers_utils.configs.falcon import RWConfig
from vllm.transformers_utils.configs.jais import JAISConfig
from vllm.transformers_utils.configs.mlp_speculator import MLPSpeculatorConfig
from vllm.transformers_utils.configs.mpt import MPTConfig
__all__ = [
@@ -13,4 +14,5 @@ __all__ = [
"MPTConfig",
"RWConfig",
"JAISConfig",
"MLPSpeculatorConfig",
]