[Speculative Decoding] EAGLE Implementation with Top-1 proposer (#6830)

This commit is contained in:
Abhinav Goyal
2024-08-22 15:12:24 +05:30
committed by GitHub
parent b3856bef7d
commit a3fce56b88
17 changed files with 854 additions and 83 deletions

View File

@@ -1,5 +1,6 @@
from vllm.transformers_utils.configs.chatglm import ChatGLMConfig
from vllm.transformers_utils.configs.dbrx import DbrxConfig
from vllm.transformers_utils.configs.eagle import EAGLEConfig
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# `FalconConfig` class from the official HuggingFace transformers library.
@@ -20,6 +21,7 @@ __all__ = [
"InternVLChatConfig",
"JAISConfig",
"MedusaConfig",
"EAGLEConfig",
"MLPSpeculatorConfig",
"NemotronConfig",
"UltravoxConfig",