[Speculative Decoding] EAGLE Implementation with Top-1 proposer (#6830)

This commit is contained in:
Abhinav Goyal
2024-08-22 15:12:24 +05:30
committed by GitHub
parent b3856bef7d
commit a3fce56b88
17 changed files with 854 additions and 83 deletions

View File

@@ -1,5 +1,6 @@
from vllm.transformers_utils.configs.chatglm import ChatGLMConfig
from vllm.transformers_utils.configs.dbrx import DbrxConfig
from vllm.transformers_utils.configs.eagle import EAGLEConfig
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# `FalconConfig` class from the official HuggingFace transformers library.
@@ -20,6 +21,7 @@ __all__ = [
"InternVLChatConfig",
"JAISConfig",
"MedusaConfig",
"EAGLEConfig",
"MLPSpeculatorConfig",
"NemotronConfig",
"UltravoxConfig",

View File

@@ -0,0 +1,49 @@
import os
from typing import Optional, Union
from transformers import AutoConfig, PretrainedConfig
class EAGLEConfig(PretrainedConfig):
    """Configuration whose default architecture is ``EAGLEModel``.

    The configuration keeps an inner model configuration in ``self.model``
    and copies onto itself every field of that inner configuration that it
    does not already define.
    """

    model_type = "eagle"

    def __init__(self,
                 model: Union[PretrainedConfig, dict, None] = None,
                 truncated_vocab_size: Optional[int] = None,
                 **kwargs):
        """Build the config around an optional inner model configuration.

        Args:
            model: Inner model configuration. A ``dict`` is expanded into
                ``AutoConfig.for_model(**model)``; any other non-``None``
                value is stored as given.
            truncated_vocab_size: Stored as ``self.truncated_vocab_size``.
                When ``None`` and an inner configuration is present, the
                inner configuration's ``vocab_size`` is used. Always
                ``None`` when there is no inner configuration.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``. Keys other
                than ``"architectures"`` and ``"model_type"`` that name an
                existing attribute of the inner configuration are also
                written onto it.
        """
        # Resolve the inner configuration from whichever form was supplied.
        if model is None:
            inner = None
        elif isinstance(model, dict):
            inner = AutoConfig.for_model(**model)
        else:
            inner = model

        # Push matching keyword overrides down onto the inner configuration;
        # the two reserved keys describe this wrapper and are never pushed.
        reserved = ("architectures", "model_type")
        for name, value in kwargs.items():
            if name in reserved:
                continue
            if hasattr(inner, name):
                setattr(inner, name, value)

        self.model = inner

        if inner is None:
            self.truncated_vocab_size = None
        elif truncated_vocab_size is None:
            self.truncated_vocab_size = inner.vocab_size
        else:
            self.truncated_vocab_size = truncated_vocab_size

        # Only supply the default architecture when the caller gave none.
        kwargs.setdefault("architectures", ["EAGLEModel"])

        super().__init__(**kwargs)

        # Mirror inner fields on this object without overriding anything
        # that is already defined here.
        if self.model is not None:
            for name, value in self.model.to_dict().items():
                if not hasattr(self, name):
                    setattr(self, name, value)

    @classmethod
    def from_pretrained(
        cls,
        pretrained_model_name_or_path: Union[str, os.PathLike],
        **kwargs,
    ) -> "EAGLEConfig":
        """Load the config dict for the given name or path and build from it.

        Calls ``cls.get_config_dict`` and passes the resulting dictionary and
        remaining keyword arguments to ``cls.from_dict``.
        """
        loaded = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
        config_dict, kwargs = loaded
        return cls.from_dict(config_dict, **kwargs)