[V1][Spec Decode] Implement Eagle Proposer [1/N] (#15729)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-04-01 12:33:16 -07:00
committed by GitHub
parent a79cc68b3a
commit e75a6301bd
6 changed files with 378 additions and 21 deletions

View File

@@ -1468,15 +1468,21 @@ class EngineArgs:
# Only Ngram and Eagle speculative decoding so far.
is_ngram_enabled = False
is_eagle_enabled = False
if self.speculative_config is not None:
# This is supported but experimental (handled below).
if (("method" in self.speculative_config
and self.speculative_config["method"] in ("ngram", "[ngram]"))
or
("model" in self.speculative_config and
self.speculative_config["model"] in ("ngram", "[ngram]"))):
is_ngram_enabled = True
speculative_method = self.speculative_config.get("method")
if speculative_method:
if speculative_method in ("ngram", "[ngram]"):
is_ngram_enabled = True
elif speculative_method == "eagle":
is_eagle_enabled = True
else:
speculative_model = self.speculative_config.get("model")
if speculative_model in ("ngram", "[ngram]"):
is_ngram_enabled = True
if not (is_ngram_enabled or is_eagle_enabled):
# Other speculative decoding methods are not supported yet.
_raise_or_fallback(feature_name="Speculative Decoding",
recommend_to_remove=False)
return False
@@ -1523,6 +1529,10 @@ class EngineArgs:
if is_ngram_enabled and _warn_or_fallback("ngram"):
return False
# Eagle is under development, so we don't support it yet.
if is_eagle_enabled and _warn_or_fallback("Eagle"):
return False
# Non-CUDA is supported on V1, but off by default for now.
not_cuda = not current_platform.is_cuda()
if not_cuda and _warn_or_fallback( # noqa: SIM103