[V1][Spec Decode] EAGLE-3 Support (#16937)

Signed-off-by: Bryan Lu <yuzhelu@amazon.com>
Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai>
Co-authored-by: Bryan Lu <yuzhelu@amazon.com>
This commit is contained in:
Benjamin Chislett
2025-04-25 18:43:07 -04:00
committed by GitHub
parent 70116459c3
commit a0e619e62a
12 changed files with 358 additions and 34 deletions

View File

@@ -2339,9 +2339,10 @@ class SpeculativeConfig:
)
# Automatically detect the method
if self.method == 'eagle':
if self.method in ('eagle', 'eagle3'):
pass
elif "eagle-" in self.draft_model_config.model.lower():
elif "eagle-" in self.draft_model_config.model.lower() or \
"eagle3-" in self.draft_model_config.model.lower():
self.method = "eagle"
elif self.draft_model_config.hf_config.model_type == "medusa":
self.method = "medusa"
@@ -2352,7 +2353,7 @@ class SpeculativeConfig:
self.method = "draft_model"
# Replace hf_config for EAGLE draft_model
if self.method == "eagle":
if self.method in ("eagle", "eagle3"):
if self.enable_chunked_prefill and not envs.VLLM_USE_V1:
raise ValueError(
"Chunked prefill and EAGLE are not compatible "
@@ -2549,6 +2550,12 @@ class SpeculativeConfig:
"speculative decoding is > 1, but got "
f"{self.disable_by_batch_size=}")
if self.method == "eagle3" and self.target_model_config and \
"llama" not in self.target_model_config.hf_text_config.model_type:
raise ValueError(
"Eagle3 is only supported for Llama models. "
f"Got {self.target_model_config.hf_text_config.model_type=}")
@property
def num_lookahead_slots(self) -> int:
"""The number of additional slots the scheduler should allocate per