[V1][Spec Decode] EAGLE-3 Support (#16937)
Signed-off-by: Bryan Lu <yuzhelu@amazon.com>
Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai>
Co-authored-by: Bryan Lu <yuzhelu@amazon.com>
This commit is contained in:
Committed by: GitHub
Parent: 70116459c3
Commit: a0e619e62a
@@ -2339,9 +2339,10 @@ class SpeculativeConfig:
|
||||
)
|
||||
|
||||
# Automatically detect the method
|
||||
if self.method == 'eagle':
|
||||
if self.method in ('eagle', 'eagle3'):
|
||||
pass
|
||||
elif "eagle-" in self.draft_model_config.model.lower():
|
||||
elif "eagle-" in self.draft_model_config.model.lower() or \
|
||||
"eagle3-" in self.draft_model_config.model.lower():
|
||||
self.method = "eagle"
|
||||
elif self.draft_model_config.hf_config.model_type == "medusa":
|
||||
self.method = "medusa"
|
||||
@@ -2352,7 +2353,7 @@ class SpeculativeConfig:
|
||||
self.method = "draft_model"
|
||||
|
||||
# Replace hf_config for EAGLE draft_model
|
||||
if self.method == "eagle":
|
||||
if self.method in ("eagle", "eagle3"):
|
||||
if self.enable_chunked_prefill and not envs.VLLM_USE_V1:
|
||||
raise ValueError(
|
||||
"Chunked prefill and EAGLE are not compatible "
|
||||
@@ -2549,6 +2550,12 @@ class SpeculativeConfig:
|
||||
"speculative decoding is > 1, but got "
|
||||
f"{self.disable_by_batch_size=}")
|
||||
|
||||
if self.method == "eagle3" and self.target_model_config and \
|
||||
"llama" not in self.target_model_config.hf_text_config.model_type:
|
||||
raise ValueError(
|
||||
"Eagle3 is only supported for Llama models. "
|
||||
f"Got {self.target_model_config.hf_text_config.model_type=}")
|
||||
|
||||
@property
|
||||
def num_lookahead_slots(self) -> int:
|
||||
"""The number of additional slots the scheduler should allocate per
|
||||
|
||||
Reference in New Issue
Block a user