Add support for Eagle with separate lm-head and embed_tokens layers (#28549)

Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com>
Author: Eldar Kurtić
Date: 2025-11-15 15:12:02 +01:00
Commit: e439c784fa (parent 085a525332), committed by GitHub

12 changed files with 205 additions and 64 deletions


@@ -23,7 +23,7 @@
 from vllm.model_executor.models.llama import LlamaDecoderLayer, LlamaForCausalLM
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import NestedTensors

-from .utils import AutoWeightsLoader, maybe_prefix
+from .utils import AutoWeightsLoader, maybe_prefix, process_eagle_weight

 logger = init_logger(__name__)
@@ -324,6 +324,7 @@ class Eagle3LlamaForCausalLM(LlamaForCausalLM):
             if "embed_tokens" in name:
                 includes_embed_tokens = True
             model_weights[name] = loaded_weight
+            process_eagle_weight(self, name)

         skip_substrs = []
         if not includes_draft_id_mapping:
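
The body of process_eagle_weight (imported above from .utils, i.e. vllm/model_executor/models/utils.py) is not shown in this excerpt. Below is a minimal, hypothetical sketch of what such a per-weight hook could look like, assuming its job is to record on the draft model whether the Eagle checkpoint ships its own lm_head and embed_tokens weights, so the loader can stop sharing those layers with the target model. The attribute names has_own_lm_head and has_own_embed_tokens are illustrative, not vLLM's actual API.

# Hypothetical sketch only -- the real helper lives in
# vllm/model_executor/models/utils.py and may differ.
import torch.nn as nn


def process_eagle_weight(model: nn.Module, name: str) -> None:
    """Record whether the Eagle draft checkpoint provides its own
    lm_head / embed_tokens weights (illustrative attributes)."""
    if "lm_head" in name:
        # The draft checkpoint carries a separate lm-head; do not
        # reuse the target model's output projection.
        model.has_own_lm_head = True
    if "embed_tokens" in name:
        # The draft checkpoint carries its own input embeddings.
        model.has_own_embed_tokens = True

Calling the hook once per weight name inside load_weights, as the hunk above does, keeps the decision local to the loading loop and avoids a second pass over the checkpoint.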