Add support for Eagle with separate lm-head and embed_tokens layers (#28549)

Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com>
Author: Eldar Kurtić
Date: 2025-11-15 15:12:02 +01:00
Committed by: GitHub
parent 085a525332
commit e439c784fa
12 changed files with 205 additions and 64 deletions


@@ -17,7 +17,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.model_executor.models.llama import LlamaDecoderLayer, LlamaForCausalLM
-from .utils import AutoWeightsLoader, maybe_prefix
+from .utils import AutoWeightsLoader, maybe_prefix, process_eagle_weight

 logger = init_logger(__name__)

@@ -179,6 +179,7 @@ class EagleLlamaForCausalLM(LlamaForCausalLM):
             name, loaded_weight = inputs
             if "lm_head" not in name:
                 name = "model." + name
+            process_eagle_weight(self, name)
             return name, loaded_weight

         loader = AutoWeightsLoader(
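
The hunk above only shows that process_eagle_weight is called while checkpoint weight names are remapped; the helper's body is not part of this diff. Below is a minimal, standalone Python sketch of that remapping step. The process_eagle_weight body here is an assumption for illustration only: it merely records whether the Eagle (draft) checkpoint ships its own lm_head / embed_tokens tensors, so the loader could avoid reusing the target model's; the real helper in vllm's models/utils.py may behave differently.

# Standalone sketch of the weight-name mapping done during Eagle weight loading.
# NOTE: process_eagle_weight's body below is a guess for illustration; only the
# call site appears in the diff above.

def process_eagle_weight(model, name: str) -> None:
    # Assumed behavior: note that the draft checkpoint provides its own
    # lm_head / embed_tokens instead of sharing the target model's layers.
    if "lm_head" in name:
        model.has_own_lm_head = True
    if "embed_tokens" in name:
        model.has_own_embed_tokens = True


class DraftModelStub:
    """Minimal stand-in for EagleLlamaForCausalLM used in this sketch."""
    has_own_lm_head = False
    has_own_embed_tokens = False


def transform(model, inputs):
    # Mirrors the diff: prefix everything except lm_head with "model." and
    # let process_eagle_weight inspect the remapped name.
    name, loaded_weight = inputs
    if "lm_head" not in name:
        name = "model." + name
    process_eagle_weight(model, name)
    return name, loaded_weight


if __name__ == "__main__":
    stub = DraftModelStub()
    print(transform(stub, ("embed_tokens.weight", None)))  # ('model.embed_tokens.weight', None)
    print(transform(stub, ("lm_head.weight", None)))       # ('lm_head.weight', None)
    print(stub.has_own_lm_head, stub.has_own_embed_tokens)  # True True

The sketch deliberately avoids importing vllm so it runs on its own; in the actual code path the transform closure is presumably applied to each checkpoint entry before AutoWeightsLoader consumes the (name, tensor) pairs.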