From 0900cedb3f89e475bea256c4cf5a13b5f02635bc Mon Sep 17 00:00:00 2001 From: gopalsarda Date: Tue, 20 Jan 2026 19:18:05 -0800 Subject: [PATCH] Enable Eagle3 speculative decoding for Pixtral (LlavaForConditionalGeneration) (#32542) Signed-off-by: gopalsarda --- vllm/model_executor/models/llava.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index ec4367f6d..b4e41e0a5 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -53,6 +53,7 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape from .clip import CLIPVisionModel from .interfaces import ( MultiModalEmbeddings, + SupportsEagle3, SupportsLoRA, SupportsMultiModal, SupportsPP, @@ -503,7 +504,7 @@ def init_vision_tower_for_llava( dummy_inputs=LlavaDummyInputsBuilder, ) class LlavaForConditionalGeneration( - nn.Module, SupportsLoRA, SupportsMultiModal, SupportsPP + nn.Module, SupportsLoRA, SupportsMultiModal, SupportsPP, SupportsEagle3 ): packed_modules_mapping = { "qkv_proj": ["q_proj", "k_proj", "v_proj"], @@ -527,6 +528,13 @@ class LlavaForConditionalGeneration( raise ValueError("Only image modality is supported") + def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None: + self.get_language_model().model.aux_hidden_state_layers = layers + + def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]: + num_layers = len(self.get_language_model().model.layers) + return (2, num_layers // 2, num_layers - 3) + def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: super().__init__()