[Model] Improve olmo and olmo2 (#23228)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
2025-08-20 20:47:05 +08:00
parent 7cd17e22d7
commit c6d80a7a96
3 changed files with 36 additions and 7 deletions
--- a/vllm/model_executor/models/olmo2.py
+++ b/vllm/model_executor/models/olmo2.py
@@ -33,6 +33,7 @@ from torch import nn
 from transformers import Olmo2Config

 from vllm.attention import Attention
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import VllmConfig
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.distributed.communication_op import tensor_model_parallel_all_gather
@@ -48,7 +49,7 @@ from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
    ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
-from vllm.model_executor.models.interfaces import SupportsPP
+from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
 from vllm.model_executor.models.utils import (
    AutoWeightsLoader, is_pp_missing_parameter,
    make_empty_intermediate_tensors_factory, make_layers, maybe_prefix)
@@ -253,6 +254,7 @@ class Olmo2DecoderLayer(nn.Module):
        return hidden_states


+@support_torch_compile
 class Olmo2Model(nn.Module):

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
@@ -354,10 +356,21 @@ class Olmo2Model(nn.Module):
        return loaded_params


-class Olmo2ForCausalLM(nn.Module, SupportsPP):
+class Olmo2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
    """
    Extremely barebones HF model wrapper.
    """
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()