From ea37530b474fa738a99a53a8975af4e389b968c7 Mon Sep 17 00:00:00 2001
From: tianshu-Michael-yu
 <101950379+tianshu-Michael-yu@users.noreply.github.com>
Date: Thu, 19 Feb 2026 22:07:23 -0800
Subject: [PATCH] [Models] LFM2: Support LoRA (#34921)

Co-authored-by: Piotr Mazurek <piotr635@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
---
 vllm/model_executor/models/lfm2.py     | 26 ++++++++++++++++++--------
 vllm/model_executor/models/lfm2_moe.py | 26 ++++++++++++++++++--------
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/vllm/model_executor/models/lfm2.py b/vllm/model_executor/models/lfm2.py
index fa611ad50..453173fc8 100644
--- a/vllm/model_executor/models/lfm2.py
+++ b/vllm/model_executor/models/lfm2.py
@@ -39,6 +39,7 @@ from .interfaces import HasInnerState, IsHybrid, SupportsLoRA, SupportsPP, Suppo
 from .utils import (
     AutoWeightsLoader,
     PPMissingLayer,
+    WeightsMapper,
     extract_layer_index,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
@@ -66,12 +67,12 @@ class Lfm2MLP(nn.Module):
                 ff_dim = int(ffn_dim_multiplier * ff_dim)
             ff_dim = multiple_of * ((ff_dim + multiple_of - 1) // multiple_of)
 
-        self.w1 = MergedColumnParallelLinear(
+        self.w13 = MergedColumnParallelLinear(
             input_size=dim,
             output_sizes=[ff_dim] * 2,
             bias=False,
             quant_config=quant_config,
-            prefix=f"{prefix}.w1",
+            prefix=f"{prefix}.w13",
         )
         self.w2 = RowParallelLinear(
             input_size=ff_dim,
@@ -83,7 +84,7 @@ class Lfm2MLP(nn.Module):
         self.act_fn = SiluAndMul()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        gate_up, _ = self.w1(x)
+        gate_up, _ = self.w13(x)
         x = self.act_fn(gate_up)
         x, _ = self.w2(x)
         return x
@@ -376,8 +377,8 @@ class Lfm2Model(nn.Module):
             (".qkv_proj", ".q_proj", "q"),
             (".qkv_proj", ".k_proj", "k"),
             (".qkv_proj", ".v_proj", "v"),
-            (".w1", ".w1", 0),
-            (".w1", ".w3", 1),
+            (".w13", ".w1", 0),
+            (".w13", ".w3", 1),
         ]
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
@@ -386,9 +387,11 @@ class Lfm2Model(nn.Module):
                 name = name.replace(".conv.", ".short_conv.", 1)
 
             for param_name, weight_name, shard_id in stacked_params_mapping:
-                if weight_name not in name:
+                # Use segment-boundary matching (trailing dot) to prevent
+                # e.g. ".w1" from matching inside ".w13" in pre-fused keys.
+                if weight_name + "." not in name:
                     continue
-                name = name.replace(weight_name, param_name)
+                name = name.replace(weight_name + ".", param_name + ".")
 
                 if is_pp_missing_parameter(name, self):
                     continue
@@ -415,13 +418,20 @@ class Lfm2ForCausalLM(
             "k_proj",
             "v_proj",
         ],
-        "w1": [
+        "w13": [
             "w1",
             "w3",
         ],
         "in_proj": ["in_proj"],
     }
 
+    # HF uses .conv. but vLLM uses .short_conv. to avoid LoRA regex collision
+    # with the inner .conv.conv child (ShortConv has a child self.conv, so
+    # naming the container .conv too makes _match_target_modules match both)
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_substr={".conv.": ".short_conv."},
+    )
+
     # LoRA specific attributes
     embedding_modules = {
         "embed_tokens": "input_embeddings",
diff --git a/vllm/model_executor/models/lfm2_moe.py b/vllm/model_executor/models/lfm2_moe.py
index 22bd554bd..b7ca710ea 100644
--- a/vllm/model_executor/models/lfm2_moe.py
+++ b/vllm/model_executor/models/lfm2_moe.py
@@ -52,6 +52,7 @@ from .interfaces import (
 from .utils import (
     AutoWeightsLoader,
     PPMissingLayer,
+    WeightsMapper,
     extract_layer_index,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
@@ -69,12 +70,12 @@ class Lfm2MoeMlp(nn.Module):
         prefix: str = "",
     ):
         super().__init__()
-        self.w1 = MergedColumnParallelLinear(
+        self.w13 = MergedColumnParallelLinear(
             input_size=dim,
             output_sizes=[ff_dim] * 2,
             bias=False,
             quant_config=quant_config,
-            prefix=f"{prefix}.w1",
+            prefix=f"{prefix}.w13",
         )
         self.w2 = RowParallelLinear(
             input_size=ff_dim,
@@ -86,7 +87,7 @@ class Lfm2MoeMlp(nn.Module):
         self.act_fn = SiluAndMul()
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        gate_up, _ = self.w1(x)
+        gate_up, _ = self.w13(x)
         x = self.act_fn(gate_up)
         x, _ = self.w2(x)
         return x
@@ -501,8 +502,8 @@ class Lfm2MoeModel(nn.Module):
             (".qkv_proj", ".q_proj", "q"),
             (".qkv_proj", ".k_proj", "k"),
             (".qkv_proj", ".v_proj", "v"),
-            (".w1", ".w1", 0),
-            (".w1", ".w3", 1),
+            (".w13", ".w1", 0),
+            (".w13", ".w3", 1),
         ]
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
@@ -516,12 +517,14 @@ class Lfm2MoeModel(nn.Module):
 
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 # Skip non-stacked layers and experts (experts handled below).
-                if weight_name not in name:
+                # Use segment-boundary matching (trailing dot) to prevent
+                # e.g. ".w1" from matching inside ".w13" in pre-fused keys.
+                if weight_name + "." not in name:
                     continue
 
                 if ("feed_forward.experts." in name) and name not in params_dict:
                     continue
-                name = name.replace(weight_name, param_name)
+                name = name.replace(weight_name + ".", param_name + ".")
                 # Skip loading extra bias for GPTQ models.
                 if (
                     name.endswith(".bias") or name.endswith("_bias")
@@ -596,13 +599,20 @@ class Lfm2MoeForCausalLM(
             "k_proj",
             "v_proj",
         ],
-        "w1": [
+        "w13": [
             "w1",
             "w3",
         ],
         "in_proj": ["in_proj"],
     }
 
+    # HF uses .conv. but vLLM uses .short_conv. to avoid LoRA regex collision
+    # with the inner .conv.conv child (ShortConv has a child self.conv, so
+    # naming the container .conv too makes _match_target_modules match both)
+    hf_to_vllm_mapper = WeightsMapper(
+        orig_to_new_substr={".conv.": ".short_conv."},
+    )
+
     # LoRA specific attributes
     embedding_modules = {
         "embed_tokens": "input_embeddings",