Support LoRA and GPTQModel for PLaMo 2/3 (#31322)
Signed-off-by: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com>
@@ -35,7 +35,7 @@ from vllm.model_executor.model_loader.weight_utils import (
     composed_weight_loader,
     default_weight_loader,
 )
-from vllm.model_executor.models.interfaces import SupportsPP
+from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
 from vllm.model_executor.models.utils import (
     AutoWeightsLoader,
     extract_layer_index,
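Note: SupportsLoRA is the marker interface vLLM uses to flag a model class as LoRA-capable; declaring it is what lets the engine accept adapters for PLaMo. A minimal sketch of the opt-in, assuming the supports_lora helper exported from the same interfaces module (exact helper name and internals may differ across vLLM versions):

from vllm.model_executor.models.interfaces import SupportsLoRA, supports_lora

class Demo(SupportsLoRA):
    # LoRA-capable classes also declare how fused layers map onto
    # checkpoint sub-modules; see the PLaMo mapping in the next hunk.
    packed_modules_mapping = {"qkv_proj": ["qkv_proj"]}

print(supports_lora(Demo))  # True: the class carries the interface flag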
@@ -369,13 +369,10 @@ class Plamo3Model(nn.Module):
         return hidden_states


-class Plamo3ForCausalLM(nn.Module, SupportsPP):
+class Plamo3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
     packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
+        "qkv_proj": ["qkv_proj"],
+        "gate_up_proj": ["gate_up_proj"],
     }

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
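Note on the mapping change: packed_modules_mapping tells vLLM's LoRA machinery which checkpoint sub-modules feed each fused layer, so adapter weights can be stacked to match. PLaMo checkpoints already store qkv_proj and gate_up_proj as single fused tensors, hence the identity mapping in place of the usual q_proj/k_proj/v_proj split. A hedged usage sketch of serving an adapter once this lands; the model id and adapter path below are placeholders, not taken from the PR:

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Placeholder checkpoint and adapter path, for illustration only.
llm = LLM(model="pfnet/plamo-2-1b", enable_lora=True, trust_remote_code=True)
outputs = llm.generate(
    ["PLaMo is"],
    SamplingParams(max_tokens=32),
    # LoRARequest(name, integer id, local path) applies the adapter
    # on a per-request basis.
    lora_request=LoRARequest("plamo-adapter", 1, "/path/to/adapter"),
)
print(outputs[0].outputs[0].text)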
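The GPTQModel half of the title is not visible in this excerpt. For completeness, a hedged sketch of loading a GPTQ-quantized PLaMo checkpoint; the checkpoint id is a placeholder, and vLLM can usually auto-detect the method from the checkpoint's quantization config:

from vllm import LLM

# Placeholder model id; quantization="gptq" makes the method explicit
# instead of relying on auto-detection.
llm = LLM(
    model="your-org/plamo-2-8b-gptq",
    quantization="gptq",
    trust_remote_code=True,
)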