Support LoRA and GPTQModel for PLaMo 2/3 (#31322)

Signed-off-by: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com>

Author: Shinichi Hemmi
Date: 2025-12-26 12:41:33 +09:00
Committed by: GitHub
parent 3b8f31b362
commit 9ee05cbe7f

3 changed files with 31 additions and 17 deletions
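This commit wires PLaMo 2/3 into vLLM's SupportsLoRA interface and hardens weight loading against GPTQModel exports. For context, a minimal sketch of what the change enables, assuming a PLaMo 2 checkpoint id (pfnet/plamo-2-8b) and an adapter path that are illustrative, not taken from this commit:

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Illustrative model id and adapter path.
llm = LLM(model="pfnet/plamo-2-8b", enable_lora=True, trust_remote_code=True)
outputs = llm.generate(
    ["What is the capital of Japan?"],
    SamplingParams(max_tokens=32),
    lora_request=LoRARequest("my-adapter", 1, "/path/to/adapter"),
)
print(outputs[0].outputs[0].text)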

vllm/model_executor/models/plamo2.py

@@ -50,8 +50,14 @@ from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader,
     sharded_weight_loader,
 )
-from vllm.model_executor.models.interfaces import HasInnerState, IsHybrid, SupportsPP
+from vllm.model_executor.models.interfaces import (
+    HasInnerState,
+    IsHybrid,
+    SupportsLoRA,
+    SupportsPP,
+)
 from vllm.model_executor.models.utils import (
+    AutoWeightsLoader,
     is_pp_missing_parameter,
     make_empty_intermediate_tensors_factory,
     make_layers,
@@ -105,6 +111,7 @@ class Plamo2MambaMixer(MambaBase, CustomOp):
         self.cache_config = vllm_config.cache_config
         self.model_config = vllm_config.model_config
         self.quant_config = vllm_config.quant_config
+        self.is_lora_enabled = bool(vllm_config.lora_config)
         self.hidden_size = self.config.hidden_size
         self.ssm_state_size = self.config.mamba_d_state
         self.conv_kernel_size = self.config.mamba_d_conv
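Note on the hunk above: vllm_config.lora_config is None when the engine is created without LoRA, so the bool() cast collapses "a config object is present" into a cheap per-layer flag. A minimal sketch of the pattern, with an illustrative class name:

# Illustrative sketch: lora_config is an object when LoRA is on, None otherwise.
class MixerSketch:
    def __init__(self, lora_config=None):
        self.is_lora_enabled = bool(lora_config)

assert MixerSketch().is_lora_enabled is False
assert MixerSketch(lora_config=object()).is_lora_enabled is True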
@@ -202,7 +209,11 @@ class Plamo2MambaMixer(MambaBase, CustomOp):
         self.prefix = prefix
 
     def _project_ssm_parameters(self, hidden_states):
-        ssm_parameters = self.bcdt_proj(hidden_states)
+        if self.is_lora_enabled:
+            # Lora kernel requires contiguous tensor.
+            ssm_parameters = self.bcdt_proj(hidden_states.contiguous())
+        else:
+            ssm_parameters = self.bcdt_proj(hidden_states)
         B, C, time_step = torch.split(
             ssm_parameters,
             [self.ssm_state_size, self.ssm_state_size, self.time_step_rank],
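Why the contiguous() cast in the hunk above matters: the hidden states handed to the mixer can be views (slices or transposes) over a larger buffer, and the LoRA kernels assume a dense row-major layout. A standalone PyTorch sketch of the distinction:

import torch

x = torch.randn(4, 8)
view = x.t()                      # transpose is a view with swapped strides
print(view.is_contiguous())       # False
dense = view.contiguous()         # copies into a dense, row-major buffer
print(dense.is_contiguous())      # True
print(torch.equal(view, dense))   # True: same values, different memory layout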
@@ -780,13 +791,13 @@ class Plamo2Model(torch.nn.Module):
         return hidden_states
 
 
-class Plamo2ForCausalLM(torch.nn.Module, HasInnerState, SupportsPP, IsHybrid):
+class Plamo2ForCausalLM(
+    torch.nn.Module, HasInnerState, SupportsLoRA, SupportsPP, IsHybrid
+):
     packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
+        "qkv_proj": ["qkv_proj"],
         "gate_up_proj": ["gate_up_proj"],
+        "in_proj": ["in_proj"],
     }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
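The packed_modules_mapping change above is worth pausing on: vLLM uses this mapping to tell the LoRA machinery which checkpoint sub-modules fold into each fused projection. PLaMo checkpoints ship the projections already fused (qkv_proj, gate_up_proj, and the Mamba in_proj), so the mapping becomes the identity and adapters are expected to target the fused names directly. A hedged sketch of a matching PEFT configuration, where the rank and alpha values are illustrative:

from peft import LoraConfig

# Target the fused projection names directly, matching the identity
# packed_modules_mapping above. (Illustrative hyperparameters.)
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["qkv_proj", "gate_up_proj", "in_proj"],
    task_type="CAUSAL_LM",
)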
@@ -892,6 +903,12 @@ class Plamo2ForCausalLM(torch.nn.Module, HasInnerState, SupportsPP, IsHybrid):
             if name == "lm_head.weight" and self.config.tie_word_embeddings:
                 assert "lm_head.weight" not in params_dict
                 continue
+            # Same workaround as AutoWeightsLoader for GPTQModel
+            if any(
+                substr in name
+                for substr in AutoWeightsLoader.ROTARY_EMBEDS_UNUSED_WEIGHTS
+            ):
+                continue
             # Update the weight names to be compatible with the vllm version
             # of the model.
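Background for the hunk above: GPTQModel exports can serialize rotary-embedding buffers that vLLM recomputes at runtime and never registers in params_dict, so loading them would fail. AutoWeightsLoader already skips such names via its ROTARY_EMBEDS_UNUSED_WEIGHTS list; this hunk applies the same filter inside PLaMo 2's custom loader. A standalone sketch of the pattern, with a substring list that is illustrative rather than the exact contents of vLLM's constant:

# Skip serialized buffers that the runtime recomputes; they are absent
# from params_dict, so loading them would raise. (Illustrative list.)
UNUSED_SUBSTRINGS = ["rotary_emb.inv_freq", "rotary_emb.cos_cached"]

def should_skip(weight_name: str) -> bool:
    return any(substr in weight_name for substr in UNUSED_SUBSTRINGS)

assert should_skip("model.layers.0.self_attn.rotary_emb.inv_freq")
assert not should_skip("model.layers.0.self_attn.qkv_proj.weight")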

vllm/model_executor/models/plamo3.py

@@ -35,7 +35,7 @@ from vllm.model_executor.model_loader.weight_utils import (
     composed_weight_loader,
     default_weight_loader,
 )
-from vllm.model_executor.models.interfaces import SupportsPP
+from vllm.model_executor.models.interfaces import SupportsLoRA, SupportsPP
 from vllm.model_executor.models.utils import (
     AutoWeightsLoader,
     extract_layer_index,
@@ -369,13 +369,10 @@ class Plamo3Model(nn.Module):
         return hidden_states
 
 
-class Plamo3ForCausalLM(nn.Module, SupportsPP):
+class Plamo3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
     packed_modules_mapping = {
-        "qkv_proj": [
-            "q_proj",
-            "k_proj",
-            "v_proj",
-        ],
+        "qkv_proj": ["qkv_proj"],
         "gate_up_proj": ["gate_up_proj"],
     }
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
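With SupportsLoRA declared on both model classes and the loader workaround in place, a GPTQModel-quantized PLaMo checkpoint can be served with adapters attached. An end-to-end sketch under assumed placeholder ids: the quantized model id and adapter path below are not real artifacts, and vLLM auto-detects GPTQ quantization from the checkpoint's config:

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Placeholder ids: a GPTQModel-quantized PLaMo 3 export plus a local adapter.
llm = LLM(
    model="your-org/plamo-3-gptq-4bit",
    enable_lora=True,
    max_lora_rank=16,
    trust_remote_code=True,
)
outputs = llm.generate(
    ["Translate to Japanese: The weather is nice today."],
    SamplingParams(temperature=0.0, max_tokens=64),
    lora_request=LoRARequest("plamo-adapter", 1, "/path/to/adapter"),
)
print(outputs[0].outputs[0].text)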