From 8c31f47c638b87425efc1f3afebf2026336fd061 Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Wed, 18 Mar 2026 15:53:34 +0800
Subject: [PATCH] [LoRA] Make LoRA respect `language_model_only` (#37375)

Signed-off-by: Jee Jee Li
---
 vllm/lora/model_manager.py                        | 14 +++++++++++++-
 vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py |  3 +--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/vllm/lora/model_manager.py b/vllm/lora/model_manager.py
index 12d6f719a..a84c399c3 100644
--- a/vllm/lora/model_manager.py
+++ b/vllm/lora/model_manager.py
@@ -161,9 +161,9 @@ class LoRAModelManager:
             device=self.device,
             lora_config=self.lora_config,
         )
+        lm_prefix = self.mm_mapping.language_model[0]
         self.punica_wrapper_mapping[lm_prefix] = llm_punica_wrapper
-
 
         if self.lora_config.enable_tower_connector_lora:
             self.supports_tower_connector_lora = self.supports_mm and hasattr(
                 self.model, "get_num_mm_encoder_tokens"
@@ -171,6 +171,18 @@ class LoRAModelManager:
             if not self.supports_tower_connector_lora:
                 return
 
+            if (
+                vllm_config.model_config.multimodal_config
+                and vllm_config.model_config.multimodal_config.language_model_only
+            ):
+                if self.supports_tower_connector_lora:
+                    logger.warning(
+                        "Disabling `enable_tower_connector_lora` because the multimodal "
+                        "model is configured to initialize the language model only."
+                    )
+                self.supports_tower_connector_lora = False
+                return
+
             logger.warning(
                 "LoRA for the tower and connector of multimodal models is "
                 "experimental and may contain bugs. Please report any related issues on "
diff --git a/vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py b/vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py
index 015d43416..deb34cfe4 100644
--- a/vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py
+++ b/vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py
@@ -10,11 +10,10 @@ from vllm.distributed import (
     tensor_model_parallel_all_gather,
     tensor_model_parallel_all_reduce,
 )
+from vllm.lora.ops.triton_ops.utils import supports_pdl
 from vllm.triton_utils import tl, triton
 from vllm.utils.torch_utils import direct_register_custom_op
 
-from .utils import supports_pdl
-
 
 @triton.jit
 def _get_lora_id(