[LoRA] Make LoRA respect language_model_only (#37375)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -161,9 +161,9 @@ class LoRAModelManager:
|
|||||||
device=self.device,
|
device=self.device,
|
||||||
lora_config=self.lora_config,
|
lora_config=self.lora_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
lm_prefix = self.mm_mapping.language_model[0]
|
lm_prefix = self.mm_mapping.language_model[0]
|
||||||
self.punica_wrapper_mapping[lm_prefix] = llm_punica_wrapper
|
self.punica_wrapper_mapping[lm_prefix] = llm_punica_wrapper
|
||||||
|
|
||||||
if self.lora_config.enable_tower_connector_lora:
|
if self.lora_config.enable_tower_connector_lora:
|
||||||
self.supports_tower_connector_lora = self.supports_mm and hasattr(
|
self.supports_tower_connector_lora = self.supports_mm and hasattr(
|
||||||
self.model, "get_num_mm_encoder_tokens"
|
self.model, "get_num_mm_encoder_tokens"
|
||||||
@@ -171,6 +171,18 @@ class LoRAModelManager:
|
|||||||
if not self.supports_tower_connector_lora:
|
if not self.supports_tower_connector_lora:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if (
|
||||||
|
vllm_config.model_config.multimodal_config
|
||||||
|
and vllm_config.model_config.multimodal_config.language_model_only
|
||||||
|
):
|
||||||
|
if self.supports_tower_connector_lora:
|
||||||
|
logger.warning(
|
||||||
|
"Disabling `enable_tower_connector_lora` because the multimodal "
|
||||||
|
"model is configured to initialize the language model only."
|
||||||
|
)
|
||||||
|
self.supports_tower_connector_lora = False
|
||||||
|
return
|
||||||
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"LoRA for the tower and connector of multimodal models is "
|
"LoRA for the tower and connector of multimodal models is "
|
||||||
"experimental and may contain bugs. Please report any related issues on "
|
"experimental and may contain bugs. Please report any related issues on "
|
||||||
|
|||||||
@@ -10,11 +10,10 @@ from vllm.distributed import (
|
|||||||
tensor_model_parallel_all_gather,
|
tensor_model_parallel_all_gather,
|
||||||
tensor_model_parallel_all_reduce,
|
tensor_model_parallel_all_reduce,
|
||||||
)
|
)
|
||||||
|
from vllm.lora.ops.triton_ops.utils import supports_pdl
|
||||||
from vllm.triton_utils import tl, triton
|
from vllm.triton_utils import tl, triton
|
||||||
from vllm.utils.torch_utils import direct_register_custom_op
|
from vllm.utils.torch_utils import direct_register_custom_op
|
||||||
|
|
||||||
from .utils import supports_pdl
|
|
||||||
|
|
||||||
|
|
||||||
@triton.jit
|
@triton.jit
|
||||||
def _get_lora_id(
|
def _get_lora_id(
|
||||||
|
|||||||
Reference in New Issue
Block a user