diff --git a/vllm/lora/lora_model.py b/vllm/lora/lora_model.py
index f5e36697e..bc88c71ea 100644
--- a/vllm/lora/lora_model.py
+++ b/vllm/lora/lora_model.py
@@ -12,7 +12,6 @@ from vllm.lora.peft_helper import PEFTHelper
 from vllm.lora.utils import (
     get_lora_id,
     is_base_embeddding_weights,
-    is_regex_target_modules,
     parse_fine_tuned_lora_name,
 )
 from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
@@ -201,37 +200,13 @@ class LoRAModel:
                 for module in f.keys():  # noqa
                     tensors[module] = f.get_tensor(module)
         elif os.path.isfile(lora_bin_file_path) or os.path.isfile(lora_pt_file_path):
-            # When a bin/pt file is provided, we rely on config to find
-            # unexpected modules.
-            unexpected_modules = []
-            target_modules = peft_helper.target_modules
-            if not isinstance(target_modules, list):
-                target_modules = [target_modules]
-            for module in target_modules:
-                # Compatible with more modules,
-                # such as:layers.11.self_attn.k_proj
-                part_name = module.split(".")[-1]
-                if part_name not in expected_lora_modules:
-                    unexpected_modules.append(module)
-            # loaded lora's target modules must be a subset of
-            # expected_lora_modules. It is not reliable. See
-            # https://github.com/vllm-project/vllm/pull/5909. But there's no
-            # other better mechanism.
-            if unexpected_modules and not is_regex_target_modules(
-                peft_helper.target_modules, expected_lora_modules
-            ):
-                raise ValueError(
-                    f"While loading {lora_dir}, expected"
-                    f" target modules in {expected_lora_modules}"
-                    f" but received {unexpected_modules}."
-                    f" Please verify that the loaded LoRA module is correct"
-                )
             lora_file_path = (
                 lora_bin_file_path
                 if os.path.isfile(lora_bin_file_path)
                 else lora_pt_file_path
             )
             tensors = torch.load(lora_file_path, map_location=device, weights_only=True)
+            check_unexpected_modules(tensors)
         else:
             raise ValueError(f"{lora_dir} doesn't contain tensors")
diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index 4d264c068..75aeccd00 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -5,7 +5,6 @@ import os
 from typing import TYPE_CHECKING, Optional
 
 import huggingface_hub
-import regex as re
 from huggingface_hub.utils import (
     EntryNotFoundError,
     HfHubHTTPError,
@@ -186,39 +185,6 @@ def is_base_embeddding_weights(name: str) -> bool:
     return name.endswith(embedding_suffixes)
 
 
-def is_regex_target_modules(
-    load_modules: str | list[str], expected_lora_modules: set[str]
-) -> bool:
-    """
-    PEFT supports passing `target_modules` in the form of regular expressions,
-    such as `model.*(q_proj|k_proj|v_proj)$`. This function is mainly used to
-    determine whether the suffix in the regular expression is present in the
-    `expected_lora_modules`.
-    """
-
-    def is_valid_regex(pattern):
-        try:
-            re.compile(pattern)
-            return True
-        except re.error:
-            return False
-
-    def is_subset(sub_list, full_set):
-        return set(sub_list).issubset(full_set)
-
-    # Similar to PEFT's processing logic, regex-related operations are only
-    # executed when the load_modules is a `str`.
-    if not isinstance(load_modules, str):
-        return False
-
-    if is_valid_regex(load_modules):
-        match = re.search(r"\((.*?)\)\$?$", load_modules)
-        if match:
-            suffix = match.group(1).split("|")
-            return is_subset(suffix, expected_lora_modules)
-    return False
-
-
 def get_supported_lora_modules(model: nn.Module) -> list[str]:
     """
     In vLLM, all linear layers support LoRA.
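For context, the new bin/pt path validates the loaded weights themselves via `check_unexpected_modules(tensors)`, whose definition is not part of this hunk. Below is a minimal, hypothetical sketch of what such a tensor-name-based check could look like; the function name, signature, and weight-name format are assumptions for illustration, not the actual vLLM helper.

```python
# Hypothetical sketch -- NOT the actual vLLM check_unexpected_modules helper.
# It illustrates validating a loaded state dict by parsing the tensor names
# instead of trusting peft_helper.target_modules from the adapter config.
import torch


def check_unexpected_modules_sketch(
    tensors: dict[str, torch.Tensor],
    expected_lora_modules: set[str],
) -> None:
    """Raise if a LoRA weight targets a module LoRA cannot be applied to."""
    unexpected: set[str] = set()
    for name in tensors:
        # Typical LoRA weight names look like
        # "base_model.model.layers.11.self_attn.k_proj.lora_A.weight".
        parts = name.split(".")
        lora_positions = [i for i, p in enumerate(parts) if p.startswith("lora_")]
        if not lora_positions or lora_positions[0] == 0:
            continue  # not a LoRA weight (e.g. base embeddings); skip it
        module_part = parts[lora_positions[0] - 1]
        if module_part not in expected_lora_modules:
            unexpected.add(".".join(parts[: lora_positions[0]]))
    if unexpected:
        raise ValueError(
            f"Unexpected LoRA target modules {sorted(unexpected)}; "
            f"expected modules from {sorted(expected_lora_modules)}."
        )


# Example: a k_proj adapter passes; an unknown module would raise ValueError.
weights = {
    "base_model.model.layers.11.self_attn.k_proj.lora_A.weight": torch.zeros(8, 16),
}
check_unexpected_modules_sketch(weights, {"q_proj", "k_proj", "v_proj"})
```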