[LoRA] Cleanup LoRA unused code (#29611)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
Author: Jee Jee Li
Date: 2025-11-29 14:52:58 +08:00
Committed by: GitHub
Parent: 4a80ad0a25
Commit: 39e63dec7c

46 changed files with 126 additions and 173 deletions


@@ -34,7 +34,6 @@ EMBEDDING_MODULES = {
     "lm_head": "output_embeddings",
 }
-EMBEDDING_PADDING_MODULES = ["lm_head"]
 DEVICES = (
     [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
@@ -46,24 +45,22 @@ DEFAULT_DTYPE = torch.get_default_dtype()
 @pytest.mark.parametrize("device", DEVICES)
-def test_from_lora_tensors(sql_lora_files, device):
-    tensors = load_file(os.path.join(sql_lora_files, "adapter_model.safetensors"))
+def test_from_lora_tensors(qwen3_lora_files, device):
+    tensors = load_file(os.path.join(qwen3_lora_files, "adapter_model.safetensors"))
     peft_helper = PEFTHelper.from_local_dir(
-        sql_lora_files, max_position_embeddings=4096
+        qwen3_lora_files, max_position_embeddings=4096
     )
     lora_model = LoRAModel.from_lora_tensors(
         1,
         tensors,
         peft_helper=peft_helper,
         device=device,
-        embedding_modules=EMBEDDING_MODULES,
-        embedding_padding_modules=EMBEDDING_PADDING_MODULES,
     )
     for module_name, lora in lora_model.loras.items():
         assert lora.module_name == module_name
         assert lora.rank == 8
-        assert lora.lora_alpha == 16
+        assert lora.lora_alpha == 32
         assert lora.lora_a is not None
         assert lora.lora_b is not None
         assert lora.lora_a.device == torch.device(device)
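
The fixture swap above also changes the expected `lora_alpha`, because the assertion mirrors the adapter's own PEFT config. A minimal sketch of where those numbers come from, assuming a local copy of the adapter (the path below is a placeholder, not part of the test suite):

```python
import json
import os

# Placeholder path standing in for the qwen3_lora_files test fixture.
adapter_dir = "/path/to/qwen3_lora_files"

# PEFT adapters ship an adapter_config.json; "r" is the LoRA rank and
# "lora_alpha" is the scaling numerator asserted in the test above.
with open(os.path.join(adapter_dir, "adapter_config.json")) as f:
    cfg = json.load(f)

print(cfg["r"], cfg["lora_alpha"])  # expected: 8 and 32 for this adapter
```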
@@ -430,7 +427,7 @@ def test_lru_cache_worker_adapter_manager(dist_init, dummy_model, device, tmp_path
     vllm_config.scheduler_config.max_num_seqs = 4
     vllm_config.scheduler_config.max_num_batched_tokens = 2
     worker_adapter_manager = LRUCacheWorkerLoRAManager(
-        vllm_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES
+        vllm_config, device, EMBEDDING_MODULES
     )
     worker_adapter_manager.max_num_seqs = 4
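
For readers tracking the API change, a hedged stand-in for the narrowed constructor (not the vLLM source; the real class lives in `vllm.lora.worker_manager` and carries far more state):

```python
# Illustrative stand-in only: shows the shape of the constructor after this
# commit, where embedding_padding_modules is no longer accepted or stored.
class LRUCacheWorkerLoRAManagerSketch:
    def __init__(self, vllm_config, device, embedding_modules):
        self.vllm_config = vllm_config
        self.device = device
        self.embedding_modules = embedding_modules


# Call sites now pass three arguments, matching the updated test above.
manager = LRUCacheWorkerLoRAManagerSketch(
    vllm_config=None,  # stand-in; the test builds a real VllmConfig
    device="cuda:0",
    embedding_modules={"lm_head": "output_embeddings"},
)
```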
@@ -533,9 +530,7 @@ def test_worker_adapter_manager(dist_init, dummy_model_gate_up, device, tmp_path
     vllm_config.scheduler_config.max_num_seqs = 4
     vllm_config.scheduler_config.max_num_batched_tokens = 2
-    worker_adapter_manager = WorkerLoRAManager(
-        vllm_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES
-    )
+    worker_adapter_manager = WorkerLoRAManager(vllm_config, device, EMBEDDING_MODULES)
     worker_adapter_manager.vocab_size = dummy_model_gate_up.unpadded_vocab_size
     worker_adapter_manager.create_lora_manager(dummy_model_gate_up)
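
Downstream code that must run against vLLM versions on both sides of this commit can feature-detect the removed argument rather than pin a version. A sketch, assuming only that the constructor's parameter names are stable (`make_worker_lora_manager` is a hypothetical helper, not part of vLLM):

```python
import inspect


def make_worker_lora_manager(cls, vllm_config, device, embedding_modules,
                             embedding_padding_modules=None):
    """Construct `cls` with or without the removed argument.

    `cls` is whichever manager class was imported (WorkerLoRAManager or
    LRUCacheWorkerLoRAManager); the other arguments mirror the test setup.
    """
    params = inspect.signature(cls.__init__).parameters
    if "embedding_padding_modules" in params:
        # Older vLLM: the constructor still expects the padding modules.
        return cls(vllm_config, device, embedding_modules,
                   embedding_padding_modules or [])
    # Newer vLLM (this commit onward): the argument is gone.
    return cls(vllm_config, device, embedding_modules)
```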