[LoRA] Cleanup LoRA unused code (#29611)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
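This drops the unused embedding_padding_modules argument from the WorkerLoRAManager and LRUCacheWorkerLoRAManager call sites in the LoRA manager tests, and moves test_from_lora_tensors from the sql_lora fixture to the qwen3_lora fixture; the adapter-config assertions change accordingly (the rank check is dropped and lora_alpha is now expected to be 32).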
@@ -34,7 +34,6 @@ EMBEDDING_MODULES = {
     "lm_head": "output_embeddings",
 }
 
 EMBEDDING_PADDING_MODULES = ["lm_head"]
 
 DEVICES = (
     [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
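As a reading aid (not part of the change), a minimal sketch of the full shape these constants typically have; the "embed_tokens" entry is an assumption, since only the "lm_head" entry is visible in this hunk:

    EMBEDDING_MODULES = {
        "embed_tokens": "input_embeddings",  # assumed entry, not shown in the hunk
        "lm_head": "output_embeddings",
    }
    EMBEDDING_PADDING_MODULES = ["lm_head"]  # presumably modules whose vocab dim gets padded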
@@ -46,24 +45,22 @@ DEFAULT_DTYPE = torch.get_default_dtype()
 @pytest.mark.parametrize("device", DEVICES)
-def test_from_lora_tensors(sql_lora_files, device):
-    tensors = load_file(os.path.join(sql_lora_files, "adapter_model.safetensors"))
+def test_from_lora_tensors(qwen3_lora_files, device):
+    tensors = load_file(os.path.join(qwen3_lora_files, "adapter_model.safetensors"))
 
     peft_helper = PEFTHelper.from_local_dir(
-        sql_lora_files, max_position_embeddings=4096
+        qwen3_lora_files, max_position_embeddings=4096
     )
     lora_model = LoRAModel.from_lora_tensors(
         1,
         tensors,
         peft_helper=peft_helper,
         device=device,
         embedding_modules=EMBEDDING_MODULES,
         embedding_padding_modules=EMBEDDING_PADDING_MODULES,
     )
     for module_name, lora in lora_model.loras.items():
         assert lora.module_name == module_name
-        assert lora.rank == 8
-        assert lora.lora_alpha == 16
+        assert lora.lora_alpha == 32
         assert lora.lora_a is not None
         assert lora.lora_b is not None
         assert lora.lora_a.device == torch.device(device)
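For reference, the loading call the updated test drives, with each argument's role spelled out in comments; the roles are inferred from the test body rather than stated by the diff:

    lora_model = LoRAModel.from_lora_tensors(
        1,                        # id assigned to this adapter
        tensors,                  # weights read from adapter_model.safetensors
        peft_helper=peft_helper,  # parsed adapter config (rank, alpha, ...)
        device=device,            # target placement; the test asserts lora_a lands here
        embedding_modules=EMBEDDING_MODULES,
        embedding_padding_modules=EMBEDDING_PADDING_MODULES,
    )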
@@ -430,7 +427,7 @@ def test_lru_cache_worker_adapter_manager(dist_init, dummy_model, device, tmp_path
     vllm_config.scheduler_config.max_num_seqs = 4
     vllm_config.scheduler_config.max_num_batched_tokens = 2
     worker_adapter_manager = LRUCacheWorkerLoRAManager(
-        vllm_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES
+        vllm_config, device, EMBEDDING_MODULES
     )
 
     worker_adapter_manager.max_num_seqs = 4
@@ -533,9 +530,7 @@ def test_worker_adapter_manager(dist_init, dummy_model_gate_up, device, tmp_path
     vllm_config.scheduler_config.max_num_seqs = 4
     vllm_config.scheduler_config.max_num_batched_tokens = 2
 
-    worker_adapter_manager = WorkerLoRAManager(
-        vllm_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES
-    )
+    worker_adapter_manager = WorkerLoRAManager(vllm_config, device, EMBEDDING_MODULES)
     worker_adapter_manager.vocab_size = dummy_model_gate_up.unpadded_vocab_size
     worker_adapter_manager.create_lora_manager(dummy_model_gate_up)
 
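The net call-site change for both worker managers, side by side (a sketch reconstructed from the hunks above; the managers' own definitions are outside this diff):

    # before: padding modules were passed explicitly
    manager = WorkerLoRAManager(
        vllm_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES
    )
    # after: the unused embedding_padding_modules argument is removed
    manager = WorkerLoRAManager(vllm_config, device, EMBEDDING_MODULES)

LRUCacheWorkerLoRAManager gets the same trimmed argument list in the hunk above.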