[CORE] Adding support for insertion of soft-tuned prompts (#4645)
Co-authored-by: Swapnil Parekh <swapnilp@ibm.com>
Co-authored-by: Joe G <joseph.granados@h2o.ai>
Co-authored-by: Antoni Baum <antoni.baum@protonmail.com>
This commit is contained in:
@@ -127,37 +127,37 @@ def test_lora_model_manager(dist_init, dummy_model):
|
||||
model, 2, 2, 2,
|
||||
LoRAConfig(max_lora_rank=8, max_cpu_loras=3, max_loras=2))
|
||||
assert all(x is None for x in manager.lora_index_to_id)
|
||||
assert manager.add_lora(model_lora1)
|
||||
assert manager.activate_lora(1)
|
||||
assert manager.add_adapter(model_lora1)
|
||||
assert manager.activate_adapter(1)
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert not manager.add_lora(model_lora1)
|
||||
assert not manager.activate_lora(1)
|
||||
assert manager.add_lora(model_lora2)
|
||||
assert manager.activate_lora(2)
|
||||
assert not manager.add_adapter(model_lora1)
|
||||
assert not manager.activate_adapter(1)
|
||||
assert manager.add_adapter(model_lora2)
|
||||
assert manager.activate_adapter(2)
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
assert not manager.add_lora(model_lora2)
|
||||
assert not manager.activate_lora(2)
|
||||
assert manager.add_lora(model_lora3)
|
||||
assert not manager.add_adapter(model_lora2)
|
||||
assert not manager.activate_adapter(2)
|
||||
assert manager.add_adapter(model_lora3)
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
with pytest.raises(ValueError):
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
assert manager.remove_lora(model_lora2.id)
|
||||
assert manager.remove_adapter(model_lora2.id)
|
||||
assert manager.lora_index_to_id[1] is None
|
||||
assert not manager.remove_lora(model_lora2.id)
|
||||
assert manager.remove_lora(model_lora1.id)
|
||||
assert not manager.remove_lora(model_lora1.id)
|
||||
assert manager.add_lora(model_lora1)
|
||||
assert not manager.remove_adapter(model_lora2.id)
|
||||
assert manager.remove_adapter(model_lora1.id)
|
||||
assert not manager.remove_adapter(model_lora1.id)
|
||||
assert manager.add_adapter(model_lora1)
|
||||
assert manager.lora_index_to_id[0] is None
|
||||
assert manager.lora_index_to_id[1] is None
|
||||
assert manager.add_lora(model_lora2)
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.add_adapter(model_lora2)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] is None
|
||||
assert manager.activate_lora(2)
|
||||
assert manager.activate_adapter(2)
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
|
||||
@@ -173,70 +173,70 @@ def test_lora_lru_cache_model_manager(dist_init, dummy_model):
|
||||
model, 2, 2, 2,
|
||||
LoRAConfig(max_lora_rank=8, max_cpu_loras=3, max_loras=2))
|
||||
assert all(x is None for x in manager.lora_index_to_id)
|
||||
assert manager.add_lora(model_lora1)
|
||||
assert manager.activate_lora(1)
|
||||
assert manager.add_adapter(model_lora1)
|
||||
assert manager.activate_adapter(1)
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert not manager.add_lora(model_lora1)
|
||||
assert not manager.activate_lora(1)
|
||||
assert manager.add_lora(model_lora2)
|
||||
assert manager.activate_lora(2)
|
||||
assert not manager.add_adapter(model_lora1)
|
||||
assert not manager.activate_adapter(1)
|
||||
assert manager.add_adapter(model_lora2)
|
||||
assert manager.activate_adapter(2)
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
assert not manager.add_lora(model_lora2)
|
||||
assert not manager.activate_lora(2)
|
||||
assert manager.add_lora(model_lora3)
|
||||
assert not manager.add_adapter(model_lora2)
|
||||
assert not manager.activate_adapter(2)
|
||||
assert manager.add_adapter(model_lora3)
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
assert manager.remove_lora(model_lora2.id)
|
||||
assert manager.remove_adapter(model_lora2.id)
|
||||
assert manager.lora_index_to_id[1] is None
|
||||
assert not manager.remove_lora(model_lora2.id)
|
||||
assert manager.remove_lora(model_lora1.id)
|
||||
assert not manager.remove_lora(model_lora1.id)
|
||||
assert manager.add_lora(model_lora1)
|
||||
assert manager.activate_lora(1)
|
||||
assert not manager.remove_adapter(model_lora2.id)
|
||||
assert manager.remove_adapter(model_lora1.id)
|
||||
assert not manager.remove_adapter(model_lora1.id)
|
||||
assert manager.add_adapter(model_lora1)
|
||||
assert manager.activate_adapter(1)
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
assert manager.add_lora(model_lora2)
|
||||
assert manager.deactivate_lora(3)
|
||||
assert manager.add_adapter(model_lora2)
|
||||
assert manager.deactivate_adapter(3)
|
||||
assert manager.lora_index_to_id[0] is None
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
assert manager.activate_lora(2)
|
||||
assert manager.activate_adapter(2)
|
||||
assert manager.lora_index_to_id[0] == 2
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.lora_index_to_id[0] == 2
|
||||
assert manager.lora_index_to_id[1] == 3
|
||||
assert manager.pin_lora(2)
|
||||
assert manager.pin_adapter(2)
|
||||
assert manager.lora_index_to_id[0] == 2
|
||||
assert manager.lora_index_to_id[1] == 3
|
||||
assert manager.activate_lora(1)
|
||||
assert manager.activate_adapter(1)
|
||||
assert manager.lora_index_to_id[0] == 2
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
assert manager.deactivate_lora(2)
|
||||
assert manager.deactivate_adapter(2)
|
||||
assert manager.lora_index_to_id[0] is None
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
assert manager.pin_lora(3)
|
||||
assert manager.pin_lora(1)
|
||||
assert manager.pin_adapter(3)
|
||||
assert manager.pin_adapter(1)
|
||||
with pytest.raises(RuntimeError):
|
||||
assert manager.pin_lora(2)
|
||||
assert manager.pin_adapter(2)
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
with pytest.raises(RuntimeError):
|
||||
assert manager.activate_lora(2)
|
||||
assert manager.activate_adapter(2)
|
||||
|
||||
assert manager.deactivate_lora(3)
|
||||
assert manager.pin_lora(2)
|
||||
assert manager.deactivate_adapter(3)
|
||||
assert manager.pin_adapter(2)
|
||||
assert manager.lora_index_to_id[0] == 2
|
||||
assert manager.lora_index_to_id[1] == 1
|
||||
assert manager.remove_lora(3)
|
||||
assert manager.remove_adapter(3)
|
||||
with pytest.raises(ValueError):
|
||||
assert manager.pin_lora(3)
|
||||
assert manager.pin_adapter(3)
|
||||
|
||||
|
||||
def test_lru_lora_model_manager(dist_init, dummy_model):
|
||||
@@ -256,168 +256,169 @@ def test_lru_lora_model_manager(dist_init, dummy_model):
|
||||
assert all(x is None for x in manager.lora_index_to_id)
|
||||
|
||||
# Add up to capacity
|
||||
assert manager.add_lora(model_lora1)
|
||||
assert manager.add_lora(model_lora2)
|
||||
assert manager.activate_lora(1)
|
||||
assert manager.activate_lora(2)
|
||||
assert manager.add_adapter(model_lora1)
|
||||
assert manager.add_adapter(model_lora2)
|
||||
assert manager.activate_adapter(1)
|
||||
assert manager.activate_adapter(2)
|
||||
|
||||
assert set(manager.list_loras()) == {1, 2}
|
||||
assert set(manager.list_adapters()) == {1, 2}
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
|
||||
# Add over capacity
|
||||
assert manager.add_lora(model_lora3)
|
||||
assert manager.add_lora(model_lora4)
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.activate_lora(4)
|
||||
assert manager.add_adapter(model_lora3)
|
||||
assert manager.add_adapter(model_lora4)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.activate_adapter(4)
|
||||
|
||||
assert set(manager.list_loras()) == {3, 4}
|
||||
assert set(manager.list_adapters()) == {3, 4}
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 4
|
||||
|
||||
# Add 3 again to move it to the top and then add 2
|
||||
# should return false since it's in already
|
||||
assert not manager.add_lora(model_lora3)
|
||||
assert not manager.activate_lora(3)
|
||||
assert manager.add_lora(model_lora2)
|
||||
assert manager.activate_lora(2)
|
||||
assert not manager.add_adapter(model_lora3)
|
||||
assert not manager.activate_adapter(3)
|
||||
assert manager.add_adapter(model_lora2)
|
||||
assert manager.activate_adapter(2)
|
||||
|
||||
assert set(manager.list_loras()) == {3, 2}
|
||||
assert set(manager.list_adapters()) == {3, 2}
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
|
||||
# Remove manually
|
||||
assert manager.remove_lora(3)
|
||||
assert not manager.remove_lora(3)
|
||||
assert manager.remove_adapter(3)
|
||||
assert not manager.remove_adapter(3)
|
||||
|
||||
assert set(manager.list_loras()) == {2}
|
||||
assert set(manager.list_adapters()) == {2}
|
||||
assert manager.lora_index_to_id[0] is None
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
|
||||
assert manager.add_lora(model_lora3)
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.add_lora(model_lora4)
|
||||
assert manager.activate_lora(4)
|
||||
assert manager.add_adapter(model_lora3)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.add_adapter(model_lora4)
|
||||
assert manager.activate_adapter(4)
|
||||
|
||||
assert set(manager.list_loras()) == {3, 4}
|
||||
assert set(manager.list_adapters()) == {3, 4}
|
||||
assert manager.lora_index_to_id[0] == 3
|
||||
assert manager.lora_index_to_id[1] == 4
|
||||
|
||||
assert manager.remove_oldest_lora()
|
||||
assert set(manager.list_loras()) == {4}
|
||||
assert manager.remove_oldest_adapter()
|
||||
assert set(manager.list_adapters()) == {4}
|
||||
assert manager.lora_index_to_id[0] is None
|
||||
assert manager.lora_index_to_id[1] == 4
|
||||
|
||||
assert manager.remove_oldest_lora()
|
||||
assert set(manager.list_loras()) == set()
|
||||
assert manager.remove_oldest_adapter()
|
||||
assert set(manager.list_adapters()) == set()
|
||||
assert all(x is None for x in manager.lora_index_to_id)
|
||||
|
||||
assert not manager.remove_oldest_lora()
|
||||
assert set(manager.list_loras()) == set()
|
||||
assert not manager.remove_oldest_adapter()
|
||||
assert set(manager.list_adapters()) == set()
|
||||
assert all(x is None for x in manager.lora_index_to_id)
|
||||
|
||||
# pinning
|
||||
assert manager.add_lora(model_lora3)
|
||||
assert manager.activate_lora(3)
|
||||
assert manager.add_lora(model_lora4)
|
||||
assert manager.activate_lora(4)
|
||||
assert set(manager.list_loras()) == {3, 4}
|
||||
assert manager.add_adapter(model_lora3)
|
||||
assert manager.activate_adapter(3)
|
||||
assert manager.add_adapter(model_lora4)
|
||||
assert manager.activate_adapter(4)
|
||||
assert set(manager.list_adapters()) == {3, 4}
|
||||
with pytest.raises(ValueError):
|
||||
assert manager.pin_lora(1)
|
||||
assert manager.pin_lora(3)
|
||||
assert manager.pin_adapter(1)
|
||||
assert manager.pin_adapter(3)
|
||||
# Remove manually
|
||||
assert manager.remove_lora(3)
|
||||
assert not manager.remove_lora(3)
|
||||
assert manager.remove_adapter(3)
|
||||
assert not manager.remove_adapter(3)
|
||||
|
||||
assert set(manager.list_loras()) == {4}
|
||||
assert set(manager.list_adapters()) == {4}
|
||||
assert manager.lora_index_to_id[0] is None
|
||||
assert manager.lora_index_to_id[1] == 4
|
||||
|
||||
assert manager.add_lora(model_lora1)
|
||||
assert manager.pin_lora(1)
|
||||
assert manager.add_lora(model_lora2)
|
||||
assert manager.activate_lora(2)
|
||||
assert manager.add_adapter(model_lora1)
|
||||
assert manager.pin_adapter(1)
|
||||
assert manager.add_adapter(model_lora2)
|
||||
assert manager.activate_adapter(2)
|
||||
|
||||
assert set(manager.list_loras()) == {1, 2}
|
||||
assert set(manager.list_adapters()) == {1, 2}
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] == 2
|
||||
|
||||
assert manager.remove_oldest_lora()
|
||||
assert set(manager.list_loras()) == {1}
|
||||
assert manager.remove_oldest_adapter()
|
||||
assert set(manager.list_adapters()) == {1}
|
||||
assert manager.lora_index_to_id[0] == 1
|
||||
assert manager.lora_index_to_id[1] is None
|
||||
|
||||
with pytest.raises(RuntimeError):
|
||||
assert manager.remove_oldest_lora()
|
||||
assert manager.remove_oldest_adapter()
|
||||
|
||||
assert set(manager.list_loras()) == {1}
|
||||
assert set(manager.list_adapters()) == {1}
|
||||
|
||||
|
||||
def test_lru_cache_worker_lora_manager(llama_2_7b_model_extra_embeddings,
|
||||
sql_lora_files):
|
||||
def test_lru_cache_worker_adapter_manager(llama_2_7b_model_extra_embeddings,
|
||||
sql_lora_files):
|
||||
lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4)
|
||||
worker_lora_manager = LRUCacheWorkerLoRAManager(
|
||||
worker_adapter_manager = LRUCacheWorkerLoRAManager(
|
||||
4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size -
|
||||
lora_config.lora_extra_vocab_size, lora_config, torch.device("cuda"),
|
||||
EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES)
|
||||
worker_lora_manager.create_lora_manager(llama_2_7b_model_extra_embeddings)
|
||||
worker_adapter_manager.create_lora_manager(
|
||||
llama_2_7b_model_extra_embeddings)
|
||||
|
||||
mapping = LoRAMapping([], [])
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("2", 2, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 2}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2
|
||||
assert worker_adapter_manager.list_adapters() == {1, 2}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("3", 3, sql_lora_files),
|
||||
LoRARequest("4", 4, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 2, 3, 4}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 3
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 4
|
||||
assert worker_adapter_manager.list_adapters() == {1, 2, 3, 4}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 3
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 4
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("2", 2, sql_lora_files),
|
||||
LoRARequest("5", 5, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 2, 4, 5}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 5
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 4
|
||||
assert worker_adapter_manager.list_adapters() == {1, 2, 4, 5}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 5
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 4
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("1", 1, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 2, 4, 5}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 5
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 4
|
||||
assert worker_adapter_manager.list_adapters() == {1, 2, 4, 5}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 5
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 4
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("6", 6, sql_lora_files),
|
||||
LoRARequest("7", 7, sql_lora_files),
|
||||
LoRARequest("8", 8, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 6, 7, 8}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 7
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 8
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[3] == 6
|
||||
assert worker_adapter_manager.list_adapters() == {1, 6, 7, 8}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 7
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 8
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[3] == 6
|
||||
|
||||
# Over capacity
|
||||
with pytest.raises(RuntimeError):
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("10", 10, sql_lora_files),
|
||||
LoRARequest("11", 11, sql_lora_files),
|
||||
LoRARequest("12", 12, sql_lora_files),
|
||||
@@ -426,68 +427,69 @@ def test_lru_cache_worker_lora_manager(llama_2_7b_model_extra_embeddings,
|
||||
], mapping)
|
||||
|
||||
|
||||
def test_worker_lora_manager(llama_2_7b_model_extra_embeddings,
|
||||
sql_lora_files):
|
||||
def test_worker_adapter_manager(llama_2_7b_model_extra_embeddings,
|
||||
sql_lora_files):
|
||||
# Should remove every LoRA not specified in the request.
|
||||
lora_config = LoRAConfig(max_lora_rank=8, max_cpu_loras=4, max_loras=4)
|
||||
worker_lora_manager = WorkerLoRAManager(
|
||||
worker_adapter_manager = WorkerLoRAManager(
|
||||
4, 2, llama_2_7b_model_extra_embeddings.unpadded_vocab_size -
|
||||
lora_config.lora_extra_vocab_size, lora_config, torch.device("cuda"),
|
||||
EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES)
|
||||
worker_lora_manager.create_lora_manager(llama_2_7b_model_extra_embeddings)
|
||||
worker_adapter_manager.create_lora_manager(
|
||||
llama_2_7b_model_extra_embeddings)
|
||||
|
||||
mapping = LoRAMapping([], [])
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("2", 2, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 2}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2
|
||||
assert worker_adapter_manager.list_adapters() == {1, 2}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("3", 3, sql_lora_files),
|
||||
LoRARequest("4", 4, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 3, 4}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 3
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 4
|
||||
assert worker_adapter_manager.list_adapters() == {1, 3, 4}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 3
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 4
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("2", 2, sql_lora_files),
|
||||
LoRARequest("5", 5, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1, 2, 5}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 2
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 5
|
||||
assert worker_adapter_manager.list_adapters() == {1, 2, 5}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 2
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 5
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("1", 1, sql_lora_files),
|
||||
LoRARequest("1", 1, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {1}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 1
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] is None
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] is None
|
||||
assert worker_adapter_manager.list_adapters() == {1}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 1
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] is None
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] is None
|
||||
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("6", 6, sql_lora_files),
|
||||
LoRARequest("7", 7, sql_lora_files),
|
||||
LoRARequest("8", 8, sql_lora_files)
|
||||
], mapping)
|
||||
assert worker_lora_manager.list_loras() == {6, 7, 8}
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[0] == 8
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[1] == 6
|
||||
assert worker_lora_manager._lora_manager.lora_index_to_id[2] == 7
|
||||
assert worker_adapter_manager.list_adapters() == {6, 7, 8}
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[0] == 8
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[1] == 6
|
||||
assert worker_adapter_manager._adapter_manager.lora_index_to_id[2] == 7
|
||||
|
||||
# Over capacity
|
||||
with pytest.raises(RuntimeError):
|
||||
worker_lora_manager.set_active_loras([
|
||||
worker_adapter_manager.set_active_adapters([
|
||||
LoRARequest("10", 10, sql_lora_files),
|
||||
LoRARequest("11", 11, sql_lora_files),
|
||||
LoRARequest("12", 12, sql_lora_files),
|
||||
@@ -525,8 +527,8 @@ def test_packed_loras(dist_init, dummy_model_gate_up):
|
||||
|
||||
assert isinstance(model.get_submodule("gate_up_proj"),
|
||||
MergedColumnParallelLinearWithLoRA)
|
||||
assert manager.add_lora(model_lora)
|
||||
assert manager.add_lora(model_lora1)
|
||||
assert manager.add_adapter(model_lora)
|
||||
assert manager.add_adapter(model_lora1)
|
||||
|
||||
packed_lora = model_lora.get_lora("gate_up_proj")
|
||||
assert packed_lora and isinstance(packed_lora, PackedLoRALayerWeights)
|
||||
|
||||
Reference in New Issue
Block a user