[CI/Build] Reduce LoRA layer test cases (#23721)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -243,7 +243,7 @@ def check_punica_wrapper(punica_wrapper) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
|
@pytest.mark.parametrize("num_loras", [1, 2, 4])
|
||||||
@pytest.mark.parametrize("device", DEVICES)
|
@pytest.mark.parametrize("device", DEVICES)
|
||||||
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
|
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
|
||||||
@pytest.mark.parametrize("stage", STAGES)
|
@pytest.mark.parametrize("stage", STAGES)
|
||||||
@@ -347,7 +347,7 @@ def test_embeddings(dist_init, num_loras, device, vocab_size, stage) -> None:
|
|||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
# @pytest.mark.skip(
|
# @pytest.mark.skip(
|
||||||
# reason="Fails when loras are in any slot other than the first.")
|
# reason="Fails when loras are in any slot other than the first.")
|
||||||
@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
|
@pytest.mark.parametrize("num_loras", [1, 2, 4])
|
||||||
@pytest.mark.parametrize("device", DEVICES)
|
@pytest.mark.parametrize("device", DEVICES)
|
||||||
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
|
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
|
||||||
@pytest.mark.parametrize("stage", STAGES)
|
@pytest.mark.parametrize("stage", STAGES)
|
||||||
@@ -486,7 +486,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
|
|||||||
|
|
||||||
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
|
@pytest.mark.parametrize("num_loras", [1, 2, 4])
|
||||||
@pytest.mark.parametrize("device", DEVICES)
|
@pytest.mark.parametrize("device", DEVICES)
|
||||||
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 256512])
|
@pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 256512])
|
||||||
@pytest.mark.parametrize("stage", STAGES)
|
@pytest.mark.parametrize("stage", STAGES)
|
||||||
@@ -620,12 +620,15 @@ def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
|
|||||||
|
|
||||||
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
|
@pytest.mark.parametrize("num_loras", [1, 2, 4])
|
||||||
@pytest.mark.parametrize("device", DEVICES)
|
@pytest.mark.parametrize("device", DEVICES)
|
||||||
@pytest.mark.parametrize("stage", STAGES)
|
@pytest.mark.parametrize("stage", STAGES)
|
||||||
@pytest.mark.parametrize("bias_enabled", [True, False])
|
def test_linear_replicated(
|
||||||
def test_linear_replicated(dist_init, num_loras, device, stage,
|
dist_init,
|
||||||
bias_enabled) -> None:
|
num_loras,
|
||||||
|
device,
|
||||||
|
stage,
|
||||||
|
) -> None:
|
||||||
|
|
||||||
if current_platform.is_cuda_alike():
|
if current_platform.is_cuda_alike():
|
||||||
torch.cuda.set_device(device)
|
torch.cuda.set_device(device)
|
||||||
@@ -634,10 +637,11 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
|
|||||||
torch.set_default_device(device)
|
torch.set_default_device(device)
|
||||||
punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
|
punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
|
||||||
assert check_punica_wrapper(punica_wrapper)
|
assert check_punica_wrapper(punica_wrapper)
|
||||||
lora_config = LoRAConfig(max_loras=max_loras,
|
lora_config = LoRAConfig(
|
||||||
max_lora_rank=8,
|
max_loras=max_loras,
|
||||||
lora_dtype=torch.float16,
|
max_lora_rank=8,
|
||||||
bias_enabled=bias_enabled)
|
lora_dtype=torch.float16,
|
||||||
|
)
|
||||||
|
|
||||||
def create_random_linear_replicated_layer():
|
def create_random_linear_replicated_layer():
|
||||||
|
|
||||||
@@ -651,10 +655,6 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
|
|||||||
lora_linear.create_lora_weights(max_loras, lora_config)
|
lora_linear.create_lora_weights(max_loras, lora_config)
|
||||||
assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
|
assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
|
||||||
lora_linear.lora_b_stacked) == 1)
|
lora_linear.lora_b_stacked) == 1)
|
||||||
if bias_enabled:
|
|
||||||
assert len(lora_linear.lora_bias_stacked) == lora_linear.n_slices
|
|
||||||
else:
|
|
||||||
assert lora_linear.lora_bias_stacked is None
|
|
||||||
return linear, lora_linear
|
return linear, lora_linear
|
||||||
|
|
||||||
for i in range(NUM_RANDOM_SEEDS):
|
for i in range(NUM_RANDOM_SEEDS):
|
||||||
@@ -734,14 +734,13 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
|
|||||||
|
|
||||||
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
|
@pytest.mark.parametrize("num_loras", [1, 2, 4])
|
||||||
@pytest.mark.parametrize("orientation", ["row", "column"])
|
@pytest.mark.parametrize("orientation", ["row", "column"])
|
||||||
@pytest.mark.parametrize("fully_shard", [True, False])
|
@pytest.mark.parametrize("fully_shard", [True, False])
|
||||||
@pytest.mark.parametrize("device", DEVICES)
|
@pytest.mark.parametrize("device", DEVICES)
|
||||||
@pytest.mark.parametrize("stage", STAGES)
|
@pytest.mark.parametrize("stage", STAGES)
|
||||||
@pytest.mark.parametrize("bias_enabled", [True, False])
|
|
||||||
def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
|
def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
|
||||||
device, stage, bias_enabled) -> None:
|
device, stage) -> None:
|
||||||
|
|
||||||
if current_platform.is_cuda_alike():
|
if current_platform.is_cuda_alike():
|
||||||
torch.cuda.set_device(device)
|
torch.cuda.set_device(device)
|
||||||
@@ -750,11 +749,12 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
|
|||||||
torch.set_default_device(device)
|
torch.set_default_device(device)
|
||||||
punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
|
punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
|
||||||
assert check_punica_wrapper(punica_wrapper)
|
assert check_punica_wrapper(punica_wrapper)
|
||||||
lora_config = LoRAConfig(max_loras=max_loras,
|
lora_config = LoRAConfig(
|
||||||
max_lora_rank=8,
|
max_loras=max_loras,
|
||||||
fully_sharded_loras=fully_shard,
|
max_lora_rank=8,
|
||||||
lora_dtype=torch.float16,
|
fully_sharded_loras=fully_shard,
|
||||||
bias_enabled=bias_enabled)
|
lora_dtype=torch.float16,
|
||||||
|
)
|
||||||
|
|
||||||
def create_random_linear_parallel_layer():
|
def create_random_linear_parallel_layer():
|
||||||
if orientation == "row":
|
if orientation == "row":
|
||||||
@@ -777,10 +777,7 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
|
|||||||
lora_linear.create_lora_weights(max_loras, lora_config)
|
lora_linear.create_lora_weights(max_loras, lora_config)
|
||||||
assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
|
assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
|
||||||
lora_linear.lora_b_stacked) == 1)
|
lora_linear.lora_b_stacked) == 1)
|
||||||
if bias_enabled:
|
|
||||||
assert len(lora_linear.lora_bias_stacked) == lora_linear.n_slices
|
|
||||||
else:
|
|
||||||
assert lora_linear.lora_bias_stacked is None
|
|
||||||
return linear, lora_linear
|
return linear, lora_linear
|
||||||
|
|
||||||
for i in range(NUM_RANDOM_SEEDS):
|
for i in range(NUM_RANDOM_SEEDS):
|
||||||
@@ -860,14 +857,13 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
|
|||||||
|
|
||||||
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
|
@pytest.mark.parametrize("num_loras", [1, 2, 4])
|
||||||
@pytest.mark.parametrize("repeats", [1, 2, 3])
|
@pytest.mark.parametrize("repeats", [1, 2, 3])
|
||||||
@pytest.mark.parametrize("fully_shard", [True, False])
|
@pytest.mark.parametrize("fully_shard", [True, False])
|
||||||
@pytest.mark.parametrize("device", DEVICES)
|
@pytest.mark.parametrize("device", DEVICES)
|
||||||
@pytest.mark.parametrize("stage", STAGES)
|
@pytest.mark.parametrize("stage", STAGES)
|
||||||
@pytest.mark.parametrize("bias_enabled", [True, False])
|
|
||||||
def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
|
def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
|
||||||
device, stage, bias_enabled) -> None:
|
device, stage) -> None:
|
||||||
|
|
||||||
if current_platform.is_cuda_alike():
|
if current_platform.is_cuda_alike():
|
||||||
torch.cuda.set_device(device)
|
torch.cuda.set_device(device)
|
||||||
@@ -876,11 +872,12 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
|
|||||||
torch.set_default_device(device)
|
torch.set_default_device(device)
|
||||||
punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
|
punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
|
||||||
assert check_punica_wrapper(punica_wrapper)
|
assert check_punica_wrapper(punica_wrapper)
|
||||||
lora_config = LoRAConfig(max_loras=max_loras,
|
lora_config = LoRAConfig(
|
||||||
max_lora_rank=8,
|
max_loras=max_loras,
|
||||||
fully_sharded_loras=fully_shard,
|
max_lora_rank=8,
|
||||||
lora_dtype=torch.float16,
|
fully_sharded_loras=fully_shard,
|
||||||
bias_enabled=bias_enabled)
|
lora_dtype=torch.float16,
|
||||||
|
)
|
||||||
|
|
||||||
def create_column_parallel_packed_layer():
|
def create_column_parallel_packed_layer():
|
||||||
if repeats == 2:
|
if repeats == 2:
|
||||||
@@ -924,10 +921,7 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
|
|||||||
model_config=FakeConfig())
|
model_config=FakeConfig())
|
||||||
assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
|
assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
|
||||||
lora_linear.lora_b_stacked) == n_slices)
|
lora_linear.lora_b_stacked) == n_slices)
|
||||||
if bias_enabled:
|
|
||||||
assert len(lora_linear.lora_bias_stacked) == lora_linear.n_slices
|
|
||||||
else:
|
|
||||||
assert lora_linear.lora_bias_stacked is None
|
|
||||||
return linear, lora_linear
|
return linear, lora_linear
|
||||||
|
|
||||||
for i in range(NUM_RANDOM_SEEDS):
|
for i in range(NUM_RANDOM_SEEDS):
|
||||||
|
|||||||
Reference in New Issue
Block a user