Optimize KV cache distribution for asymmetric pipeline parallelism (#25164)
Signed-off-by: gholmes829 <g.holmes429@gmail.com>
This commit is contained in:
committed by
GitHub
parent
7e4cd070b0
commit
d100d78eb3
@@ -681,10 +681,10 @@ def test_get_kv_cache_configs_multiple_workers():
|
||||
num_blocks=10,
|
||||
kv_cache_tensors=[
|
||||
KVCacheTensor(
|
||||
size=ref_kv_cache_spec.page_size_bytes * 20, shared_by=["layer1"]
|
||||
size=ref_kv_cache_spec.page_size_bytes * 10, shared_by=["layer1"]
|
||||
),
|
||||
KVCacheTensor(
|
||||
size=ref_kv_cache_spec.page_size_bytes * 20, shared_by=["layer2"]
|
||||
size=ref_kv_cache_spec.page_size_bytes * 10, shared_by=["layer2"]
|
||||
),
|
||||
],
|
||||
kv_cache_groups=[
|
||||
@@ -718,7 +718,7 @@ def test_get_kv_cache_configs_multiple_workers():
|
||||
num_blocks=10,
|
||||
kv_cache_tensors=[
|
||||
KVCacheTensor(
|
||||
size=ref_kv_cache_spec.page_size_bytes * 20, shared_by=["layer1"]
|
||||
size=ref_kv_cache_spec.page_size_bytes * 10, shared_by=["layer1"]
|
||||
),
|
||||
],
|
||||
kv_cache_groups=[
|
||||
@@ -802,7 +802,7 @@ def test_get_kv_cache_configs_multiple_workers():
|
||||
num_blocks=10,
|
||||
kv_cache_tensors=[
|
||||
KVCacheTensor(
|
||||
size=ref_kv_cache_spec.page_size_bytes * 20, shared_by=["layer3"]
|
||||
size=ref_kv_cache_spec.page_size_bytes * 10, shared_by=["layer3"]
|
||||
),
|
||||
],
|
||||
kv_cache_groups=[
|
||||
@@ -813,7 +813,7 @@ def test_get_kv_cache_configs_multiple_workers():
|
||||
num_blocks=10,
|
||||
kv_cache_tensors=[
|
||||
KVCacheTensor(
|
||||
size=ref_kv_cache_spec.page_size_bytes * 20, shared_by=["layer3"]
|
||||
size=ref_kv_cache_spec.page_size_bytes * 10, shared_by=["layer3"]
|
||||
),
|
||||
],
|
||||
kv_cache_groups=[
|
||||
|
||||
Reference in New Issue
Block a user