[Core] Pipeline Parallel Support (#4412)

Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
This commit is contained in:
Murali Andoorveedu
2024-07-02 10:58:08 -07:00
committed by GitHub
parent 15aba081f3
commit c5832d2ae9
82 changed files with 1096 additions and 400 deletions

View File

@@ -54,9 +54,9 @@ def patch_execute_model_with_seeds(worker: Worker, rand_seeds: List[int]):
     return new_execute_model
-def zero_kv_cache(cache_engine: CacheEngine):
-    assert cache_engine.gpu_cache
-    for key_blocks, value_blocks in cache_engine.gpu_cache:
+def zero_kv_cache(cache_engine: List[CacheEngine]):
+    assert cache_engine[0].gpu_cache
+    for key_blocks, value_blocks in cache_engine[0].gpu_cache:
         key_blocks.zero_()
         value_blocks.zero_()