[core][model] yet another cpu offload implementation (#6496)

Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
youkaichao
2024-07-17 20:54:35 -07:00
committed by GitHub
parent 18fecc3559
commit 1c27d25fb5
7 changed files with 128 additions and 4 deletions

View File

@@ -433,6 +433,7 @@ class CacheConfig:
num_gpu_blocks_override: Optional[int] = None,
sliding_window: Optional[int] = None,
enable_prefix_caching: bool = False,
cpu_offload_gb: float = 0,
) -> None:
self.block_size = block_size
self.gpu_memory_utilization = gpu_memory_utilization
@@ -441,6 +442,7 @@ class CacheConfig:
self.cache_dtype = cache_dtype
self.sliding_window = sliding_window
self.enable_prefix_caching = enable_prefix_caching
self.cpu_offload_gb = cpu_offload_gb
self._verify_args()
self._verify_cache_dtype()
self._verify_prefix_caching()