Refactor Worker & InputMetadata (#1843)

This commit is contained in:
Woosuk Kwon
2023-11-29 22:16:37 -08:00
committed by GitHub
parent c782195662
commit 27feead2f8
27 changed files with 668 additions and 443 deletions

View File

@@ -201,9 +201,10 @@ class EngineArgs:
self.dtype, self.seed, self.revision,
self.tokenizer_revision, self.max_model_len,
self.quantization)
-cache_config = CacheConfig(
-self.block_size, self.gpu_memory_utilization, self.swap_space,
-getattr(model_config.hf_config, 'sliding_window', None))
+cache_config = CacheConfig(self.block_size,
+self.gpu_memory_utilization,
+self.swap_space,
+model_config.get_sliding_window())
parallel_config = ParallelConfig(self.pipeline_parallel_size,
self.tensor_parallel_size,
self.worker_use_ray,