Use runtime profiling to replace manual memory analyzers (#81)

This commit is contained in:
Zhuohan Li
2023-05-19 11:35:44 -06:00
committed by GitHub
parent 825d8892b5
commit f756799b84
14 changed files with 211 additions and 478 deletions

View File

@@ -23,23 +23,18 @@ class Controller:
pipeline_parallel_size: int,
distributed_init_method: str,
model_name: str,
block_size: int,
num_gpu_blocks: int,
num_cpu_blocks: int,
dtype: str,
seed: int,
cache_dir: Optional[str],
use_dummy_weights: bool,
use_np_cache: bool,
max_num_batched_tokens: int,
max_num_sequences: int,
use_ray: bool,
) -> None:
self.stage_id = stage_id
self.stage_devices = stage_devices
self.model_name = model_name
self.block_size = block_size
self.num_gpu_blocks = num_gpu_blocks
self.num_cpu_blocks = num_cpu_blocks
self.use_ray = use_ray
# Which pipeline stage is this node assigned to?
@@ -56,9 +51,6 @@ class Controller:
worker_cls = Worker
worker = worker_cls(
model_name=model_name,
block_size=block_size,
num_gpu_blocks=num_gpu_blocks,
num_cpu_blocks=num_cpu_blocks,
dtype=dtype,
seed=seed,
distributed_init_method=distributed_init_method,
@@ -70,9 +62,44 @@ class Controller:
use_dummy_weights=use_dummy_weights,
use_np_cache=use_np_cache,
max_num_batched_tokens=max_num_batched_tokens,
max_num_sequences=max_num_sequences,
)
self.workers.append(worker)
def get_num_available_blocks(self, block_size: int, cpu_swap_space: int,
                             gpu_memory_utilization: float) -> List[Tuple[int, int]]:
    """Query every worker of this controller for its available block counts.

    The three arguments are forwarded unchanged, positionally, to each
    worker's ``get_num_available_blocks``.

    Args:
        block_size: Forwarded to each worker as the first argument.
        cpu_swap_space: Forwarded to each worker as the second argument.
        gpu_memory_utilization: Forwarded to each worker as the third
            argument.

    Returns:
        A list with one entry per worker, in ``self.workers`` order.
        NOTE(review): each entry is presumably a (num_gpu_blocks,
        num_cpu_blocks) pair -- confirm against the worker implementation.
    """
    via_ray = self.use_ray
    profile_args = (block_size, cpu_swap_space, gpu_memory_utilization)

    pending = []
    for worker in self.workers:
        method = worker.get_num_available_blocks
        # With Ray, the call goes through ``.remote`` and yields a handle
        # that is resolved below rather than an immediate result.
        invoke = method.remote if via_ray else method
        pending.append(invoke(*profile_args))

    # Resolve all Ray handles in one ``ray.get`` after every worker has been
    # dispatched; without Ray the list already holds the results.
    return ray.get(pending) if via_ray else pending
def init_cache_engine(self, block_size: int, num_gpu_blocks: int,
                      num_cpu_blocks: int):
    """Call ``init_cache_engine`` on every worker of this controller.

    The three arguments are forwarded unchanged, positionally, to each
    worker. Nothing is returned.

    Args:
        block_size: Forwarded to each worker as the first argument.
        num_gpu_blocks: Forwarded to each worker as the second argument.
        num_cpu_blocks: Forwarded to each worker as the third argument.
    """
    via_ray = self.use_ray
    engine_args = (block_size, num_gpu_blocks, num_cpu_blocks)

    handles = []
    for worker in self.workers:
        method = worker.init_cache_engine
        # With Ray, the call goes through ``.remote`` and yields a handle.
        invoke = method.remote if via_ray else method
        handles.append(invoke(*engine_args))

    if not via_ray:
        return
    # Wait on all Ray handles together once every worker has been dispatched.
    ray.get(handles)
def set_next(
self,
next_node: Union['Controller', 'Scheduler'],