[Misc] [Core] Implement RFC "Augment BaseExecutor interfaces to enable hardware-agnostic speculative decoding" (#3837)

This commit is contained in:
Cade Daniel
2024-04-09 11:44:15 -07:00
committed by GitHub
parent 6d592eb430
commit e7c7067b45
20 changed files with 451 additions and 275 deletions

View File

@@ -117,6 +117,7 @@ def create_worker(cls: type,
parallel_config=engine_config.parallel_config,
scheduler_config=engine_config.scheduler_config,
device_config=engine_config.device_config,
cache_config=engine_config.cache_config,
local_rank=0,
rank=0,
distributed_init_method=distributed_init_method,
@@ -128,8 +129,9 @@ def create_worker(cls: type,
engine_config.cache_config.num_gpu_blocks = num_gpu_blocks
engine_config.cache_config.num_cpu_blocks = 0
worker.init_cache_engine(engine_config.cache_config)
worker.warm_up_model()
worker.initialize_cache(
num_gpu_blocks=engine_config.cache_config.num_gpu_blocks,
num_cpu_blocks=engine_config.cache_config.num_cpu_blocks)
return worker