[TPU] Implement multi-step scheduling (#8489)

This commit is contained in:
Woosuk Kwon
2024-09-14 16:58:31 -07:00
committed by GitHub
parent 47790f3e32
commit 50e9ec41fc
5 changed files with 274 additions and 71 deletions

View File

@@ -68,8 +68,12 @@ class RayTPUExecutor(TPUExecutor):
)
assert self.speculative_config is None
-worker_module_name = "vllm.worker.tpu_worker"
-worker_class_name = "TPUWorker"
+if self.scheduler_config.is_multi_step:
+worker_module_name = "vllm.worker.multi_step_tpu_worker"
+worker_class_name = "MultiStepTPUWorker"
+else:
+worker_module_name = "vllm.worker.tpu_worker"
+worker_class_name = "TPUWorker"
# GKE does not fetch environment information from metadata server
# and instead sets these from within the Ray process. Therefore we