[Misc] Configurable timeout for execute_model RPC calls via env var (#19544)

Signed-off-by: jinqinn <goodqinjin@163.com>
This commit is contained in:
jinqinn
2025-06-23 11:36:26 +08:00
committed by GitHub
parent 4a0f7888a3
commit f39ab2d4bd
2 changed files with 12 additions and 11 deletions

View File

@@ -37,11 +37,6 @@ from vllm.worker.worker_base import WorkerWrapperBase
logger = init_logger(__name__)
POLLING_TIMEOUT_MS = 5000
POLLING_TIMEOUT_S = POLLING_TIMEOUT_MS // 1000
EXECUTE_MODEL_TIMEOUT_S = 300
class MultiprocExecutor(Executor):
@@ -160,12 +155,12 @@ class MultiprocExecutor(Executor):
self,
scheduler_output,
) -> Union[ModelRunnerOutput, Future[ModelRunnerOutput]]:
(output, ) = self.collective_rpc("execute_model",
args=(scheduler_output, ),
unique_reply_rank=self.output_rank,
non_block=self.max_concurrent_batches
> 1,
timeout=EXECUTE_MODEL_TIMEOUT_S)
(output, ) = self.collective_rpc(
"execute_model",
args=(scheduler_output, ),
unique_reply_rank=self.output_rank,
non_block=self.max_concurrent_batches > 1,
timeout=envs.VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS)
return output
def collective_rpc(self,