[Frontend] Make TIMEOUT_KEEP_ALIVE configurable through env var (#18472)

Signed-off-by: liusiqian <liusiqian@tal.com>
This commit is contained in:
liusiqian-tal
2025-06-10 05:41:21 +08:00
committed by GitHub
parent ebb2f383b8
commit 31f58be96a
4 changed files with 14 additions and 11 deletions

View File

@@ -17,6 +17,7 @@ from typing import Any, Optional
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse
+import vllm.envs as envs
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.launcher import serve_http
@@ -29,7 +30,6 @@ from vllm.version import __version__ as VLLM_VERSION
logger = init_logger("vllm.entrypoints.api_server")
-TIMEOUT_KEEP_ALIVE = 5 # seconds.
app = FastAPI()
engine = None
@@ -134,7 +134,7 @@ async def run_server(args: Namespace,
host=args.host,
port=args.port,
log_level=args.log_level,
-timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
+timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE,
ssl_keyfile=args.ssl_keyfile,
ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs,