[Frontend] Kill the server on engine death (#6594)
Signed-off-by: Joe Runde <joe@joerun.de>
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
@@ -49,6 +49,7 @@ if TYPE_CHECKING:
|
||||
NVCC_THREADS: Optional[str] = None
|
||||
VLLM_USE_PRECOMPILED: bool = False
|
||||
VLLM_NO_DEPRECATION_WARNING: bool = False
|
||||
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
|
||||
CMAKE_BUILD_TYPE: Optional[str] = None
|
||||
VERBOSE: bool = False
|
||||
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
|
||||
@@ -335,6 +336,11 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
"VLLM_NO_DEPRECATION_WARNING":
|
||||
lambda: bool(int(os.getenv("VLLM_NO_DEPRECATION_WARNING", "0"))),
|
||||
|
||||
# If set, the OpenAI API server will stay alive even after the underlying
|
||||
# AsyncLLMEngine errors and stops serving requests
|
||||
"VLLM_KEEP_ALIVE_ON_ENGINE_DEATH":
|
||||
lambda: bool(os.getenv("VLLM_KEEP_ALIVE_ON_ENGINE_DEATH", 0)),
|
||||
|
||||
# If the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN is set, it allows
|
||||
# the user to specify a max sequence length greater than
|
||||
# the max length derived from the model's config.json.
|
||||
|
||||
Reference in New Issue
Block a user