[Core][Bugfix][Perf] Introduce MQLLMEngine to avoid asyncio overhead (#8157)
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
Co-authored-by: Simon Mo <simon.mo@hey.com>
This commit is contained in:
committed by
GitHub
parent
9d104b5beb
commit
7c7714d856
@@ -57,7 +57,7 @@ if TYPE_CHECKING:
|
||||
VERBOSE: bool = False
|
||||
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
|
||||
VLLM_TEST_FORCE_FP8_MARLIN: bool = False
|
||||
VLLM_RPC_GET_DATA_TIMEOUT_MS: int = 5000
|
||||
VLLM_RPC_TIMEOUT: int = 10000 # ms
|
||||
VLLM_PLUGINS: Optional[List[str]] = None
|
||||
VLLM_TORCH_PROFILER_DIR: Optional[str] = None
|
||||
VLLM_USE_TRITON_AWQ: bool = False
|
||||
@@ -393,8 +393,8 @@ environment_variables: Dict[str, Callable[[], Any]] = {
|
||||
|
||||
# Time in ms for the zmq client to wait for a response from the backend
|
||||
# server for simple data operations
|
||||
"VLLM_RPC_GET_DATA_TIMEOUT_MS":
|
||||
lambda: int(os.getenv("VLLM_RPC_GET_DATA_TIMEOUT_MS", "5000")),
|
||||
"VLLM_RPC_TIMEOUT":
|
||||
lambda: int(os.getenv("VLLM_RPC_TIMEOUT", "10000")),
|
||||
|
||||
# a list of plugin names to load, separated by commas.
|
||||
# if this is not set, it means all plugins will be loaded
|
||||
|
||||
Reference in New Issue
Block a user