[Core][Bugfix][Perf] Introduce MQLLMEngine to avoid asyncio OH (#8157)
Co-authored-by: Nick Hill <nickhill@us.ibm.com> Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com> Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Co-authored-by: Simon Mo <simon.mo@hey.com>
This commit is contained in:
committed by
GitHub
parent
9d104b5beb
commit
7c7714d856
@@ -601,9 +601,12 @@ class AsyncLLMEngine:
|
||||
return self._errored_with is not None
|
||||
|
||||
@property
|
||||
def limit_concurrency(self) -> Optional[int]:
|
||||
"""Maximum number of concurrently running requests."""
|
||||
return None
|
||||
def dead_error(self) -> BaseException:
|
||||
return AsyncEngineDeadError(
|
||||
"Background loop is not running. If it was running, "
|
||||
"inspect the output to find the stacktrace of the "
|
||||
"error that caused the background loop to stop "
|
||||
"(AsyncEngineDeadError).")
|
||||
|
||||
def set_errored(self, exc: Exception) -> None:
|
||||
self._errored_with = exc
|
||||
|
||||
Reference in New Issue
Block a user