[Bugfix][Frontend] Fix Issues Under High Load With zeromq Frontend (#7394)

Co-authored-by: Nick Hill <nickhill@us.ibm.com>
This commit is contained in:
Robert Shaw
2024-08-21 13:34:14 -04:00
committed by GitHub
parent d3c002eadc
commit f7e3b0c5aa
9 changed files with 322 additions and 141 deletions

View File

@@ -766,6 +766,11 @@ class AsyncLLMEngine:
def errored(self) -> bool:
    """Report whether an error has been recorded on this engine.

    Returns:
        ``True`` when ``self._errored_with`` holds something other than
        ``None``, ``False`` otherwise.
    """
    return self._errored_with is not None
@property
def limit_concurrency(self) -> Optional[int]:
    """Maximum number of concurrently running requests.

    Returns:
        Always ``None`` in this implementation.
        NOTE(review): presumably ``None`` means no limit is imposed --
        confirm against the callers that consume this value.
    """
    return None
def set_errored(self, exc: Exception) -> None:
    """Record ``exc`` as the error this engine failed with.

    Args:
        exc: The exception to store in ``self._errored_with``. Any
            previously stored value is overwritten.
    """
    self._errored_with = exc