[Refactor] Remove dead code in Responses API serving (#36726)
Signed-off-by: sfeng33 <4florafeng@gmail.com>
Co-authored-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
@@ -1102,7 +1102,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
event_deque: deque[StreamingResponsesResponse] = deque()
|
||||
new_event_signal = asyncio.Event()
|
||||
self.event_store[request.request_id] = (event_deque, new_event_signal)
|
||||
response = None
|
||||
generator = self.responses_stream_generator(request, *args, **kwargs)
|
||||
try:
|
||||
async for event in generator:
|
||||
@@ -1111,15 +1110,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
finally:
|
||||
new_event_signal.set()
|
||||
|
||||
if response is not None and isinstance(response, ErrorResponse):
|
||||
# If the request has failed, update the status to "failed".
|
||||
response_id = request.request_id
|
||||
async with self.response_store_lock:
|
||||
stored_response = self.response_store.get(response_id)
|
||||
assert stored_response is not None
|
||||
if stored_response.status not in ("completed", "cancelled"):
|
||||
stored_response.status = "failed"
|
||||
|
||||
async def _run_background_request(
|
||||
self,
|
||||
request: ResponsesRequest,
|
||||
@@ -1226,19 +1216,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
param="response_id",
|
||||
)
|
||||
|
||||
def _make_store_not_supported_error(self) -> ErrorResponse:
|
||||
return self.create_error_response(
|
||||
err_type="invalid_request_error",
|
||||
message=(
|
||||
"`store=True` (default) is not supported. Please set "
|
||||
"`store=False` in Responses API or set "
|
||||
"`VLLM_ENABLE_RESPONSES_API_STORE=1` in the env var when "
|
||||
"starting the vLLM server."
|
||||
),
|
||||
status_code=HTTPStatus.BAD_REQUEST,
|
||||
param="store",
|
||||
)
|
||||
|
||||
async def _process_simple_streaming_events(
|
||||
self,
|
||||
request: ResponsesRequest,
|
||||
|
||||
Reference in New Issue
Block a user