Convert online APIs to use Renderer (#34084)
Signed-off-by: Reagan Lee <reaganjlee@gmail.com> Co-authored-by: Reagan Lee <reaganjlee@gmail.com>
This commit is contained in:
@@ -471,15 +471,31 @@ class OpenAISpeechToText(OpenAIServing):
|
||||
lora_request=lora_request,
|
||||
)
|
||||
|
||||
list_result_generator = [
|
||||
self.engine_client.generate(
|
||||
trace_headers = (
|
||||
None
|
||||
if raw_request is None
|
||||
else await self._get_trace_headers(raw_request.headers)
|
||||
)
|
||||
|
||||
list_result_generator = []
|
||||
for i, prompt in enumerate(prompts):
|
||||
request_id_item = f"{request_id}_{i}"
|
||||
engine_request = self.input_processor.process_inputs(
|
||||
request_id_item,
|
||||
prompt,
|
||||
sampling_params,
|
||||
f"{request_id}_{i}",
|
||||
lora_request=lora_request,
|
||||
trace_headers=trace_headers,
|
||||
priority=0,
|
||||
)
|
||||
list_result_generator.append(
|
||||
self.engine_client.generate(
|
||||
engine_request,
|
||||
sampling_params,
|
||||
request_id_item,
|
||||
lora_request=lora_request,
|
||||
)
|
||||
)
|
||||
for i, prompt in enumerate(prompts)
|
||||
]
|
||||
except ValueError as e:
|
||||
return self.create_error_response(e)
|
||||
|
||||
|
||||
@@ -99,8 +99,6 @@ class ServingTokens(OpenAIServing):
|
||||
if raw_request:
|
||||
raw_request.state.request_metadata = request_metadata
|
||||
|
||||
# TODO(NickLucche): Change to EngineCoreRequest once Renderer work is
|
||||
# completed
|
||||
engine_prompts = await self._preprocess_completion(
|
||||
request,
|
||||
prompt_input=request.token_ids,
|
||||
@@ -132,16 +130,26 @@ class ServingTokens(OpenAIServing):
|
||||
tok_params = request.build_tok_params(self.model_config)
|
||||
tokenization_kwargs = tok_params.get_encode_kwargs()
|
||||
|
||||
result_generator = self.engine_client.generate(
|
||||
engine_request = self.input_processor.process_inputs(
|
||||
request_id,
|
||||
engine_prompt,
|
||||
sampling_params,
|
||||
request_id,
|
||||
lora_request=lora_request,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
trace_headers=trace_headers,
|
||||
priority=request.priority,
|
||||
)
|
||||
|
||||
result_generator = self.engine_client.generate(
|
||||
engine_request,
|
||||
sampling_params,
|
||||
request_id,
|
||||
lora_request=lora_request,
|
||||
trace_headers=trace_headers,
|
||||
priority=request.priority,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
)
|
||||
|
||||
except ValueError as e:
|
||||
return self.create_error_response(str(e))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user