Convert online APIs to use Renderer (#34084)

Signed-off-by: Reagan Lee <reaganjlee@gmail.com>
Co-authored-by: Reagan Lee <reaganjlee@gmail.com>
This commit is contained in:
Reagan Lee
2026-02-10 11:44:31 -08:00
committed by GitHub
parent 33bcd3dc3b
commit fdd6f2ad58
2 changed files with 33 additions and 9 deletions

View File

@@ -471,15 +471,31 @@ class OpenAISpeechToText(OpenAIServing):
lora_request=lora_request,
)
list_result_generator = [
self.engine_client.generate(
trace_headers = (
None
if raw_request is None
else await self._get_trace_headers(raw_request.headers)
)
list_result_generator = []
for i, prompt in enumerate(prompts):
request_id_item = f"{request_id}_{i}"
engine_request = self.input_processor.process_inputs(
request_id_item,
prompt,
sampling_params,
f"{request_id}_{i}",
lora_request=lora_request,
trace_headers=trace_headers,
priority=0,
)
list_result_generator.append(
self.engine_client.generate(
engine_request,
sampling_params,
request_id_item,
lora_request=lora_request,
)
)
for i, prompt in enumerate(prompts)
]
except ValueError as e:
return self.create_error_response(e)

View File

@@ -99,8 +99,6 @@ class ServingTokens(OpenAIServing):
if raw_request:
raw_request.state.request_metadata = request_metadata
# TODO(NickLucche): Change to EngineCoreRequest once Renderer work is
# completed
engine_prompts = await self._preprocess_completion(
request,
prompt_input=request.token_ids,
@@ -132,16 +130,26 @@ class ServingTokens(OpenAIServing):
tok_params = request.build_tok_params(self.model_config)
tokenization_kwargs = tok_params.get_encode_kwargs()
result_generator = self.engine_client.generate(
engine_request = self.input_processor.process_inputs(
request_id,
engine_prompt,
sampling_params,
request_id,
lora_request=lora_request,
tokenization_kwargs=tokenization_kwargs,
trace_headers=trace_headers,
priority=request.priority,
)
result_generator = self.engine_client.generate(
engine_request,
sampling_params,
request_id,
lora_request=lora_request,
trace_headers=trace_headers,
priority=request.priority,
tokenization_kwargs=tokenization_kwargs,
)
except ValueError as e:
return self.create_error_response(str(e))