[Core] Remove prompt string from engine core data structures (#17214)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -217,14 +217,12 @@ class AsyncLLM(EngineClient):
|
||||
queue = RequestOutputCollector(output_kind=params.output_kind)
|
||||
|
||||
# Convert Input --> Request.
|
||||
request = self.processor.process_inputs(request_id, prompt, params,
|
||||
arrival_time, lora_request,
|
||||
trace_headers,
|
||||
prompt_adapter_request,
|
||||
priority)
|
||||
prompt_str, request = self.processor.process_inputs(
|
||||
request_id, prompt, params, arrival_time, lora_request,
|
||||
trace_headers, prompt_adapter_request, priority)
|
||||
|
||||
if params.n == 1:
|
||||
await self._add_request(request, None, 0, queue)
|
||||
await self._add_request(request, prompt_str, None, 0, queue)
|
||||
return queue
|
||||
|
||||
# Fan out child requests (for n>1).
|
||||
@@ -234,15 +232,18 @@ class AsyncLLM(EngineClient):
|
||||
child_request = request if idx == params.n - 1 else copy(request)
|
||||
child_request.request_id = request_id
|
||||
child_request.sampling_params = params
|
||||
await self._add_request(child_request, parent_request, idx, queue)
|
||||
await self._add_request(child_request, prompt_str, parent_request,
|
||||
idx, queue)
|
||||
return queue
|
||||
|
||||
async def _add_request(self, request: EngineCoreRequest,
|
||||
prompt: Optional[str],
|
||||
parent_req: Optional[ParentRequest], index: int,
|
||||
queue: RequestOutputCollector):
|
||||
|
||||
# Add the request to OutputProcessor (this process).
|
||||
self.output_processor.add_request(request, parent_req, index, queue)
|
||||
self.output_processor.add_request(request, prompt, parent_req, index,
|
||||
queue)
|
||||
|
||||
# Add the EngineCoreRequest to EngineCore (separate process).
|
||||
await self.engine_core.add_request_async(request)
|
||||
|
||||
Reference in New Issue
Block a user