[Core] Remove prompt string from engine core data structures (#17214)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-04-25 23:41:05 -07:00
committed by GitHub
parent 513f074766
commit df6f3ce883
21 changed files with 40 additions and 76 deletions

View File

@@ -217,14 +217,12 @@ class AsyncLLM(EngineClient):
queue = RequestOutputCollector(output_kind=params.output_kind)
# Convert Input --> Request.
request = self.processor.process_inputs(request_id, prompt, params,
arrival_time, lora_request,
trace_headers,
prompt_adapter_request,
priority)
prompt_str, request = self.processor.process_inputs(
request_id, prompt, params, arrival_time, lora_request,
trace_headers, prompt_adapter_request, priority)
if params.n == 1:
await self._add_request(request, None, 0, queue)
await self._add_request(request, prompt_str, None, 0, queue)
return queue
# Fan out child requests (for n>1).
@@ -234,15 +232,18 @@ class AsyncLLM(EngineClient):
child_request = request if idx == params.n - 1 else copy(request)
child_request.request_id = request_id
child_request.sampling_params = params
await self._add_request(child_request, parent_request, idx, queue)
await self._add_request(child_request, prompt_str, parent_request,
idx, queue)
return queue
async def _add_request(self, request: EngineCoreRequest,
prompt: Optional[str],
parent_req: Optional[ParentRequest], index: int,
queue: RequestOutputCollector):
# Add the request to OutputProcessor (this process).
self.output_processor.add_request(request, parent_req, index, queue)
self.output_processor.add_request(request, prompt, parent_req, index,
queue)
# Add the EngineCoreRequest to EngineCore (separate process).
await self.engine_core.add_request_async(request)