[V1] [5/N] API Server: unify Detokenizer and EngineCore input (#11545)

Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
This commit is contained in:
Robert Shaw
2024-12-28 15:51:57 -05:00
committed by GitHub
parent 328841d002
commit 4fb8e329fd
6 changed files with 66 additions and 77 deletions

View File

@@ -152,15 +152,17 @@ class LLMEngine:
) -> None:
# 1) Process raw inputs into the request.
detokenizer_req, engine_core_req = self.processor.process_inputs(
request_id, prompt, params, arrival_time, lora_request,
trace_headers, prompt_adapter_request, priority)
request = self.processor.process_inputs(request_id, prompt, params,
arrival_time, lora_request,
trace_headers,
prompt_adapter_request,
priority)
# 2) Add the request to Detokenizer.
self.detokenizer.add_request(detokenizer_req)
self.detokenizer.add_request(request)
# 3) Add the request to EngineCore.
self.engine_core.add_request(engine_core_req)
self.engine_core.add_request(request)
def step(self) -> List[RequestOutput]: