[Core] Consolidate prompt arguments to LLM engines (#4328)
Co-authored-by: Roger Wang <ywang@roblox.com>
@@ -86,20 +86,18 @@ def generate(
 
 
 def batched_generate(
-    llm,
+    llm: vllm.LLM,
     inputs: List[Tuple[str, SamplingParams, Optional[LoRARequest]]],
 ):
     for input in inputs:
         prompt, sampling_param, lora_req = input
-        requests_data = llm._validate_and_prepare_requests(
+        # Add requests to the engine and run the engine
+        llm._validate_and_add_requests(
             prompt,
             sampling_param,
             lora_request=lora_req,
         )
 
-        # Add requests to the engine and run the engine
-        for request_data in requests_data:
-            llm._add_request(**request_data)
     outputs = llm._run_engine(use_tqdm=True)
     return [outputs[i].outputs[0].text.strip() for i in range(len(outputs))]
 
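For context, this change collapses the old two-step flow (build request dicts with _validate_and_prepare_requests, then feed each one to _add_request) into a single _validate_and_add_requests call. Below is a minimal sketch of how the updated helper might be driven end to end; the model name, adapter name, and adapter path are placeholders, and the helper body mirrors the post-change code in the diff above.

from typing import List, Optional, Tuple

import vllm
from vllm import SamplingParams
from vllm.lora.request import LoRARequest


def batched_generate(
    llm: vllm.LLM,
    inputs: List[Tuple[str, SamplingParams, Optional[LoRARequest]]],
):
    for prompt, sampling_param, lora_req in inputs:
        # Queue each prompt with the consolidated call; validation and
        # request registration now happen in one step.
        llm._validate_and_add_requests(
            prompt,
            sampling_param,
            lora_request=lora_req,
        )
    # Run the engine once over everything that was queued.
    outputs = llm._run_engine(use_tqdm=True)
    return [output.outputs[0].text.strip() for output in outputs]


if __name__ == "__main__":
    # Placeholder model and adapter; substitute real ones.
    llm = vllm.LLM(model="meta-llama/Llama-2-7b-hf", enable_lora=True)
    params = SamplingParams(temperature=0.0, max_tokens=32)
    lora = LoRARequest("example-adapter", 1, "/path/to/adapter")
    print(batched_generate(llm, [("Hello, world!", params, lora)]))

Note that _validate_and_add_requests and _run_engine are private LLM methods, so test helpers like this are their intended callers; ordinary users reach the same path through the public llm.generate(...).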