[Core] Consolidate prompt arguments to LLM engines (#4328)

commit 5ae5ed1e60
parent 290f4ada2b
Author: Cyrus Leung
Date:   2024-05-29 04:29:31 +08:00
Committed by: GitHub
Co-authored-by: Roger Wang <ywang@roblox.com>

43 changed files with 1407 additions and 442 deletions

@@ -119,12 +119,17 @@ class OpenAIServingCompletion(OpenAIServing):
                         truncate_prompt_tokens)
                 prompt_ids, prompt_text = prompt_formats
-                generators.append(
-                    self.engine.generate(prompt_text,
-                                         sampling_params,
-                                         f"{request_id}-{i}",
-                                         prompt_token_ids=prompt_ids,
-                                         lora_request=lora_request))
+                generator = self.engine.generate(
+                    {
+                        "prompt": prompt_text,
+                        "prompt_token_ids": prompt_ids
+                    },
+                    sampling_params,
+                    f"{request_id}-{i}",
+                    lora_request=lora_request,
+                )
+                generators.append(generator)
         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))
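
For context, below is a minimal sketch of how a caller might use the consolidated prompt argument after this change. It assumes vLLM's AsyncLLMEngine, AsyncEngineArgs, and SamplingParams as exposed around this release; the model name, request id, and prompt text are placeholders, not part of this commit.

    import asyncio

    from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams


    async def main() -> None:
        # Placeholder model; any model supported by vLLM works here.
        engine = AsyncLLMEngine.from_engine_args(
            AsyncEngineArgs(model="facebook/opt-125m"))
        sampling_params = SamplingParams(max_tokens=16)

        # Before #4328: engine.generate(prompt_text, sampling_params, "req-0",
        #                               prompt_token_ids=prompt_ids, ...)
        # After #4328: the text and (optionally) pre-tokenized IDs travel
        # together in a single dict passed as the first argument.
        generator = engine.generate(
            {
                "prompt": "Hello, my name is",
                # "prompt_token_ids": [...],  # optional pre-tokenized form
            },
            sampling_params,
            "req-0",  # request id chosen by the caller
        )

        # The engine yields incremental RequestOutput objects; keep the last.
        final_output = None
        async for request_output in generator:
            final_output = request_output
        print(final_output.outputs[0].text)


    asyncio.run(main())

The dict form lets the same entry point accept text-only, token-only, or text-plus-token prompts without growing the positional argument list, which is the consolidation the commit title refers to.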