[Core] Consolidate prompt arguments to LLM engines (#4328)

Co-authored-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Cyrus Leung
2024-05-29 04:29:31 +08:00
committed by GitHub
parent 290f4ada2b
commit 5ae5ed1e60
43 changed files with 1407 additions and 442 deletions

View File

@@ -143,7 +143,8 @@ class OpenAIServing:
         return json_str
 
     async def _check_model(
-        self, request: Union[CompletionRequest, ChatCompletionRequest]
+        self, request: Union[CompletionRequest, ChatCompletionRequest,
+                             EmbeddingRequest]
     ) -> Optional[ErrorResponse]:
         if request.model in self.served_model_names:
             return None
@@ -155,7 +156,8 @@ class OpenAIServing:
             status_code=HTTPStatus.NOT_FOUND)
 
     def _maybe_get_lora(
-        self, request: Union[CompletionRequest, ChatCompletionRequest]
+        self, request: Union[CompletionRequest, ChatCompletionRequest,
+                             EmbeddingRequest]
     ) -> Optional[LoRARequest]:
         if request.model in self.served_model_names:
             return None