Allow model to be served under multiple names (#2894)
Co-authored-by: Alexandre Payot <alexandrep@graphcore.ai>
This commit is contained in:
@@ -53,10 +53,10 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
|
||||
def __init__(self,
|
||||
engine: AsyncLLMEngine,
|
||||
served_model: str,
|
||||
served_model_names: List[str],
|
||||
lora_modules: Optional[List[LoRA]] = None):
|
||||
super().__init__(engine=engine,
|
||||
served_model=served_model,
|
||||
served_model_names=served_model_names,
|
||||
lora_modules=lora_modules)
|
||||
|
||||
async def create_completion(self, request: CompletionRequest,
|
||||
@@ -79,7 +79,7 @@ class OpenAIServingCompletion(OpenAIServing):
|
||||
return self.create_error_response(
|
||||
"suffix is not currently supported")
|
||||
|
||||
model_name = request.model
|
||||
model_name = self.served_model_names[0]
|
||||
request_id = f"cmpl-{random_uuid()}"
|
||||
created_time = int(time.time())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user