[Core] Support Lora lineage and base model metadata management (#6315)

This commit is contained in:
Jiaxin Shan
2024-09-19 23:20:56 -07:00
committed by GitHub
parent 9e5ec35b1f
commit 260d40b5ea
15 changed files with 337 additions and 45 deletions

View File

@@ -20,7 +20,8 @@ from vllm.entrypoints.openai.protocol import (CompletionLogProbs,
CompletionStreamResponse,
ErrorResponse, UsageInfo)
# yapf: enable
-from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
+from vllm.entrypoints.openai.serving_engine import (BaseModelPath,
+                                                    LoRAModulePath,
OpenAIServing,
PromptAdapterPath)
from vllm.logger import init_logger
@@ -45,7 +46,7 @@ class OpenAIServingCompletion(OpenAIServing):
self,
engine_client: EngineClient,
model_config: ModelConfig,
-served_model_names: List[str],
+base_model_paths: List[BaseModelPath],
*,
lora_modules: Optional[List[LoRAModulePath]],
prompt_adapters: Optional[List[PromptAdapterPath]],
@@ -54,7 +55,7 @@ class OpenAIServingCompletion(OpenAIServing):
):
super().__init__(engine_client=engine_client,
model_config=model_config,
-served_model_names=served_model_names,
+base_model_paths=base_model_paths,
lora_modules=lora_modules,
prompt_adapters=prompt_adapters,
request_logger=request_logger,
@@ -89,7 +90,7 @@ class OpenAIServingCompletion(OpenAIServing):
return self.create_error_response(
"suffix is not currently supported")
-model_name = self.served_model_names[0]
+model_name = self.base_model_paths[0].name
request_id = f"cmpl-{random_uuid()}"
created_time = int(time.time())