[Core] Support Lora lineage and base model metadata management (#6315)

This commit is contained in:
Jiaxin Shan
2024-09-19 23:20:56 -07:00
committed by GitHub
parent 9e5ec35b1f
commit 260d40b5ea
15 changed files with 337 additions and 45 deletions

View File

@@ -7,10 +7,12 @@ from vllm.config import MultiModalConfig
from vllm.engine.multiprocessing.client import MQLLMEngineClient
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_engine import BaseModelPath
from vllm.transformers_utils.tokenizer import get_tokenizer
MODEL_NAME = "openai-community/gpt2"
CHAT_TEMPLATE = "Dummy chat template for testing {}"
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
@dataclass
@@ -37,7 +39,7 @@ async def _async_serving_chat_init():
serving_completion = OpenAIServingChat(engine,
model_config,
served_model_names=[MODEL_NAME],
BASE_MODEL_PATHS,
response_role="assistant",
chat_template=CHAT_TEMPLATE,
lora_modules=None,
@@ -58,7 +60,7 @@ def test_serving_chat_should_set_correct_max_tokens():
serving_chat = OpenAIServingChat(mock_engine,
MockModelConfig(),
served_model_names=[MODEL_NAME],
BASE_MODEL_PATHS,
response_role="assistant",
chat_template=CHAT_TEMPLATE,
lora_modules=None,