[Core] Support Lora lineage and base model metadata management (#6315)
@@ -7,10 +7,12 @@ from vllm.config import MultiModalConfig
 from vllm.engine.multiprocessing.client import MQLLMEngineClient
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
+from vllm.entrypoints.openai.serving_engine import BaseModelPath
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 MODEL_NAME = "openai-community/gpt2"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"
+BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
 
 
 @dataclass
@@ -37,7 +39,7 @@ async def _async_serving_chat_init():
 
     serving_completion = OpenAIServingChat(engine,
                                            model_config,
-                                           served_model_names=[MODEL_NAME],
+                                           BASE_MODEL_PATHS,
                                            response_role="assistant",
                                            chat_template=CHAT_TEMPLATE,
                                            lora_modules=None,
@@ -58,7 +60,7 @@ def test_serving_chat_should_set_correct_max_tokens():
 
     serving_chat = OpenAIServingChat(mock_engine,
                                      MockModelConfig(),
-                                     served_model_names=[MODEL_NAME],
+                                     BASE_MODEL_PATHS,
                                      response_role="assistant",
                                      chat_template=CHAT_TEMPLATE,
                                      lora_modules=None,
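For context, a minimal sketch of what the swapped-in argument carries. It reuses only the names visible in the diff above; the reading of BaseModelPath as tying a served model name to the path of its base model (the metadata the commit title refers to) is an inference from this change, not a statement from the commit itself.

    # Sketch only, not part of the commit.
    from vllm.entrypoints.openai.serving_engine import BaseModelPath

    MODEL_NAME = "openai-community/gpt2"

    # Before: the server only knew the names it serves under.
    served_model_names = [MODEL_NAME]

    # After: each served name also records the path of its base model,
    # which is what lets LoRA adapters report the base model they
    # derive from (assumption, inferred from the commit title).
    BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]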