[BugFix][Frontend] Use LoRA tokenizer in OpenAI APIs (#6227)

Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
Nick Hill
2024-07-18 00:13:30 -07:00
committed by GitHub
parent 8a74c68bd1
commit e2fbaee725
16 changed files with 267 additions and 186 deletions

View File

@@ -257,7 +257,8 @@ def run_server(args, llm_engine=None):
openai_serving_embedding = OpenAIServingEmbedding(engine, model_config,
served_model_names)
openai_serving_tokenization = OpenAIServingTokenization(
-engine, model_config, served_model_names, args.chat_template)
+engine, model_config, served_model_names, args.lora_modules,
+args.chat_template)
app.root_path = args.root_path
logger.info("Available routes are:")