[BugFix][Frontend] Use LoRA tokenizer in OpenAI APIs (#6227)
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
@@ -257,7 +257,8 @@ def run_server(args, llm_engine=None):
     openai_serving_embedding = OpenAIServingEmbedding(engine, model_config,
                                                       served_model_names)
     openai_serving_tokenization = OpenAIServingTokenization(
-        engine, model_config, served_model_names, args.chat_template)
+        engine, model_config, served_model_names, args.lora_modules,
+        args.chat_template)
     app.root_path = args.root_path

     logger.info("Available routes are:")
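The substance of the change is that OpenAIServingTokenization now receives args.lora_modules, so the tokenization endpoints can use the tokenizer that belongs to a requested LoRA adapter instead of always falling back to the base model's tokenizer. Below is a minimal sketch of that lookup, assuming illustrative names (LoRAModulePath, resolve_tokenizer) rather than the actual vLLM internals:

# Sketch only: how a tokenization endpoint might pick an adapter-specific
# tokenizer when the requested model name matches a registered LoRA module.
# LoRAModulePath and resolve_tokenizer are assumed names for illustration.
from dataclasses import dataclass
from typing import Optional

from transformers import AutoTokenizer, PreTrainedTokenizerBase


@dataclass
class LoRAModulePath:
    name: str        # model name clients send in the request
    local_path: str   # directory holding the adapter (and possibly its tokenizer)


def resolve_tokenizer(
    requested_model: str,
    base_tokenizer: PreTrainedTokenizerBase,
    lora_modules: Optional[list[LoRAModulePath]],
) -> PreTrainedTokenizerBase:
    """Return the adapter's tokenizer if the request targets a LoRA module,
    otherwise fall back to the base model's tokenizer."""
    for module in lora_modules or []:
        if module.name == requested_model:
            # An adapter may ship its own tokenizer (e.g. added special tokens),
            # so load it from the adapter directory instead of the base model.
            return AutoTokenizer.from_pretrained(module.local_path)
    return base_tokenizer

With a lookup like this, a tokenize request that names a served LoRA adapter is processed with that adapter's vocabulary (including any added special tokens) rather than the base model's, which is the behavior this fix restores.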