[Fix] Add chat completion example and simplify dependencies (#576)

This commit is contained in:
Zhuohan Li
2023-07-25 23:45:48 -07:00
committed by GitHub
parent df5dd3c68e
commit 82ad323dee
4 changed files with 52 additions and 11 deletions

View File

@@ -13,9 +13,6 @@ from fastapi import BackgroundTasks, Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from fastchat.conversation import Conversation, SeparatorStyle
from fastchat.model.model_adapter import get_conversation_template
import uvicorn
from vllm.engine.arg_utils import AsyncEngineArgs
@@ -33,6 +30,13 @@ from vllm.sampling_params import SamplingParams
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.utils import random_uuid
# FastChat is an optional dependency: it supplies the conversation
# templates used by the chat-completion endpoints. Assume it is present,
# and downgrade the flag if the import fails so the server can still
# start; the chat routes check `_fastchat_available` and raise a helpful
# error only when they are actually used.
_fastchat_available = True
try:
    from fastchat.conversation import Conversation, SeparatorStyle
    from fastchat.model.model_adapter import get_conversation_template
except ImportError:
    _fastchat_available = False
TIMEOUT_KEEP_ALIVE = 5 # seconds
logger = init_logger(__name__)
@@ -63,6 +67,11 @@ async def check_model(request) -> Optional[JSONResponse]:
async def get_gen_prompt(request) -> str:
if not _fastchat_available:
raise ModuleNotFoundError(
"fastchat is not installed. Please install fastchat to use "
"the chat completion and conversation APIs: `$ pip install fschat`"
)
conv = get_conversation_template(request.model)
conv = Conversation(
name=conv.name,