[Feature] OpenAI-Compatible Tools API + Streaming for Hermes & Mistral models (#5649)

Co-authored-by: constellate <constellate@1-ai-appserver-staging.codereach.com>
Co-authored-by: Kyle Mistele <kyle@constellate.ai>
This commit is contained in:
Kyle Mistele
2024-09-04 15:18:13 -05:00
committed by GitHub
parent 561d6f8077
commit e02ce498be
26 changed files with 2591 additions and 86 deletions

View File

@@ -233,7 +233,7 @@ def mount_metrics(app: FastAPI):
metrics_route = Mount("/metrics", make_asgi_app())
# Workaround for 307 Redirect for /metrics
-metrics_route.path_regex = re.compile('^/metrics(?P<path>.*)$')
+metrics_route.path_regex = re.compile("^/metrics(?P<path>.*)$")
app.routes.append(metrics_route)
@@ -283,11 +283,14 @@ async def show_version():
@router.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest,
raw_request: Request):
generator = await openai_serving_chat.create_chat_completion(
request, raw_request)
if isinstance(generator, ErrorResponse):
return JSONResponse(content=generator.model_dump(),
status_code=generator.code)
elif isinstance(generator, ChatCompletionResponse):
return JSONResponse(content=generator.model_dump())
@@ -422,7 +425,8 @@ async def init_app(
request_logger=request_logger,
chat_template=args.chat_template,
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
-)
+enable_auto_tools=args.enable_auto_tool_choice,
+tool_parser=args.tool_call_parser)
openai_serving_completion = OpenAIServingCompletion(
async_engine_client,
model_config,