[Frontend] split run_server into build_server and run_server (#6740)

This commit is contained in:
Daniele
2024-07-24 19:36:04 +02:00
committed by GitHub
parent 40468b13fa
commit ee812580f7
2 changed files with 52 additions and 28 deletions

View File

@@ -1,5 +1,6 @@
 # The CLI entrypoint to vLLM.
 import argparse
+import asyncio
 import os
 import signal
 import sys
@@ -25,7 +26,7 @@ def serve(args: argparse.Namespace) -> None:
     # EngineArgs expects the model name to be passed as --model.
     args.model = args.model_tag
-    run_server(args)
+    asyncio.run(run_server(args))
def interactive_cli(args: argparse.Namespace) -> None: