[Bugfix] Validate lora adapters to avoid crashing server (#11727)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
Author: Joe Runde
Date: 2025-01-10 00:56:36 -07:00 (committed by GitHub)
Commit: ac2f3f7fee (parent: cf5f000d21)
15 changed files with 460 additions and 172 deletions


@@ -662,7 +662,7 @@ def build_app(args: Namespace) -> FastAPI:
     return app
 
 
-def init_app_state(
+async def init_app_state(
     engine_client: EngineClient,
     model_config: ModelConfig,
     state: State,
@@ -690,12 +690,13 @@ def init_app_state(
     logger.info("Using supplied chat template:\n%s", resolved_chat_template)
 
     state.openai_serving_models = OpenAIServingModels(
+        engine_client=engine_client,
         model_config=model_config,
         base_model_paths=base_model_paths,
         lora_modules=args.lora_modules,
         prompt_adapters=args.prompt_adapters,
     )
-    # TODO: The chat template is now broken for lora adapters :(
+    await state.openai_serving_models.init_static_loras()
     state.openai_serving_chat = OpenAIServingChat(
         engine_client,
         model_config,
@@ -794,7 +795,7 @@ async def run_server(args, **uvicorn_kwargs) -> None:
         app = build_app(args)
 
         model_config = await engine_client.get_model_config()
-        init_app_state(engine_client, model_config, app.state, args)
+        await init_app_state(engine_client, model_config, app.state, args)
 
         shutdown_task = await serve_http(
             app,
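
The hunks above appear to come from vLLM's OpenAI-compatible API server entrypoint: init_app_state becomes a coroutine and run_server awaits it so that the static LoRA adapters supplied via --lora-modules can be loaded, and therefore validated, with "await state.openai_serving_models.init_static_loras()" before the server starts accepting traffic. The following is a minimal, self-contained sketch of that startup-time validation idea, not the actual vLLM implementation; the Sketch-suffixed classes, the _load_lora_adapter helper, and the path-existence check are illustrative assumptions.

# Illustrative sketch only: the real OpenAIServingModels / EngineClient types
# in vLLM are richer; the names below and the path check are stand-ins.
import asyncio
import os
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class LoRAModulePathSketch:
    """One entry from --lora-modules: an adapter name plus its local path."""
    name: str
    path: str


class ServingModelsSketch:
    def __init__(self,
                 lora_modules: Optional[List[LoRAModulePathSketch]] = None):
        self.static_lora_modules = lora_modules or []

    async def _load_lora_adapter(
            self, lora: LoRAModulePathSketch) -> Optional[str]:
        # Stand-in for the engine call that actually loads the adapter;
        # returns an error message on failure, None on success.
        if not os.path.isdir(lora.path):
            return f"adapter path {lora.path!r} does not exist"
        return None

    async def init_static_loras(self) -> None:
        # Load every adapter supplied at startup so a bad adapter fails the
        # server immediately with a clear error, instead of crashing the
        # engine on the first request that tries to use it.
        for lora in self.static_lora_modules:
            error = await self._load_lora_adapter(lora)
            if error is not None:
                raise ValueError(
                    f"Invalid LoRA adapter {lora.name!r}: {error}")


async def main() -> None:
    models = ServingModelsSketch(
        [LoRAModulePathSketch(name="sql-lora", path="/models/sql-lora")])
    await models.init_static_loras()  # raises ValueError if the path is bad


if __name__ == "__main__":
    asyncio.run(main())

In practice this is the difference between a bad adapter path rejecting something like "vllm serve <model> --enable-lora --lora-modules sql-lora=/path/to/adapter" at startup with a readable error, and the same adapter taking the running server down on the first request that tries to use it.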