[Performance] V1 Classify Models E2E Performance Optimization (#23541)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-08-29 18:12:32 +08:00
committed by GitHub
parent ad39106b16
commit d9e00dbd1f
5 changed files with 81 additions and 38 deletions

View File

@@ -1805,17 +1805,13 @@ async def init_app_state(
request_logger=request_logger,
log_error_stack=args.log_error_stack,
) if "classify" in supported_tasks else None
- enable_serving_reranking = ("classify" in supported_tasks and getattr(
- model_config.hf_config, "num_labels", 0) == 1)
state.openai_serving_scores = ServingScores(
engine_client,
model_config,
state.openai_serving_models,
request_logger=request_logger,
log_error_stack=args.log_error_stack,
- ) if ("embed" in supported_tasks or enable_serving_reranking) else None
+ ) if ("embed" in supported_tasks or "score" in supported_tasks) else None
state.openai_serving_tokenization = OpenAIServingTokenization(
engine_client,
model_config,