[Core] Set pooling params based on task and model (#21128)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-07-18 20:41:17 +08:00
committed by GitHub
parent 4adc66f64d
commit 45badd05d0
24 changed files with 509 additions and 241 deletions

View File

@@ -142,6 +142,11 @@ class OpenAIServingPooling(OpenAIServing):
try:
pooling_params = request.to_pooling_params()
try:
pooling_params.verify("encode", self.model_config)
except ValueError as e:
return self.create_error_response(str(e))
for i, engine_prompt in enumerate(engine_prompts):
request_id_item = f"{request_id}-{i}"