[Misc] IO Processor plugins for pooling models (#22820)
Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
@@ -64,6 +64,7 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               EmbeddingRequest,
                                               EmbeddingResponse, ErrorInfo,
                                               ErrorResponse,
+                                              IOProcessorResponse,
                                               LoadLoRAAdapterRequest,
                                               PoolingRequest, PoolingResponse,
                                               RerankRequest, RerankResponse,
@@ -795,7 +796,7 @@ async def create_pooling(request: PoolingRequest, raw_request: Request):
     if isinstance(generator, ErrorResponse):
         return JSONResponse(content=generator.model_dump(),
                             status_code=generator.error.code)
-    elif isinstance(generator, PoolingResponse):
+    elif isinstance(generator, (PoolingResponse, IOProcessorResponse)):
         return JSONResponse(content=generator.model_dump())

     assert_never(generator)
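For context, here is a minimal, self-contained sketch of the exhaustive-dispatch pattern this hunk extends. The response classes below are simplified stand-ins for vLLM's actual Pydantic models; only the isinstance/assert_never shape is the point. Once `IOProcessorResponse` joins the handled tuple, a static type checker narrows the union to `Never` at the final line, so any future unhandled response type becomes a type-check error rather than a silent fall-through.

```python
from dataclasses import dataclass
from typing import Union

from typing_extensions import assert_never  # typing.assert_never on Python 3.11+


# Stand-ins for the real vLLM response models, kept deliberately small.
@dataclass
class ErrorResponse:
    code: int
    message: str


@dataclass
class PoolingResponse:
    data: list[float]


@dataclass
class IOProcessorResponse:
    data: dict


Response = Union[ErrorResponse, PoolingResponse, IOProcessorResponse]


def render(generator: Response) -> tuple[int, Response]:
    """Map a response object to an (HTTP status, payload) pair."""
    if isinstance(generator, ErrorResponse):
        return generator.code, generator
    elif isinstance(generator, (PoolingResponse, IOProcessorResponse)):
        return 200, generator
    # Every member of the union is handled above, so the checker narrows
    # `generator` to Never here; adding a new response type without a
    # branch would surface as a mypy/pyright error on this call.
    assert_never(generator)
```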
@@ -1782,7 +1783,7 @@ async def init_app_state(
     ) if "generate" in supported_tasks else None
     state.openai_serving_pooling = OpenAIServingPooling(
         engine_client,
-        model_config,
+        vllm_config,
         state.openai_serving_models,
         request_logger=request_logger,
         chat_template=resolved_chat_template,
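The constructor now receives the full `vllm_config` instead of just `model_config`, presumably so the pooling server has enough configuration to discover and instantiate an IO Processor plugin. Below is a hedged sketch of one plausible way such plugin resolution could look using standard entry points; the `io_processor_plugin` attribute and the entry-point group name are illustrative assumptions, not vLLM's confirmed API.

```python
from importlib.metadata import entry_points  # keyword API requires Python 3.10+


def load_io_processor(vllm_config):
    """Return an instantiated IO Processor plugin, or None if none is configured."""
    # Hypothetical attribute: the plugin name requested in the config.
    plugin_name = getattr(vllm_config, "io_processor_plugin", None)
    if plugin_name is None:
        return None
    # Scan installed distributions for a matching entry point. The group
    # name here is an assumption made for illustration.
    for ep in entry_points(group="vllm.io_processor_plugins"):
        if ep.name == plugin_name:
            plugin_cls = ep.load()
            return plugin_cls(vllm_config)
    raise ValueError(f"IO Processor plugin {plugin_name!r} is not installed")
```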