[Misc] IO Processor plugins for pooling models (#22820)

Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
2025-09-01 07:07:12 +01:00
parent 437c3ce026
commit 1cb39dbcdd
25 changed files with 1183 additions and 43 deletions
--- a/vllm/engine/protocol.py
+++ b/vllm/engine/protocol.py
@@ -15,6 +15,7 @@ from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
+from vllm.plugins.io_processors.interface import IOProcessor
 from vllm.pooling_params import PoolingParams
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.transformers_utils.tokenizer import AnyTokenizer
@@ -267,6 +268,9 @@ class EngineClient(ABC):
        """Get the appropriate tokenizer for the request"""
        ...

+    async def get_io_processor(self) -> IOProcessor:
+        """Return the `IOProcessor` for this engine client.
+
+        The base implementation provides no processor and always raises.
+        Unlike the neighbouring methods it is not decorated with
+        ``@abstractmethod``, so a subclass can be instantiated without
+        overriding it.
+
+        Raises:
+            NotImplementedError: Always, in this base implementation; as
+                this is a coroutine function, the error surfaces when the
+                returned coroutine is awaited.
+        """
+        raise NotImplementedError
+
    @abstractmethod
    async def is_tracing_enabled(self) -> bool:
        ...