[Misc] IO Processor plugins for pooling models (#22820)

Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
Christian Pinto
2025-09-01 07:07:12 +01:00
committed by GitHub
parent 437c3ce026
commit 1cb39dbcdd
25 changed files with 1183 additions and 43 deletions

View File

@@ -15,6 +15,7 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor.layers.sampler import SamplerOutput
from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors.interface import IOProcessor
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import BeamSearchParams, SamplingParams
from vllm.transformers_utils.tokenizer import AnyTokenizer
@@ -267,6 +268,9 @@ class EngineClient(ABC):
"""Get the appropriate tokenizer for the request"""
...
async def get_io_processor(self) -> IOProcessor:
raise NotImplementedError
@abstractmethod
async def is_tracing_enabled(self) -> bool:
...