[Misc] IO Processor plugins for pooling models (#22820)

Signed-off-by: Christian Pinto <christian.pinto@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
Christian Pinto
2025-09-01 07:07:12 +01:00
committed by GitHub
parent 437c3ce026
commit 1cb39dbcdd
25 changed files with 1183 additions and 43 deletions

View File

@@ -364,6 +364,7 @@ class EngineArgs:
disable_mm_preprocessor_cache: bool = False # DEPRECATED
mm_processor_cache_gb: float = MultiModalConfig.mm_processor_cache_gb
mm_encoder_tp_mode: MMEncoderTPMode = MultiModalConfig.mm_encoder_tp_mode
io_processor_plugin: Optional[str] = None
skip_mm_profiling: bool = MultiModalConfig.skip_mm_profiling
# LoRA fields
enable_lora: bool = False
@@ -577,6 +578,8 @@ class EngineArgs:
**model_kwargs["override_attention_dtype"])
model_group.add_argument("--logits-processors",
**model_kwargs["logits_processors"])
model_group.add_argument("--io-processor-plugin",
**model_kwargs["io_processor_plugin"])
# Model loading arguments
load_kwargs = get_kwargs(LoadConfig)
@@ -993,6 +996,7 @@ class EngineArgs:
model_impl=self.model_impl,
override_attention_dtype=self.override_attention_dtype,
logits_processors=self.logits_processors,
io_processor_plugin=self.io_processor_plugin,
)
def validate_tensorizer_args(self):