[V1] Logits processors extensibility (#19912)

Signed-off-by: Andrew Feldman <afeldman@redhat.com> Signed-off-by: Andrew Feldman <afeld2012@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Nick Hill <nhill@redhat.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Andrew Feldman <afeld2012@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-08-16 15:59:17 -04:00
parent 4fc722eca4
commit bf7f470b22
22 changed files with 1312 additions and 334 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -43,6 +43,7 @@ from vllm.transformers_utils.config import is_interleaved
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import (STR_DUAL_CHUNK_FLASH_ATTN_VAL, FlexibleArgumentParser,
                        GiB_bytes, get_ip, is_in_ray_actor)
+from vllm.v1.sample.logits_processor import LogitsProcessor

 # yapf: enable

@@ -435,6 +436,10 @@ class EngineArgs:
    enable_multimodal_encoder_data_parallel: bool = \
        ParallelConfig.enable_multimodal_encoder_data_parallel

+    logits_processors: Optional[list[Union[
+        str, type[LogitsProcessor]]]] = ModelConfig.logits_processors
+    """Custom logitproc types"""
+
    async_scheduling: bool = SchedulerConfig.async_scheduling
    # DEPRECATED
    enable_prompt_adapter: bool = False
@@ -549,6 +554,8 @@ class EngineArgs:
                                 **model_kwargs["model_impl"])
        model_group.add_argument("--override-attention-dtype",
                                 **model_kwargs["override_attention_dtype"])
+        model_group.add_argument("--logits-processors",
+                                 **model_kwargs["logits_processors"])

        # Model loading arguments
        load_kwargs = get_kwargs(LoadConfig)
@@ -940,6 +947,7 @@ class EngineArgs:
            enable_sleep_mode=self.enable_sleep_mode,
            model_impl=self.model_impl,
            override_attention_dtype=self.override_attention_dtype,
+            logits_processors=self.logits_processors,
        )

    def validate_tensorizer_args(self):