[V1] Logits processors extensibility (#19912)
Signed-off-by: Andrew Feldman <afeldman@redhat.com> Signed-off-by: Andrew Feldman <afeld2012@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Nick Hill <nhill@redhat.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Andrew Feldman <afeld2012@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -62,6 +62,7 @@ if TYPE_CHECKING:
                                        QuantizationConfig)
    from vllm.model_executor.model_loader import LoadFormats
    from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
    from vllm.v1.sample.logits_processor import LogitsProcessor

    HfOverrides = Union[dict, Callable[[type], type]]
else:
@@ -72,6 +73,7 @@ else:
    BaseModelLoader = Any
    LoadFormats = Any
    TensorizerConfig = Any
    LogitsProcessor = Any
    HfOverrides = Union[dict[str, Any], Callable[[type], type]]

me_quant = LazyLoader("model_executor", globals(),
@@ -465,6 +467,9 @@ class ModelConfig:
    - "transformers" will use the Transformers model implementation."""
    override_attention_dtype: Optional[str] = None
    """Override dtype for attention"""
    logits_processors: Optional[list[Union[str, type[LogitsProcessor]]]] = None
    """One or more logits processors' fully-qualified class names or class
    definitions"""

    def compute_hash(self) -> str:
        """
Reference in New Issue
Block a user