[V1] Logits processors extensibility (#19912)

Signed-off-by: Andrew Feldman <afeldman@redhat.com>
Signed-off-by: Andrew Feldman <afeld2012@gmail.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Andrew Feldman <afeld2012@gmail.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
afeldman-nm
2025-08-16 15:59:17 -04:00
committed by GitHub
parent 4fc722eca4
commit bf7f470b22
22 changed files with 1312 additions and 334 deletions

View File

@@ -62,6 +62,7 @@ if TYPE_CHECKING:
QuantizationConfig)
from vllm.model_executor.model_loader import LoadFormats
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
from vllm.v1.sample.logits_processor import LogitsProcessor
HfOverrides = Union[dict, Callable[[type], type]]
else:
@@ -72,6 +73,7 @@ else:
BaseModelLoader = Any
LoadFormats = Any
TensorizerConfig = Any
LogitsProcessor = Any
HfOverrides = Union[dict[str, Any], Callable[[type], type]]
me_quant = LazyLoader("model_executor", globals(),
@@ -465,6 +467,9 @@ class ModelConfig:
- "transformers" will use the Transformers model implementation."""
override_attention_dtype: Optional[str] = None
"""Override dtype for attention"""
logits_processors: Optional[list[Union[str, type[LogitsProcessor]]]] = None
"""One or more logits processors' fully-qualified class names or class
definitions"""
def compute_hash(self) -> str:
"""