[V1] Logits processors extensibility (#19912)
Signed-off-by: Andrew Feldman <afeldman@redhat.com> Signed-off-by: Andrew Feldman <afeld2012@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Nick Hill <nhill@redhat.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Andrew Feldman <afeld2012@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -62,6 +62,7 @@ if TYPE_CHECKING:
                                        QuantizationConfig)
    from vllm.model_executor.model_loader import LoadFormats
    from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
    from vllm.v1.sample.logits_processor import LogitsProcessor

    HfOverrides = Union[dict, Callable[[type], type]]
else:
@@ -72,6 +73,7 @@ else:
    BaseModelLoader = Any
    LoadFormats = Any
    TensorizerConfig = Any
    LogitsProcessor = Any
    HfOverrides = Union[dict[str, Any], Callable[[type], type]]

me_quant = LazyLoader("model_executor", globals(),
@@ -465,6 +467,9 @@ class ModelConfig:
    - "transformers" will use the Transformers model implementation."""
    override_attention_dtype: Optional[str] = None
    """Override dtype for attention"""
    logits_processors: Optional[list[Union[str, type[LogitsProcessor]]]] = None
    """One or more logits processors' fully-qualified class names or class
    definitions"""

    def compute_hash(self) -> str:
        """
Reference in New Issue
Block a user