[Model]: Add transformers backend support (#11330)
# Adds support for `transformers` as a backend Following https://github.com/huggingface/transformers/pull/35235, a bunch of models should already be supported, we are ramping up support for more models. Thanks @Isotr0py for the TP support, and @hmellor for his help as well! This includes: - `trust_remote_code=True` support: any model on the hub, if it implements attention the correct way can be natively supported!! - tensor parallel support --------- Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Isotr0py <2037008807@qq.com> Co-authored-by: Isotr0py <41363108+Isotr0py@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Isotr0py <2037008807@qq.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -83,6 +83,12 @@ class SupportsHash(Protocol):
|
||||
...
|
||||
|
||||
|
||||
class ModelImpl(str, enum.Enum):
|
||||
AUTO = "auto"
|
||||
VLLM = "vllm"
|
||||
TRANSFORMERS = "transformers"
|
||||
|
||||
|
||||
class ModelConfig:
|
||||
"""Configuration for the model.
|
||||
|
||||
@@ -167,6 +173,12 @@ class ModelConfig:
|
||||
`logits_processors` extra completion argument. Defaults to None,
|
||||
which allows no processors.
|
||||
generation_config: Configuration parameter file for generation.
|
||||
model_impl: Which implementation of the model to use:
|
||||
"auto" will try to use the vLLM implementation if it exists and
|
||||
fall back to the Transformers implementation if no vLLM
|
||||
implementation is available.
|
||||
"vllm" will use the vLLM model implementation.
|
||||
"transformers" will use the Transformers model implementation.
|
||||
override_generation_config: Override the generation config with the
|
||||
given config.
|
||||
"""
|
||||
@@ -230,6 +242,7 @@ class ModelConfig:
|
||||
generation_config: Optional[str] = None,
|
||||
enable_sleep_mode: bool = False,
|
||||
override_generation_config: Optional[Dict[str, Any]] = None,
|
||||
model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
|
||||
) -> None:
|
||||
self.model = model
|
||||
self.tokenizer = tokenizer
|
||||
@@ -241,6 +254,7 @@ class ModelConfig:
|
||||
self.code_revision = code_revision
|
||||
self.rope_scaling = rope_scaling
|
||||
self.rope_theta = rope_theta
|
||||
self.model_impl = model_impl
|
||||
|
||||
if hf_overrides is None:
|
||||
hf_overrides = {}
|
||||
|
||||
Reference in New Issue
Block a user