[Deprecation] Remove deprecated task, seed and MM settings (#30397)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -73,17 +73,6 @@ logger = init_logger(__name__)
|
||||
RunnerOption = Literal["auto", RunnerType]
|
||||
ConvertType = Literal["none", "embed", "classify", "reward"]
|
||||
ConvertOption = Literal["auto", ConvertType]
|
||||
TaskOption = Literal[
|
||||
"auto",
|
||||
"generate",
|
||||
"embedding",
|
||||
"embed",
|
||||
"classify",
|
||||
"score",
|
||||
"reward",
|
||||
"transcription",
|
||||
"draft",
|
||||
]
|
||||
TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
|
||||
ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
|
||||
LogprobsMode = Literal[
|
||||
@@ -93,12 +82,6 @@ HfOverrides = dict[str, Any] | Callable[[PretrainedConfig], PretrainedConfig]
|
||||
ModelImpl = Literal["auto", "vllm", "transformers", "terratorch"]
|
||||
LayerBlockType = Literal["attention", "linear_attention", "mamba"]
|
||||
|
||||
_RUNNER_TASKS: dict[RunnerType, list[TaskOption]] = {
|
||||
"generate": ["generate", "transcription"],
|
||||
"pooling": ["embedding", "embed", "classify", "score", "reward"],
|
||||
"draft": ["draft"],
|
||||
}
|
||||
|
||||
_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = {
|
||||
"generate": [],
|
||||
"pooling": ["embed", "classify", "reward"],
|
||||
@@ -126,12 +109,6 @@ class ModelConfig:
|
||||
"""Convert the model using adapters defined in
|
||||
[vllm.model_executor.models.adapters][]. The most common use case is to
|
||||
adapt a text generation model to be used for pooling tasks."""
|
||||
task: TaskOption | None = None
|
||||
"""[DEPRECATED] The task to use the model for. If the model supports more
|
||||
than one model runner, this is used to select which model runner to run.
|
||||
|
||||
Note that the model may support other tasks using the same model runner.
|
||||
"""
|
||||
tokenizer: SkipValidation[str] = None # type: ignore
|
||||
"""Name or path of the Hugging Face tokenizer to use. If unspecified, model
|
||||
name or path will be used."""
|
||||
@@ -335,7 +312,6 @@ class ModelConfig:
|
||||
ignored_factors = {
|
||||
"runner",
|
||||
"convert",
|
||||
"task",
|
||||
"tokenizer",
|
||||
"tokenizer_mode",
|
||||
"seed",
|
||||
@@ -510,97 +486,6 @@ class ModelConfig:
|
||||
is_generative_model = registry.is_text_generation_model(architectures, self)
|
||||
is_pooling_model = registry.is_pooling_model(architectures, self)
|
||||
|
||||
def _task_to_convert(task: TaskOption) -> ConvertType:
|
||||
if task == "embedding" or task == "embed":
|
||||
return "embed"
|
||||
if task == "classify":
|
||||
return "classify"
|
||||
if task == "reward":
|
||||
logger.warning(
|
||||
"Pooling models now default support all pooling; "
|
||||
"you can use it without any settings."
|
||||
)
|
||||
return "embed"
|
||||
if task == "score":
|
||||
new_task = self._get_default_pooling_task(architectures)
|
||||
return "classify" if new_task == "classify" else "embed"
|
||||
|
||||
return "none"
|
||||
|
||||
if self.task is not None:
|
||||
runner: RunnerOption = "auto"
|
||||
convert: ConvertOption = "auto"
|
||||
msg_prefix = (
|
||||
"The 'task' option has been deprecated and will be "
|
||||
"removed in v0.13.0 or v1.0, whichever comes first."
|
||||
)
|
||||
msg_hint = "Please remove this option."
|
||||
|
||||
is_generative_task = self.task in _RUNNER_TASKS["generate"]
|
||||
is_pooling_task = self.task in _RUNNER_TASKS["pooling"]
|
||||
|
||||
if is_generative_model and is_pooling_model:
|
||||
if is_generative_task:
|
||||
runner = "generate"
|
||||
convert = "auto"
|
||||
msg_hint = (
|
||||
"Please replace this option with `--runner "
|
||||
"generate` to continue using this model "
|
||||
"as a generative model."
|
||||
)
|
||||
elif is_pooling_task:
|
||||
runner = "pooling"
|
||||
convert = "auto"
|
||||
msg_hint = (
|
||||
"Please replace this option with `--runner "
|
||||
"pooling` to continue using this model "
|
||||
"as a pooling model."
|
||||
)
|
||||
else: # task == "auto"
|
||||
pass
|
||||
elif is_generative_model or is_pooling_model:
|
||||
if is_generative_task:
|
||||
runner = "generate"
|
||||
convert = "auto"
|
||||
msg_hint = "Please remove this option"
|
||||
elif is_pooling_task:
|
||||
runner = "pooling"
|
||||
convert = _task_to_convert(self.task)
|
||||
msg_hint = (
|
||||
"Please replace this option with `--convert "
|
||||
f"{convert}` to continue using this model "
|
||||
"as a pooling model."
|
||||
)
|
||||
else: # task == "auto"
|
||||
pass
|
||||
else:
|
||||
# Neither generative nor pooling model - try to convert if possible
|
||||
if is_pooling_task:
|
||||
runner = "pooling"
|
||||
convert = _task_to_convert(self.task)
|
||||
msg_hint = (
|
||||
"Please replace this option with `--runner pooling "
|
||||
f"--convert {convert}` to continue using this model "
|
||||
"as a pooling model."
|
||||
)
|
||||
else:
|
||||
debug_info = {
|
||||
"architectures": architectures,
|
||||
"is_generative_model": is_generative_model,
|
||||
"is_pooling_model": is_pooling_model,
|
||||
}
|
||||
raise AssertionError(
|
||||
"The model should be a generative or "
|
||||
"pooling model when task is set to "
|
||||
f"{self.task!r}. Found: {debug_info}"
|
||||
)
|
||||
|
||||
self.runner = runner
|
||||
self.convert = convert
|
||||
|
||||
msg = f"{msg_prefix} {msg_hint}"
|
||||
warnings.warn(msg, DeprecationWarning, stacklevel=2)
|
||||
|
||||
self.runner_type = self._get_runner_type(architectures, self.runner)
|
||||
self.convert_type = self._get_convert_type(
|
||||
architectures, self.runner_type, self.convert
|
||||
@@ -918,22 +803,6 @@ class ModelConfig:
|
||||
|
||||
return convert_type
|
||||
|
||||
def _get_default_pooling_task(
|
||||
self,
|
||||
architectures: list[str],
|
||||
) -> Literal["embed", "classify", "reward"]:
|
||||
if self.registry.is_cross_encoder_model(architectures, self):
|
||||
return "classify"
|
||||
|
||||
for arch in architectures:
|
||||
match = try_match_architecture_defaults(arch, runner_type="pooling")
|
||||
if match:
|
||||
_, (_, convert_type) = match
|
||||
assert convert_type != "none"
|
||||
return convert_type
|
||||
|
||||
return "embed"
|
||||
|
||||
def _parse_quant_hf_config(self, hf_config: PretrainedConfig):
|
||||
quant_cfg = getattr(hf_config, "quantization_config", None)
|
||||
if quant_cfg is None:
|
||||
|
||||
Reference in New Issue
Block a user