[Deprecation] Remove deprecated task, seed and MM settings (#30397)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-12-11 11:59:39 +08:00
committed by GitHub
parent 5a87d8b9b1
commit 7e24e5d4d6
16 changed files with 25 additions and 276 deletions

View File

@@ -73,17 +73,6 @@ logger = init_logger(__name__)
RunnerOption = Literal["auto", RunnerType]
ConvertType = Literal["none", "embed", "classify", "reward"]
ConvertOption = Literal["auto", ConvertType]
TaskOption = Literal[
"auto",
"generate",
"embedding",
"embed",
"classify",
"score",
"reward",
"transcription",
"draft",
]
TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]
ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"]
LogprobsMode = Literal[
@@ -93,12 +82,6 @@ HfOverrides = dict[str, Any] | Callable[[PretrainedConfig], PretrainedConfig]
ModelImpl = Literal["auto", "vllm", "transformers", "terratorch"]
LayerBlockType = Literal["attention", "linear_attention", "mamba"]
_RUNNER_TASKS: dict[RunnerType, list[TaskOption]] = {
"generate": ["generate", "transcription"],
"pooling": ["embedding", "embed", "classify", "score", "reward"],
"draft": ["draft"],
}
_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = {
"generate": [],
"pooling": ["embed", "classify", "reward"],
@@ -126,12 +109,6 @@ class ModelConfig:
"""Convert the model using adapters defined in
[vllm.model_executor.models.adapters][]. The most common use case is to
adapt a text generation model to be used for pooling tasks."""
task: TaskOption | None = None
"""[DEPRECATED] The task to use the model for. If the model supports more
than one model runner, this is used to select which model runner to run.
Note that the model may support other tasks using the same model runner.
"""
tokenizer: SkipValidation[str] = None # type: ignore
"""Name or path of the Hugging Face tokenizer to use. If unspecified, model
name or path will be used."""
@@ -335,7 +312,6 @@ class ModelConfig:
ignored_factors = {
"runner",
"convert",
"task",
"tokenizer",
"tokenizer_mode",
"seed",
@@ -510,97 +486,6 @@ class ModelConfig:
is_generative_model = registry.is_text_generation_model(architectures, self)
is_pooling_model = registry.is_pooling_model(architectures, self)
def _task_to_convert(task: TaskOption) -> ConvertType:
if task == "embedding" or task == "embed":
return "embed"
if task == "classify":
return "classify"
if task == "reward":
logger.warning(
"Pooling models now default support all pooling; "
"you can use it without any settings."
)
return "embed"
if task == "score":
new_task = self._get_default_pooling_task(architectures)
return "classify" if new_task == "classify" else "embed"
return "none"
if self.task is not None:
runner: RunnerOption = "auto"
convert: ConvertOption = "auto"
msg_prefix = (
"The 'task' option has been deprecated and will be "
"removed in v0.13.0 or v1.0, whichever comes first."
)
msg_hint = "Please remove this option."
is_generative_task = self.task in _RUNNER_TASKS["generate"]
is_pooling_task = self.task in _RUNNER_TASKS["pooling"]
if is_generative_model and is_pooling_model:
if is_generative_task:
runner = "generate"
convert = "auto"
msg_hint = (
"Please replace this option with `--runner "
"generate` to continue using this model "
"as a generative model."
)
elif is_pooling_task:
runner = "pooling"
convert = "auto"
msg_hint = (
"Please replace this option with `--runner "
"pooling` to continue using this model "
"as a pooling model."
)
else: # task == "auto"
pass
elif is_generative_model or is_pooling_model:
if is_generative_task:
runner = "generate"
convert = "auto"
msg_hint = "Please remove this option"
elif is_pooling_task:
runner = "pooling"
convert = _task_to_convert(self.task)
msg_hint = (
"Please replace this option with `--convert "
f"{convert}` to continue using this model "
"as a pooling model."
)
else: # task == "auto"
pass
else:
# Neither generative nor pooling model - try to convert if possible
if is_pooling_task:
runner = "pooling"
convert = _task_to_convert(self.task)
msg_hint = (
"Please replace this option with `--runner pooling "
f"--convert {convert}` to continue using this model "
"as a pooling model."
)
else:
debug_info = {
"architectures": architectures,
"is_generative_model": is_generative_model,
"is_pooling_model": is_pooling_model,
}
raise AssertionError(
"The model should be a generative or "
"pooling model when task is set to "
f"{self.task!r}. Found: {debug_info}"
)
self.runner = runner
self.convert = convert
msg = f"{msg_prefix} {msg_hint}"
warnings.warn(msg, DeprecationWarning, stacklevel=2)
self.runner_type = self._get_runner_type(architectures, self.runner)
self.convert_type = self._get_convert_type(
architectures, self.runner_type, self.convert
@@ -918,22 +803,6 @@ class ModelConfig:
return convert_type
def _get_default_pooling_task(
self,
architectures: list[str],
) -> Literal["embed", "classify", "reward"]:
if self.registry.is_cross_encoder_model(architectures, self):
return "classify"
for arch in architectures:
match = try_match_architecture_defaults(arch, runner_type="pooling")
if match:
_, (_, convert_type) = match
assert convert_type != "none"
return convert_type
return "embed"
def _parse_quant_hf_config(self, hf_config: PretrainedConfig):
quant_cfg = getattr(hf_config, "quantization_config", None)
if quant_cfg is None: