[Model] Re-add the implicit conversion feature for as_seq_cls_model (#21103)
Signed-off-by: wang.yuqi <noooop@126.com>
@@ -551,7 +551,7 @@ class ModelConfig:
         # For pooling models, self.task is used to indicate the
         # user-selected task
         if self.task == "score":
-            if self.registry.is_cross_encoder_model(self.architectures):
+            if self._is_classify_task(self.architectures):
                 self.task = "classify"
             else:
                 self.task = "embed"
@@ -806,6 +806,12 @@ class ModelConfig:
                 f"one of {get_args(TokenizerMode)}.")
         self.tokenizer_mode = tokenizer_mode

+    def _is_classify_task(self, architectures: list[str]):
+        for arch in architectures:
+            if arch.endswith("ForSequenceClassification"):
+                return True
+        return self.registry.is_cross_encoder_model(architectures)
+
     def _get_preferred_pooling_task(
         self,
         architectures: list[str],
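These first two hunks re-add the implicit conversion: any architecture named `*ForSequenceClassification` now counts as a classification model, in addition to anything the registry already recognizes as a cross-encoder, so `task="score"` on such a model resolves to "classify" rather than "embed". A minimal standalone sketch of the suffix check (a hypothetical `is_classify_task` function, omitting the registry fallback):

    def is_classify_task(architectures: list[str]) -> bool:
        # Mirrors _is_classify_task above, minus the
        # registry.is_cross_encoder_model(architectures) fallback.
        return any(
            arch.endswith("ForSequenceClassification") for arch in architectures)

    print(is_classify_task(["Qwen2ForSequenceClassification"]))  # True
    print(is_classify_task(["LlamaForCausalLM"]))                # False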
@@ -813,14 +819,11 @@ class ModelConfig:
         model_id = self.model
         if get_pooling_config(model_id, self.revision):
             return "embed"
-        if self.registry.is_cross_encoder_model(architectures):
-            return "classify"
         if self.registry.is_transcription_model(architectures):
             return "transcription"

         suffix_to_preferred_task: list[tuple[str, _ResolvedTask]] = [
             # Other models follow this pattern
-            ("ForSequenceClassification", "classify"),
             ("EmbeddingModel", "embed"),
             ("RewardModel", "reward"),
         ]
@@ -878,11 +881,14 @@ class ModelConfig:
         self,
         task_option: TaskOption,
     ) -> dict[RunnerType, list[_ResolvedTask]]:
-        return {
-            "generate": self._get_supported_generation_tasks(task_option),
-            "pooling": self._get_supported_pooling_tasks(task_option),
-            "draft": ["draft"]
-        }
+        if self._is_classify_task(self.architectures):
+            return {"generate": [], "pooling": ["classify"], "draft": []}
+        else:
+            return {
+                "generate": self._get_supported_generation_tasks(task_option),
+                "pooling": self._get_supported_pooling_tasks(task_option),
+                "draft": ["draft"]
+            }

     def _get_supported_runner_types(
         self,
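With `_is_classify_task` wired into the supported-tasks query, classification architectures advertise only the "classify" pooling task and no generation tasks. A simplified sketch of the branch above (hypothetical free function, reusing the `is_classify_task` sketch from earlier):

    def get_supported_tasks(architectures: list[str]) -> dict[str, list[str]]:
        if is_classify_task(architectures):
            # Classification models are pinned to a single pooling task.
            return {"generate": [], "pooling": ["classify"], "draft": []}
        # Everything else keeps the usual generate/pooling/draft probing.
        return {"generate": ["generate"], "pooling": ["embed"], "draft": ["draft"]}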
@@ -925,12 +931,16 @@ class ModelConfig:
                 f"Available tasks for runner={task_runner!r}: "
                 f"{supported_tasks[task_runner]}")

+        if "classify" in supported_tasks.get("pooling", []):
+            # When multiple pooling tasks are present, default to
+            # pooling (eg cross-encoder) for non-standard architectures.
+            return "pooling"
+
         suffix_to_preferred_runner: list[tuple[str, RunnerType]] = [
             ("ForCausalLM", "generate"),
             ("ForConditionalGeneration", "generate"),
             ("ChatModel", "generate"),
             ("LMHeadModel", "generate"),
-            ("ForSequenceClassification", "pooling"),
             ("EmbeddingModel", "pooling"),
             ("RewardModel", "pooling"),
         ]
@@ -940,10 +950,6 @@ class ModelConfig:
             if arch.endswith(suffix) and pref_runner in supported_runner_types:
                 return pref_runner

-        if "classify" in supported_tasks.get("pooling", []):
-            # When multiple pooling tasks are present, default to
-            # pooling (eg cross-encoder) for non-standard architectures.
-            return "pooling"
         if "generate" in supported_runner_types:
             return "generate"
         if "pooling" in supported_runner_types:
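These two hunks move the classify check ahead of the suffix table instead of after it, and drop `("ForSequenceClassification", "pooling")` from the table, since `_is_classify_task` already captures that suffix. The net effect on resolution order, as a compressed sketch with hypothetical names:

    def resolve_runner(arch: str, supported_pooling: list[str]) -> str:
        # 1. A supported "classify" pooling task now wins outright,
        #    even for non-standard architecture names.
        if "classify" in supported_pooling:
            return "pooling"
        # 2. Otherwise fall back to name-suffix heuristics.
        for suffix, runner in [("ForCausalLM", "generate"),
                               ("EmbeddingModel", "pooling"),
                               ("RewardModel", "pooling")]:
            if arch.endswith(suffix):
                return runner
        return "generate"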
@@ -1525,7 +1531,7 @@ class ModelConfig:

     @property
     def is_matryoshka(self) -> bool:
-        return (hasattr(self.hf_config, "matryoshka_dimensions")
+        return (bool(getattr(self.hf_config, "matryoshka_dimensions", None))
                 or getattr(self.hf_config, "is_matryoshka", False))

     @property
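The one-line `is_matryoshka` fix guards against configs that define `matryoshka_dimensions` but leave it `None`: `hasattr` is true whenever the attribute exists, regardless of its value. A small repro with a stand-in config class:

    class FakeConfig:
        matryoshka_dimensions = None  # attribute present, value empty

    cfg = FakeConfig()
    print(hasattr(cfg, "matryoshka_dimensions"))              # True  (old check misfires)
    print(bool(getattr(cfg, "matryoshka_dimensions", None)))  # False (new check is correct)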
@@ -1539,13 +1545,11 @@ class ModelConfig:
         return getattr(self.hf_config, "use_pad_token", True)

     def get_and_verify_max_len(self, max_model_len: int):
-        # For pooling models, the tokenizer's `model_max_length` is often a
-        # reliable source for the maximum sequence length. However, for
-        # generative models, this can be incorrect and unduly limit the
-        # context window (e.g., DeepSeek-R1). Therefore, we only consider
-        # tokenizer_config for pooling models.
+        # Consider max_model_len in tokenizer_config only when
+        # pooling models use absolute position_embedding.
         tokenizer_config = None
-        if self.runner_type == "pooling":
+        if (self.runner_type == "pooling" and getattr(
+                self.hf_config, "position_embedding_type", "") == "absolute"):
             tokenizer_config = try_get_tokenizer_config(
                 self.tokenizer,
                 trust_remote_code=self.trust_remote_code,
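The tightened condition consults the tokenizer's `model_max_length` only for pooling models whose config declares absolute position embeddings, as BERT-style encoders do; pooling models with rotary embeddings keep the config-derived limit. A sketch of the gate, using stand-in config classes:

    class BertLikeConfig:
        position_embedding_type = "absolute"

    class RotaryConfig:
        pass  # no position_embedding_type attribute

    def uses_tokenizer_config(runner_type: str, hf_config) -> bool:
        return (runner_type == "pooling" and getattr(
            hf_config, "position_embedding_type", "") == "absolute")

    print(uses_tokenizer_config("pooling", BertLikeConfig()))   # True
    print(uses_tokenizer_config("pooling", RotaryConfig()))     # False
    print(uses_tokenizer_config("generate", BertLikeConfig()))  # False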