[Model] Deprecate the score task (this will not affect users). (#37537)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
@@ -16,25 +16,22 @@ from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def get_classification_act_fn(
|
||||
def get_act_fn(
|
||||
config: PretrainedConfig,
|
||||
static_num_labels: bool = True,
|
||||
) -> "PoolerActivation":
|
||||
# get classification act_fn
|
||||
# Implement alignment with transformers ForSequenceClassificationLoss
|
||||
# https://github.com/huggingface/transformers/blob/57bb6db6ee4cfaccc45b8d474dfad5a17811ca60/src/transformers/loss/loss_utils.py#L92
|
||||
problem_type = getattr(config, "problem_type", "")
|
||||
if problem_type == "regression":
|
||||
return PoolerIdentity()
|
||||
if problem_type == "single_label_classification":
|
||||
return PoolerClassify()
|
||||
return PoolerClassify(static_num_labels=static_num_labels)
|
||||
if problem_type == "multi_label_classification":
|
||||
return PoolerMultiLabelClassify()
|
||||
|
||||
return PoolerClassify()
|
||||
|
||||
|
||||
def get_cross_encoder_act_fn(
|
||||
config: PretrainedConfig,
|
||||
) -> "PoolerActivation":
|
||||
# get cross_encoder act_fn
|
||||
function_name: str | None = None
|
||||
if (
|
||||
hasattr(config, "sentence_transformers")
|
||||
@@ -55,24 +52,16 @@ def get_cross_encoder_act_fn(
|
||||
fn = resolve_obj_by_qualname(function_name)()
|
||||
return PoolerActivation.wraps(fn)
|
||||
|
||||
return PoolerClassify()
|
||||
return PoolerClassify(static_num_labels=static_num_labels)
|
||||
|
||||
|
||||
def resolve_classifier_act_fn(
|
||||
model_config: ModelConfig,
|
||||
static_num_labels: bool = True,
|
||||
act_fn: "PoolerActivation | str | None" = None,
|
||||
act_fn: "PoolerActivation | None" = None,
|
||||
):
|
||||
if isinstance(act_fn, str):
|
||||
if act_fn == "classify":
|
||||
return get_classification_act_fn(model_config.hf_config)
|
||||
if act_fn == "score":
|
||||
return get_cross_encoder_act_fn(model_config.hf_config)
|
||||
|
||||
raise ValueError(f"act_fn [{act_fn=}] not supported.")
|
||||
|
||||
if act_fn is None:
|
||||
return PoolerClassify(static_num_labels=static_num_labels)
|
||||
return get_act_fn(model_config.hf_config, static_num_labels)
|
||||
|
||||
assert callable(act_fn)
|
||||
return act_fn
|
||||
@@ -97,9 +86,8 @@ class PoolerActivation(nn.Module, ABC):
|
||||
|
||||
def forward(self, pooled_data: _T) -> _T:
|
||||
# shape:
|
||||
# classify (& score) -> (batch_size, num_classes)
|
||||
# embed -> (batch_size, embedding_dim) or list(embedding_dim)
|
||||
# (batch_size, dimensions) or list(dimensions) if using MRL
|
||||
# classify -> (batch_size, num_classes)
|
||||
# embed -> (batch_size, embedding_size) or list(embedding_size)
|
||||
if isinstance(pooled_data, list):
|
||||
return [self.forward_chunk(data) for data in pooled_data]
|
||||
|
||||
|
||||
@@ -56,29 +56,31 @@ class EmbeddingPoolerHead(SequencePoolerHead):
|
||||
|
||||
if isinstance(pooled_data, list):
|
||||
pooled_data = torch.stack(pooled_data)
|
||||
# pooled_data shape: [batchsize, hidden_dimension]
|
||||
# pooled_data shape: [batchsize, hidden_size]
|
||||
|
||||
if self.head_dtype is not None:
|
||||
pooled_data = pooled_data.to(self.head_dtype)
|
||||
|
||||
# Apply ST projector
|
||||
if self.projector is not None:
|
||||
pooled_data = self.projector(pooled_data)
|
||||
# pooled_data shape: [batchsize, embedding_dimension]
|
||||
embeddings = self.projector(pooled_data)
|
||||
else:
|
||||
embeddings = pooled_data
|
||||
# embeddings shape: [batchsize, embedding_size]
|
||||
|
||||
# for matryoshka representation
|
||||
dimensions_list = [pooling_param.dimensions for pooling_param in pooling_params]
|
||||
if any(d is not None for d in dimensions_list):
|
||||
# change the output dimension
|
||||
assert len(pooled_data) == len(dimensions_list)
|
||||
if len(set(dimensions_list)) == 1 and not isinstance(pooled_data, list):
|
||||
assert len(embeddings) == len(dimensions_list)
|
||||
if len(set(dimensions_list)) == 1 and not isinstance(embeddings, list):
|
||||
# if all dimensions are the same
|
||||
d = dimensions_list[0]
|
||||
pooled_data = pooled_data[..., :d]
|
||||
embeddings = embeddings[..., :d]
|
||||
else:
|
||||
pooled_data = [
|
||||
embeddings = [
|
||||
vecs if d is None else vecs[..., :d]
|
||||
for vecs, d in zip(pooled_data, dimensions_list)
|
||||
for vecs, d in zip(embeddings, dimensions_list)
|
||||
]
|
||||
|
||||
# for normalize
|
||||
@@ -86,15 +88,15 @@ class EmbeddingPoolerHead(SequencePoolerHead):
|
||||
flags = [p.use_activation for p in pooling_params]
|
||||
if len(set(flags)) == 1:
|
||||
if flags[0]:
|
||||
pooled_data = self.activation(pooled_data)
|
||||
embeddings = self.activation(embeddings)
|
||||
else:
|
||||
pooled_data = [
|
||||
embeddings = [
|
||||
self.activation(vecs) if f else vecs
|
||||
for vecs, f in zip(pooled_data, flags)
|
||||
for vecs, f in zip(embeddings, flags)
|
||||
]
|
||||
|
||||
# pooled_data shape: [batchsize, embedding_dimension]
|
||||
return pooled_data
|
||||
# embeddings shape: [batchsize, embedding_size]
|
||||
return embeddings
|
||||
|
||||
|
||||
class ClassifierPoolerHead(SequencePoolerHead):
|
||||
@@ -113,7 +115,7 @@ class ClassifierPoolerHead(SequencePoolerHead):
|
||||
self.activation = activation
|
||||
|
||||
def get_supported_tasks(self) -> Set[PoolingTask]:
|
||||
return {"classify", "score"}
|
||||
return {"classify"}
|
||||
|
||||
def forward(
|
||||
self,
|
||||
@@ -131,21 +133,23 @@ class ClassifierPoolerHead(SequencePoolerHead):
|
||||
pooled_data = pooled_data.to(self.head_dtype)
|
||||
|
||||
if self.classifier is not None:
|
||||
pooled_data = self.classifier(pooled_data)
|
||||
# pooled_data shape: [batchsize, num_labels]
|
||||
logits = self.classifier(pooled_data)
|
||||
else:
|
||||
logits = pooled_data
|
||||
|
||||
# logits shape: [batchsize, num_labels]
|
||||
if self.logit_bias is not None:
|
||||
pooled_data -= self.logit_bias
|
||||
logits -= self.logit_bias
|
||||
|
||||
if self.activation is not None:
|
||||
flags = [p.use_activation for p in pooling_params]
|
||||
if len(set(flags)) == 1:
|
||||
pooled_data = self.activation(pooled_data) if flags[0] else pooled_data
|
||||
logits = self.activation(logits) if flags[0] else logits
|
||||
else:
|
||||
pooled_data = [
|
||||
logits = [
|
||||
self.activation(vecs) if f else vecs
|
||||
for vecs, f in zip(pooled_data, flags)
|
||||
for vecs, f in zip(logits, flags)
|
||||
]
|
||||
|
||||
# pooled_data shape: [batchsize, num_labels]
|
||||
return pooled_data
|
||||
# logits shape: [batchsize, num_labels]
|
||||
return logits
|
||||
|
||||
@@ -17,7 +17,7 @@ SequencePoolingMethodOutput: TypeAlias = torch.Tensor | list[torch.Tensor]
|
||||
|
||||
class SequencePoolingMethod(nn.Module, ABC):
|
||||
def get_supported_tasks(self) -> Set[PoolingTask]:
|
||||
return {"token_embed", "token_classify", "embed", "classify", "score"}
|
||||
return {"token_embed", "token_classify", "embed", "classify"}
|
||||
|
||||
def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
|
||||
return PoolingParamsUpdate()
|
||||
|
||||
@@ -108,7 +108,7 @@ def pooler_for_classify(
|
||||
*,
|
||||
pooling: SequencePoolingMethod | SequencePoolingFn | None = None,
|
||||
classifier: ClassifierFn | None = None,
|
||||
act_fn: PoolerActivation | str | None = None,
|
||||
act_fn: PoolerActivation | None = None,
|
||||
):
|
||||
if pooling is None:
|
||||
pooling = get_seq_pooling_method(pooler_config.get_seq_pooling_type())
|
||||
|
||||
@@ -52,13 +52,6 @@ class DispatchPooler(Pooler):
|
||||
pooler_config,
|
||||
pooling=pooling,
|
||||
classifier=classifier,
|
||||
act_fn="classify",
|
||||
),
|
||||
"score": pooler_for_classify(
|
||||
pooler_config,
|
||||
pooling=pooling,
|
||||
classifier=classifier,
|
||||
act_fn="score",
|
||||
),
|
||||
}
|
||||
)
|
||||
@@ -115,7 +108,7 @@ class DispatchPooler(Pooler):
|
||||
|
||||
class IdentityPooler(Pooler):
|
||||
def get_supported_tasks(self) -> Set[PoolingTask]:
|
||||
return {"plugin", "score"}
|
||||
return {"plugin"}
|
||||
|
||||
def forward(
|
||||
self,
|
||||
|
||||
@@ -68,22 +68,24 @@ class TokenEmbeddingPoolerHead(TokenPoolerHead):
|
||||
|
||||
if self.head_dtype is not None:
|
||||
pooled_data = pooled_data.to(self.head_dtype)
|
||||
# pooled_data shape: [n_tokens, hidden_dimension]
|
||||
# pooled_data shape: [n_tokens, hidden_size]
|
||||
|
||||
# Apply ST projector
|
||||
if self.projector is not None:
|
||||
pooled_data = self.projector(pooled_data)
|
||||
# pooled_data shape: [n_tokens, embedding_dimension]
|
||||
embeddings = self.projector(pooled_data)
|
||||
else:
|
||||
embeddings = pooled_data
|
||||
# embeddings shape: [n_tokens, embedding_size]
|
||||
|
||||
# for matryoshka representation
|
||||
pooled_data = pooled_data[..., : pooling_param.dimensions]
|
||||
embeddings = embeddings[..., : pooling_param.dimensions]
|
||||
|
||||
# for normalize
|
||||
if self.activation is not None and pooling_param.use_activation:
|
||||
pooled_data = self.activation(pooled_data)
|
||||
embeddings = self.activation(embeddings)
|
||||
|
||||
# pooled_data shape: [n_tokens, embedding_dimension]
|
||||
return pooled_data
|
||||
# embeddings shape: [n_tokens, embedding_size]
|
||||
return embeddings
|
||||
|
||||
|
||||
class TokenClassifierPoolerHead(TokenPoolerHead):
|
||||
@@ -118,16 +120,16 @@ class TokenClassifierPoolerHead(TokenPoolerHead):
|
||||
# hidden_states shape: [n_token, hidden_size]
|
||||
|
||||
if self.classifier is not None:
|
||||
scores = self.classifier(pooled_data)
|
||||
logits = self.classifier(pooled_data)
|
||||
else:
|
||||
scores = pooled_data
|
||||
# scores shape: [n_token, num_labels]
|
||||
logits = pooled_data
|
||||
# logits shape: [n_token, num_labels]
|
||||
|
||||
if self.logit_bias is not None:
|
||||
scores -= self.logit_bias
|
||||
logits -= self.logit_bias
|
||||
|
||||
if self.activation is not None and pooling_param.use_activation:
|
||||
scores = self.activation(scores)
|
||||
logits = self.activation(logits)
|
||||
|
||||
# scores shape: [n_token, num_labels]
|
||||
return scores
|
||||
# logits shape: [n_token, num_labels]
|
||||
return logits
|
||||
|
||||
@@ -116,7 +116,7 @@ def pooler_for_token_classify(
|
||||
*,
|
||||
pooling: TokenPoolingMethod | TokenPoolingFn | None = None,
|
||||
classifier: ClassifierFn | None = None,
|
||||
act_fn: PoolerActivation | str | None = None,
|
||||
act_fn: PoolerActivation | None = None,
|
||||
):
|
||||
if pooling is None:
|
||||
pooling = get_tok_pooling_method(pooler_config.get_tok_pooling_type())
|
||||
|
||||
Reference in New Issue
Block a user