[Model] Pooling model activation supports per request control by PoolingParams (#20538)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-08-05 15:37:00 +08:00
committed by GitHub
parent 811ac13d03
commit 586f286789
21 changed files with 948 additions and 173 deletions

View File

@@ -44,6 +44,15 @@ class GteNewModelConfig(VerifyAndUpdateConfig):
}
class JambaForSequenceClassificationConfig(VerifyAndUpdateConfig):
    """Config hook that fills in the pooler ``activation`` default."""

    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Set ``pooler_config.activation`` to ``False`` when it is unset.

        Args:
            vllm_config: Configuration whose
                ``model_config.pooler_config`` is read and updated in place.
        """
        pooler = vllm_config.model_config.pooler_config
        # An explicit value (anything other than None) is left untouched.
        if pooler.activation is not None:
            return
        pooler.activation = False
class JinaRobertaModelConfig(VerifyAndUpdateConfig):
@staticmethod
@@ -155,6 +164,26 @@ class NomicBertModelConfig(VerifyAndUpdateConfig):
vllm_config.recalculate_max_model_len(max_model_len)
class Qwen2ForProcessRewardModelConfig(VerifyAndUpdateConfig):
    """Config hook that fills in the pooler ``step_tag_id`` default."""

    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Set ``pooler_config.step_tag_id`` to ``151651`` when it is unset.

        Args:
            vllm_config: Configuration whose
                ``model_config.pooler_config`` is read and updated in place.
        """
        pooler = vllm_config.model_config.pooler_config
        # Only the unset (None) case is defaulted; explicit ids are kept.
        if pooler.step_tag_id is not None:
            return
        pooler.step_tag_id = 151651
class Qwen2ForRewardModelConfig(VerifyAndUpdateConfig):
    """Config hook that fills in the pooler ``softmax`` default."""

    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Set ``pooler_config.softmax`` to ``False`` when it is unset.

        Args:
            vllm_config: Configuration whose
                ``model_config.pooler_config`` is read and updated in place.
        """
        pooler = vllm_config.model_config.pooler_config
        # An explicit value (anything other than None) is left untouched.
        if pooler.softmax is not None:
            return
        pooler.softmax = False
class Qwen3ForSequenceClassificationConfig(VerifyAndUpdateConfig):
@staticmethod
@@ -309,8 +338,11 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
"GteModel": SnowflakeGteNewModelConfig,
"GteNewModel": GteNewModelConfig,
"NomicBertModel": NomicBertModelConfig,
"Qwen2ForProcessRewardModel": Qwen2ForProcessRewardModelConfig,
"Qwen2ForRewardModel": Qwen2ForRewardModelConfig,
"Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
"XLMRobertaModel": JinaRobertaModelConfig,
"JinaVLForRanking": JinaVLForSequenceClassificationConfig,
"JambaForSequenceClassification": JambaForSequenceClassificationConfig,
"GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
}

View File

@@ -593,7 +593,5 @@ class JambaForSequenceClassification(JambaForCausalLM):
pooler_config,
classifier=self.score,
default_pooling_type=PoolingType.LAST,
default_normalize=False,
default_softmax=False,
),
})

View File

@@ -90,15 +90,12 @@ class JinaVLForSequenceClassification(Qwen2VLForConditionalGeneration,
prefix=maybe_prefix(prefix, "qwen2_vl"))
config = vllm_config.model_config.hf_config
pooler_config = vllm_config.model_config.pooler_config
assert pooler_config is not None
# logit bias for sigmoid normalization
self.LOGIT_BIAS = 2.65
self.score = JinaVLScorer(config)
pooler_config = vllm_config.model_config.pooler_config
assert pooler_config is not None
self.pooler = DispatchPooler({
"encode":
Pooler.for_encode(pooler_config),

View File

@@ -117,8 +117,5 @@ class Qwen2ForProcessRewardModel(Qwen2RewardBaseModel):
Pooler.for_encode(
pooler_config,
default_pooling_type=PoolingType.STEP,
default_normalize=False,
default_softmax=True,
default_step_tag_id=151651,
)
})