[Deprecate] Deprecate pooling multi task support. (#37956)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
wang.yuqi
2026-03-24 22:07:47 +08:00
committed by GitHub
parent 352b90c4a4
commit 1b6cb920e6
18 changed files with 566 additions and 66 deletions

View File

@@ -382,16 +382,19 @@ class LLM:
self.llm_engine = LLMEngine.from_engine_args(
engine_args=engine_args, usage_context=UsageContext.LLM_CLASS
)
self.model_config = self.llm_engine.model_config
self.engine_class = type(self.llm_engine)
self.request_counter = Counter()
self.default_sampling_params: dict[str, Any] | None = None
supported_tasks = self.llm_engine.get_supported_tasks()
logger.info("Supported tasks: %s", supported_tasks)
self.supported_tasks = supported_tasks
self.pooling_task = self.model_config.get_pooling_task(supported_tasks)
if self.pooling_task is not None:
logger.info("Supported pooling task: %s", self.pooling_task)
self.model_config = self.llm_engine.model_config
self.runner_type = self.model_config.runner_type
self.renderer = self.llm_engine.renderer
self.chat_template = load_chat_template(chat_template)
self.io_processor = self.llm_engine.io_processor
@@ -1072,31 +1075,7 @@ class LLM:
pooled hidden states in the same order as the input prompts.
"""
if pooling_task is None:
raise ValueError(
"pooling_task required for `LLM.encode`\n"
"Please use one of the more specific methods or set the "
"pooling_task when using `LLM.encode`:\n"
" - For embeddings, use `LLM.embed(...)` "
'or `pooling_task="embed"`.\n'
" - For classification logits, use `LLM.classify(...)` "
'or `pooling_task="classify"`.\n'
" - For similarity scores, use `LLM.score(...)`.\n"
" - For rewards, use `LLM.reward(...)` "
'or `pooling_task="token_classify"`\n'
" - For token classification, "
'use `pooling_task="token_classify"`\n'
' - For multi-vector retrieval, use `pooling_task="token_embed"`'
)
model_config = self.model_config
runner_type = model_config.runner_type
if runner_type != "pooling":
raise ValueError(
"LLM.encode() is only supported for pooling models. "
"Try passing `--runner pooling` to use the model as a "
"pooling model."
)
self._verify_pooling_task(pooling_task)
if isinstance(prompts, dict) and "data" in prompts:
if self.io_processor is None:
@@ -1206,6 +1185,65 @@ class LLM:
)
return outputs
def _verify_pooling_task(self, pooling_task: PoolingTask | None):
if self.runner_type != "pooling":
raise ValueError(
"LLM.encode() is only supported for pooling models. "
"Try passing `--runner pooling` to use the model as a "
"pooling model."
)
if pooling_task is None:
raise ValueError(
"pooling_task required for `LLM.encode`\n"
"Please use one of the more specific methods or set the "
"pooling_task when using `LLM.encode`:\n"
" - For embeddings, use `LLM.embed(...)` "
'or `pooling_task="embed"`.\n'
" - For classification logits, use `LLM.classify(...)` "
'or `pooling_task="classify"`.\n'
" - For similarity scores, use `LLM.score(...)`.\n"
" - For rewards, use `LLM.reward(...)` "
'or `pooling_task="token_classify"`\n'
" - For token classification, "
'use `pooling_task="token_classify"`\n'
' - For multi-vector retrieval, use `pooling_task="token_embed"`'
)
if (
pooling_task in ("embed", "token_embed")
and pooling_task not in self.supported_tasks
):
raise ValueError(
"Embedding API is not supported by this model. "
"Try converting the model using `--convert embed`."
)
if (
pooling_task in ("classify", "token_classify")
and pooling_task not in self.supported_tasks
):
raise ValueError(
"Classification API is not supported by this model. "
"Try converting the model using `--convert classify`."
)
# plugin task uses io_processor.parse_request to verify inputs
if pooling_task != "plugin" and pooling_task != self.pooling_task:
if pooling_task not in self.supported_tasks:
raise ValueError(
f"Unsupported task: {pooling_task!r} "
f"Supported tasks: {self.supported_tasks}"
)
else:
logger.warning_once(
"Pooling multitask support is deprecated and will "
"be removed in v0.20. When the default pooling task is "
"not what you want, you need to manually specify it "
'via PoolerConfig(task="%s"). ',
pooling_task,
)
def embed(
self,
prompts: PromptType | Sequence[PromptType],
@@ -1239,11 +1277,6 @@ class LLM:
A list of `EmbeddingRequestOutput` objects containing the
embedding vectors in the same order as the input prompts.
"""
if "embed" not in self.supported_tasks:
raise ValueError(
"Embedding API is not supported by this model. "
"Try converting the model using `--convert embed`."
)
items = self.encode(
prompts,
@@ -1289,11 +1322,6 @@ class LLM:
A list of `ClassificationRequestOutput` objects containing the
embedding vectors in the same order as the input prompts.
"""
if "classify" not in self.supported_tasks:
raise ValueError(
"Classification API is not supported by this model. "
"Try converting the model using `--convert classify`."
)
items = self.encode(
prompts,