[Deprecate] Deprecate pooling multi task support. (#37956)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io> Signed-off-by: wang.yuqi <noooop@126.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
@@ -382,16 +382,19 @@ class LLM:
|
||||
self.llm_engine = LLMEngine.from_engine_args(
|
||||
engine_args=engine_args, usage_context=UsageContext.LLM_CLASS
|
||||
)
|
||||
self.model_config = self.llm_engine.model_config
|
||||
self.engine_class = type(self.llm_engine)
|
||||
|
||||
self.request_counter = Counter()
|
||||
self.default_sampling_params: dict[str, Any] | None = None
|
||||
|
||||
supported_tasks = self.llm_engine.get_supported_tasks()
|
||||
logger.info("Supported tasks: %s", supported_tasks)
|
||||
self.supported_tasks = supported_tasks
|
||||
self.pooling_task = self.model_config.get_pooling_task(supported_tasks)
|
||||
if self.pooling_task is not None:
|
||||
logger.info("Supported pooling task: %s", self.pooling_task)
|
||||
|
||||
self.model_config = self.llm_engine.model_config
|
||||
self.runner_type = self.model_config.runner_type
|
||||
self.renderer = self.llm_engine.renderer
|
||||
self.chat_template = load_chat_template(chat_template)
|
||||
self.io_processor = self.llm_engine.io_processor
|
||||
@@ -1072,31 +1075,7 @@ class LLM:
|
||||
pooled hidden states in the same order as the input prompts.
|
||||
"""
|
||||
|
||||
if pooling_task is None:
|
||||
raise ValueError(
|
||||
"pooling_task required for `LLM.encode`\n"
|
||||
"Please use one of the more specific methods or set the "
|
||||
"pooling_task when using `LLM.encode`:\n"
|
||||
" - For embeddings, use `LLM.embed(...)` "
|
||||
'or `pooling_task="embed"`.\n'
|
||||
" - For classification logits, use `LLM.classify(...)` "
|
||||
'or `pooling_task="classify"`.\n'
|
||||
" - For similarity scores, use `LLM.score(...)`.\n"
|
||||
" - For rewards, use `LLM.reward(...)` "
|
||||
'or `pooling_task="token_classify"`\n'
|
||||
" - For token classification, "
|
||||
'use `pooling_task="token_classify"`\n'
|
||||
' - For multi-vector retrieval, use `pooling_task="token_embed"`'
|
||||
)
|
||||
|
||||
model_config = self.model_config
|
||||
runner_type = model_config.runner_type
|
||||
if runner_type != "pooling":
|
||||
raise ValueError(
|
||||
"LLM.encode() is only supported for pooling models. "
|
||||
"Try passing `--runner pooling` to use the model as a "
|
||||
"pooling model."
|
||||
)
|
||||
self._verify_pooling_task(pooling_task)
|
||||
|
||||
if isinstance(prompts, dict) and "data" in prompts:
|
||||
if self.io_processor is None:
|
||||
@@ -1206,6 +1185,65 @@ class LLM:
|
||||
)
|
||||
return outputs
|
||||
|
||||
def _verify_pooling_task(self, pooling_task: PoolingTask | None):
|
||||
if self.runner_type != "pooling":
|
||||
raise ValueError(
|
||||
"LLM.encode() is only supported for pooling models. "
|
||||
"Try passing `--runner pooling` to use the model as a "
|
||||
"pooling model."
|
||||
)
|
||||
|
||||
if pooling_task is None:
|
||||
raise ValueError(
|
||||
"pooling_task required for `LLM.encode`\n"
|
||||
"Please use one of the more specific methods or set the "
|
||||
"pooling_task when using `LLM.encode`:\n"
|
||||
" - For embeddings, use `LLM.embed(...)` "
|
||||
'or `pooling_task="embed"`.\n'
|
||||
" - For classification logits, use `LLM.classify(...)` "
|
||||
'or `pooling_task="classify"`.\n'
|
||||
" - For similarity scores, use `LLM.score(...)`.\n"
|
||||
" - For rewards, use `LLM.reward(...)` "
|
||||
'or `pooling_task="token_classify"`\n'
|
||||
" - For token classification, "
|
||||
'use `pooling_task="token_classify"`\n'
|
||||
' - For multi-vector retrieval, use `pooling_task="token_embed"`'
|
||||
)
|
||||
|
||||
if (
|
||||
pooling_task in ("embed", "token_embed")
|
||||
and pooling_task not in self.supported_tasks
|
||||
):
|
||||
raise ValueError(
|
||||
"Embedding API is not supported by this model. "
|
||||
"Try converting the model using `--convert embed`."
|
||||
)
|
||||
|
||||
if (
|
||||
pooling_task in ("classify", "token_classify")
|
||||
and pooling_task not in self.supported_tasks
|
||||
):
|
||||
raise ValueError(
|
||||
"Classification API is not supported by this model. "
|
||||
"Try converting the model using `--convert classify`."
|
||||
)
|
||||
|
||||
# plugin task uses io_processor.parse_request to verify inputs
|
||||
if pooling_task != "plugin" and pooling_task != self.pooling_task:
|
||||
if pooling_task not in self.supported_tasks:
|
||||
raise ValueError(
|
||||
f"Unsupported task: {pooling_task!r} "
|
||||
f"Supported tasks: {self.supported_tasks}"
|
||||
)
|
||||
else:
|
||||
logger.warning_once(
|
||||
"Pooling multitask support is deprecated and will "
|
||||
"be removed in v0.20. When the default pooling task is "
|
||||
"not what you want, you need to manually specify it "
|
||||
'via PoolerConfig(task="%s"). ',
|
||||
pooling_task,
|
||||
)
|
||||
|
||||
def embed(
|
||||
self,
|
||||
prompts: PromptType | Sequence[PromptType],
|
||||
@@ -1239,11 +1277,6 @@ class LLM:
|
||||
A list of `EmbeddingRequestOutput` objects containing the
|
||||
embedding vectors in the same order as the input prompts.
|
||||
"""
|
||||
if "embed" not in self.supported_tasks:
|
||||
raise ValueError(
|
||||
"Embedding API is not supported by this model. "
|
||||
"Try converting the model using `--convert embed`."
|
||||
)
|
||||
|
||||
items = self.encode(
|
||||
prompts,
|
||||
@@ -1289,11 +1322,6 @@ class LLM:
|
||||
A list of `ClassificationRequestOutput` objects containing the
|
||||
embedding vectors in the same order as the input prompts.
|
||||
"""
|
||||
if "classify" not in self.supported_tasks:
|
||||
raise ValueError(
|
||||
"Classification API is not supported by this model. "
|
||||
"Try converting the model using `--convert classify`."
|
||||
)
|
||||
|
||||
items = self.encode(
|
||||
prompts,
|
||||
|
||||
Reference in New Issue
Block a user