[Performance] V1 Classify Models E2E Performance Optimization (#23541)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -1248,10 +1248,17 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                 and "encode" in supported_tasks):
             supported_tasks.remove("encode")
 
-            logger.info_once("Chunked prefill is not supported with "
-                             "encode task which using ALL pooling. "
-                             "Please turn off chunked prefill by "
-                             "`--no-enable-chunked-prefill` before using it.")
+            logger.debug_once("Chunked prefill is not supported with "
+                              "encode task which using ALL pooling. "
+                              "Please turn off chunked prefill by "
+                              "`--no-enable-chunked-prefill` before using it.")
 
+        if "score" in supported_tasks:
+            num_labels = getattr(self.model_config.hf_config, "num_labels", 0)
+            if num_labels != 1:
+                supported_tasks.remove("score")
+                logger.debug_once(
+                    "Score API is only enabled for num_labels == 1.")
+
         return supported_tasks
 
Reference in New Issue
Block a user