[Misc] Split up pooling tasks (#10820)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-12-11 17:28:00 +08:00
committed by GitHub
parent 40766ca1b8
commit 8f10d5e393
27 changed files with 527 additions and 168 deletions

View File

@@ -288,7 +288,7 @@ class LLMEngine:
self.model_executor = executor_class(vllm_config=vllm_config, )
if self.model_config.task != "embedding":
if self.model_config.runner_type != "pooling":
self._initialize_kv_caches()
# If usage stat is enabled, collect relevant info.
@@ -1123,7 +1123,7 @@ class LLMEngine:
seq_group.metrics.model_execute_time = (
o.model_execute_time)
if self.model_config.task == "embedding":
if self.model_config.runner_type == "pooling":
self._process_sequence_group_outputs(seq_group, output)
else:
self.output_processor.process_prompt_logprob(seq_group, output)