[Frontend] Separate pooling APIs in offline inference (#11129)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-12-13 18:40:07 +08:00
committed by GitHub
parent f93bf2b189
commit eeec9e3390
21 changed files with 669 additions and 304 deletions

View File

@@ -133,7 +133,7 @@ def run_encode(model: str, modality: QueryModality):
if req_data.image is not None:
mm_data["image"] = req_data.image
-outputs = req_data.llm.encode({
+outputs = req_data.llm.embed({
"prompt": req_data.prompt,
"multi_modal_data": mm_data,
})