[Frontend] Support multimodal inputs for late-interaction scoring (ColQwen3) + NewModel: nvidia/nemotron-colembed (#34574)
Signed-off-by: craftsangjae <craftsangjae@gmail.com>
This commit is contained in:
@@ -50,6 +50,7 @@ from vllm.entrypoints.pooling.score.utils import (
|
||||
compress_token_type_ids,
|
||||
compute_maxsim_score,
|
||||
get_score_prompt,
|
||||
score_data_to_prompts,
|
||||
validate_score_input,
|
||||
)
|
||||
from vllm.entrypoints.utils import log_non_default_args
|
||||
@@ -1395,25 +1396,13 @@ class LLM:
|
||||
|
||||
tokenizer = self.get_tokenizer()
|
||||
|
||||
# Extract text from ScoreData
|
||||
text_1: list[str] = []
|
||||
for text in data_1:
|
||||
if not isinstance(text, str):
|
||||
raise NotImplementedError(
|
||||
"Late interaction scores currently do not support multimodal input."
|
||||
)
|
||||
text_1.append(text)
|
||||
# Convert ScoreData to PromptType (handles both text and multimodal)
|
||||
model_config = self.model_config
|
||||
prompts_1 = score_data_to_prompts(data_1, "query", model_config)
|
||||
prompts_2 = score_data_to_prompts(data_2, "document", model_config)
|
||||
|
||||
text_2: list[str] = []
|
||||
for text in data_2:
|
||||
if not isinstance(text, str):
|
||||
raise NotImplementedError(
|
||||
"Late interaction scores currently do not support multimodal input."
|
||||
)
|
||||
text_2.append(text)
|
||||
|
||||
encoded_output = self.encode(
|
||||
text_1 + text_2,
|
||||
encoded_output: list[PoolingRequestOutput] = self.encode(
|
||||
prompts_1 + prompts_2,
|
||||
use_tqdm=use_tqdm,
|
||||
lora_request=lora_request,
|
||||
pooling_params=pooling_params,
|
||||
@@ -1421,8 +1410,8 @@ class LLM:
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
)
|
||||
|
||||
encoded_output_1 = encoded_output[0 : len(text_1)]
|
||||
encoded_output_2 = encoded_output[len(text_1) :]
|
||||
encoded_output_1: list[PoolingRequestOutput] = encoded_output[: len(prompts_1)]
|
||||
encoded_output_2: list[PoolingRequestOutput] = encoded_output[len(prompts_1) :]
|
||||
|
||||
if len(encoded_output_1) == 1:
|
||||
encoded_output_1 = encoded_output_1 * len(encoded_output_2)
|
||||
|
||||
Reference in New Issue
Block a user