[bugfix] do not add extra linebreak for score/rerank with chat template (#38617)
Signed-off-by: augusto.yjh <augusto.yjh@antgroup.com>
Signed-off-by: wang.yuqi <noooop@126.com>
Co-authored-by: wang.yuqi <yuqi.wang@daocloud.io>
Co-authored-by: wang.yuqi <noooop@126.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
@@ -234,7 +234,7 @@ async def test_score_api_queries_str_documents_image_url_plus_text_content(
|
||||
assert score.id is not None
|
||||
assert score.data is not None
|
||||
assert len(score.data) == 1
|
||||
assert score.usage.prompt_tokens == 108
|
||||
assert score.usage.prompt_tokens == 107
|
||||
assert_score(
|
||||
score.data[0].score, TEXT_VS_TEXT_PLUS_IMAGE, backend, "text_vs_text_plus_image"
|
||||
)
|
||||
@@ -264,7 +264,7 @@ async def test_score_api_queries_str_documents_list(
|
||||
assert score.id is not None
|
||||
assert score.data is not None
|
||||
assert len(score.data) == 4
|
||||
assert score.usage.prompt_tokens == 368
|
||||
assert score.usage.prompt_tokens == 367
|
||||
assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "list[0]_text_vs_text")
|
||||
assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "list[1]_text_vs_text")
|
||||
assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "list[2]_text_vs_image")
|
||||
@@ -353,7 +353,7 @@ async def test_score_api_queries_list_documents_list(
|
||||
assert score.id is not None
|
||||
assert score.data is not None
|
||||
assert len(score.data) == 4
|
||||
assert score.usage.prompt_tokens == 368
|
||||
assert score.usage.prompt_tokens == 367
|
||||
assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "paired[0]_text_vs_text")
|
||||
assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "paired[1]_text_vs_text")
|
||||
assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "paired[2]_text_vs_image")
|
||||
|
||||
@@ -1187,6 +1187,7 @@ def _get_full_multimodal_text_prompt(
|
||||
placeholder_storage: dict[str, list],
|
||||
texts: list[str],
|
||||
interleave_strings: bool,
|
||||
multimodal_content_part_separator: str = "\n",
|
||||
) -> str:
|
||||
"""Combine multimodal prompts for a multimodal language model."""
|
||||
|
||||
@@ -1232,9 +1233,11 @@ def _get_full_multimodal_text_prompt(
|
||||
# NOTE: Default behaviour: we always add missing placeholders
|
||||
# at the front of the prompt, if interleave_strings=False
|
||||
if text_prompt:
|
||||
return "\n".join(missing_placeholders + [text_prompt])
|
||||
return multimodal_content_part_separator.join(
|
||||
missing_placeholders + [text_prompt]
|
||||
)
|
||||
else:
|
||||
return "\n".join(missing_placeholders)
|
||||
return multimodal_content_part_separator.join(missing_placeholders)
|
||||
|
||||
|
||||
# No need to validate using Pydantic again
|
||||
@@ -1384,6 +1387,7 @@ def _parse_chat_message_content_parts(
|
||||
wrap_dicts: bool,
|
||||
interleave_strings: bool,
|
||||
mm_processor_kwargs: dict[str, Any] | None = None,
|
||||
multimodal_content_part_separator="\n",
|
||||
) -> list[ConversationMessage]:
|
||||
content = list[_ContentPart]()
|
||||
|
||||
@@ -1406,7 +1410,10 @@ def _parse_chat_message_content_parts(
|
||||
mm_placeholder_storage = mm_parser.mm_placeholder_storage()
|
||||
if mm_placeholder_storage:
|
||||
text_prompt = _get_full_multimodal_text_prompt(
|
||||
mm_placeholder_storage, texts, interleave_strings
|
||||
mm_placeholder_storage,
|
||||
texts,
|
||||
interleave_strings,
|
||||
multimodal_content_part_separator=multimodal_content_part_separator,
|
||||
)
|
||||
else:
|
||||
text_prompt = "\n".join(texts)
|
||||
|
||||
@@ -150,6 +150,7 @@ def _parse_score_content(
|
||||
mm_tracker=mm_tracker,
|
||||
wrap_dicts=False,
|
||||
interleave_strings=False,
|
||||
multimodal_content_part_separator="",
|
||||
)
|
||||
|
||||
if parse_res:
|
||||
|
||||
Reference in New Issue
Block a user