diff --git a/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py b/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py
index f62ffb2bf..d773802a9 100644
--- a/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py
+++ b/tests/entrypoints/pooling/scoring/test_cross_encoder_online_vision.py
@@ -234,7 +234,7 @@ async def test_score_api_queries_str_documents_image_url_plus_text_content(
     assert score.id is not None
     assert score.data is not None
     assert len(score.data) == 1
-    assert score.usage.prompt_tokens == 108
+    assert score.usage.prompt_tokens == 107
     assert_score(
         score.data[0].score, TEXT_VS_TEXT_PLUS_IMAGE, backend, "text_vs_text_plus_image"
     )
@@ -264,7 +264,7 @@ async def test_score_api_queries_str_documents_list(
     assert score.id is not None
     assert score.data is not None
     assert len(score.data) == 4
-    assert score.usage.prompt_tokens == 368
+    assert score.usage.prompt_tokens == 367
     assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "list[0]_text_vs_text")
     assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "list[1]_text_vs_text")
     assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "list[2]_text_vs_image")
@@ -353,7 +353,7 @@ async def test_score_api_queries_list_documents_list(
     assert score.id is not None
     assert score.data is not None
     assert len(score.data) == 4
-    assert score.usage.prompt_tokens == 368
+    assert score.usage.prompt_tokens == 367
     assert_score(score.data[0].score, TEXT_VS_TEXT, backend, "paired[0]_text_vs_text")
     assert_score(score.data[1].score, TEXT_VS_TEXT, backend, "paired[1]_text_vs_text")
     assert_score(score.data[2].score, TEXT_VS_IMAGE, backend, "paired[2]_text_vs_image")
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index 51e62042f..c1324d2f0 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1187,6 +1187,7 @@ def _get_full_multimodal_text_prompt(
     placeholder_storage: dict[str, list],
     texts: list[str],
     interleave_strings: bool,
+    multimodal_content_part_separator: str = "\n",
 ) -> str:
     """Combine multimodal prompts for a multimodal language model."""
 
@@ -1232,9 +1233,11 @@ def _get_full_multimodal_text_prompt(
     # NOTE: Default behaviour: we always add missing placeholders
     # at the front of the prompt, if interleave_strings=False
     if text_prompt:
-        return "\n".join(missing_placeholders + [text_prompt])
+        return multimodal_content_part_separator.join(
+            missing_placeholders + [text_prompt]
+        )
     else:
-        return "\n".join(missing_placeholders)
+        return multimodal_content_part_separator.join(missing_placeholders)
 
 
 # No need to validate using Pydantic again
@@ -1384,6 +1387,7 @@ def _parse_chat_message_content_parts(
     wrap_dicts: bool,
     interleave_strings: bool,
     mm_processor_kwargs: dict[str, Any] | None = None,
+    multimodal_content_part_separator="\n",
 ) -> list[ConversationMessage]:
     content = list[_ContentPart]()
 
@@ -1406,7 +1410,10 @@ def _parse_chat_message_content_parts(
     mm_placeholder_storage = mm_parser.mm_placeholder_storage()
     if mm_placeholder_storage:
         text_prompt = _get_full_multimodal_text_prompt(
-            mm_placeholder_storage, texts, interleave_strings
+            mm_placeholder_storage,
+            texts,
+            interleave_strings,
+            multimodal_content_part_separator=multimodal_content_part_separator,
         )
     else:
         text_prompt = "\n".join(texts)
diff --git a/vllm/entrypoints/pooling/scoring/utils.py b/vllm/entrypoints/pooling/scoring/utils.py
index 7330e620f..812a75ab8 100644
--- a/vllm/entrypoints/pooling/scoring/utils.py
+++ b/vllm/entrypoints/pooling/scoring/utils.py
@@ -150,6 +150,7 @@ def _parse_score_content(
         mm_tracker=mm_tracker,
         wrap_dicts=False,
         interleave_strings=False,
+        multimodal_content_part_separator="",
     )
 
     if parse_res: