From f3d8a3467111d861cb814152f9c5c8aeaff335c2 Mon Sep 17 00:00:00 2001
From: "wang.yuqi" <yuqi.wang@daocloud.io>
Date: Tue, 3 Feb 2026 22:43:47 +0800
Subject: [PATCH] [Bugfix] Do not add extra \n for image-only cases when
 constructing multimodal text prompts. (#33647)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
---
 vllm/entrypoints/chat_utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index c77c18a58..0077a897d 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1164,7 +1164,10 @@ def _get_full_multimodal_text_prompt(
 
     # NOTE: Default behaviour: we always add missing placeholders
     # at the front of the prompt, if interleave_strings=False
-    return "\n".join(missing_placeholders + [text_prompt])
+    if text_prompt:
+        return "\n".join(missing_placeholders + [text_prompt])
+    else:
+        return "\n".join(missing_placeholders)
 
 
 # No need to validate using Pydantic again