diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md
index fb4b0b634..3020cb7b4 100644
--- a/docs/serving/openai_compatible_server.md
+++ b/docs/serving/openai_compatible_server.md
@@ -362,7 +362,7 @@ and passing a list of `messages` in the request. Refer to the examples below for
 
 `MrLight/dse-qwen2-2b-mrl-v1` requires a placeholder image of the minimum image size for text query embeddings. See the full code example below for details.
 
-Full example: [examples/pooling/embed/openai_chat_embedding_client_for_multimodal.py](../../examples/pooling/embed/openai_chat_embedding_client_for_multimodal.py)
+Full example: [examples/pooling/embed/vision_embedding_online.py](../../examples/pooling/embed/vision_embedding_online.py)
 
 #### Extra parameters
 
@@ -667,7 +667,7 @@ Usually, the score for a sentence pair refers to the similarity between two sent
 
 You can find the documentation for cross encoder models at [sbert.net](https://www.sbert.net/docs/package_reference/cross_encoder/cross_encoder.html).
 
-Code example: [examples/pooling/score/openai_cross_encoder_score.py](../../examples/pooling/score/openai_cross_encoder_score.py)
+Code example: [examples/pooling/score/score_api_online.py](../../examples/pooling/score/score_api_online.py)
 
 #### Score Template
 
@@ -863,7 +863,10 @@ You can pass multi-modal inputs to scoring models by passing `content` including
     print("Scoring output:", response_json["data"][0]["score"])
     print("Scoring output:", response_json["data"][1]["score"])
     ```
-Full example: [examples/pooling/score/openai_cross_encoder_score_for_multimodal.py](../../examples/pooling/score/openai_cross_encoder_score_for_multimodal.py)
+Full example:
+
+- [examples/pooling/score/vision_score_api_online.py](../../examples/pooling/score/vision_score_api_online.py)
+- [examples/pooling/score/vision_rerank_api_online.py](../../examples/pooling/score/vision_rerank_api_online.py)
 
 #### Extra parameters
 
@@ -893,7 +896,7 @@ endpoints are compatible with both [Jina AI's re-rank API interface](https://jin
 [Cohere's re-rank API interface](https://docs.cohere.com/v2/reference/rerank)
 to ensure compatibility with popular open-source tools.
 
-Code example: [examples/pooling/score/openai_reranker.py](../../examples/pooling/score/openai_reranker.py)
+Code example: [examples/pooling/score/rerank_api_online.py](../../examples/pooling/score/rerank_api_online.py)
 
 #### Example Request
 
diff --git a/examples/pooling/embed/vision_embedding_offline.py b/examples/pooling/embed/vision_embedding_offline.py
new file mode 100644
index 000000000..ef272bade
--- /dev/null
+++ b/examples/pooling/embed/vision_embedding_offline.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+"""
+This example shows how to use vLLM for running offline inference with
+the correct prompt format on vision language models for multimodal embedding.
+
+For most models, the prompt format should follow the corresponding examples
+on the HuggingFace model repository.
+"""
+
+import argparse
+from dataclasses import asdict
+
+from vllm import LLM, EngineArgs
+from vllm.multimodal.utils import fetch_image
+
+image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
+text = "A cat standing in the snow."
+multi_modal_data = {"image": fetch_image(image_url)}
+
+
+def print_embeddings(embeds):
+    embeds_trimmed = (str(embeds[:4])[:-1] + ", ...]") if len(embeds) > 4 else embeds
+    print(f"Embeddings: {embeds_trimmed} (size={len(embeds)})")
+
+
+def run_qwen3_vl():
+    engine_args = EngineArgs(
+        model="Qwen/Qwen3-VL-Embedding-2B",
+        runner="pooling",
+        max_model_len=8192,
+        limit_mm_per_prompt={"image": 1},
+    )
+    default_instruction = "Represent the user's input."
+    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
+    text_prompt = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n"
+    image_prompt = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{image_placeholder}<|im_end|>\n<|im_start|>assistant\n"
+    image_text_prompt = f"<|im_start|>system\n{default_instruction}<|im_end|>\n<|im_start|>user\n{image_placeholder}{text}<|im_end|>\n<|im_start|>assistant\n"
+
+    llm = LLM(**asdict(engine_args))
+
+    print("Text embedding output:")
+    outputs = llm.embed(text_prompt, use_tqdm=False)
+    print_embeddings(outputs[0].outputs.embedding)
+
+    print("Image embedding output:")
+    outputs = llm.embed(
+        {
+            "prompt": image_prompt,
+            "multi_modal_data": multi_modal_data,
+        },
+        use_tqdm=False,
+    )
+    print_embeddings(outputs[0].outputs.embedding)
+
+    print("Image+Text embedding output:")
+    outputs = llm.embed(
+        {
+            "prompt": image_text_prompt,
+            "multi_modal_data": multi_modal_data,
+        },
+        use_tqdm=False,
+    )
+    print_embeddings(outputs[0].outputs.embedding)
+
+
+model_example_map = {
+    "qwen3_vl": run_qwen3_vl,
+}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Script to run a specified VLM through the vLLM offline API."
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        choices=model_example_map.keys(),
+        required=True,
+        help="The name of the embedding model.",
+    )
+    return parser.parse_args()
+
+
+def main(args):
+    model_example_map[args.model]()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/examples/pooling/embed/openai_chat_embedding_client_for_multimodal.py b/examples/pooling/embed/vision_embedding_online.py
similarity index 66%
rename from examples/pooling/embed/openai_chat_embedding_client_for_multimodal.py
rename to examples/pooling/embed/vision_embedding_online.py
index a7ab7e73e..66c824739 100644
--- a/examples/pooling/embed/openai_chat_embedding_client_for_multimodal.py
+++ b/examples/pooling/embed/vision_embedding_online.py
@@ -21,7 +21,8 @@ from PIL import Image
 openai_api_key = "EMPTY"
 openai_api_base = "http://localhost:8000/v1"
 
-image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+image_url = "https://vllm-public-assets.s3.us-west-2.amazonaws.com/multimodal_asset/cat_snow.jpg"
+text = "A cat standing in the snow."
 
 
 def create_chat_embeddings(
@@ -30,6 +31,8 @@ def create_chat_embeddings(
     messages: list[ChatCompletionMessageParam],
     model: str,
     encoding_format: Literal["base64", "float"] | NotGiven = NOT_GIVEN,
+    continue_final_message: bool = False,
+    add_special_tokens: bool = False,
 ) -> CreateEmbeddingResponse:
     """
     Convenience function for accessing vLLM's Chat Embeddings API,
@@ -38,10 +41,21 @@ def create_chat_embeddings(
     return client.post(
         "/embeddings",
         cast_to=CreateEmbeddingResponse,
-        body={"messages": messages, "model": model, "encoding_format": encoding_format},
+        body={
+            "messages": messages,
+            "model": model,
+            "encoding_format": encoding_format,
+            "continue_final_message": continue_final_message,
+            "add_special_tokens": add_special_tokens,
+        },
     )
 
 
+def print_embeddings(embeds):
+    embeds_trimmed = (str(embeds[:4])[:-1] + ", ...]") if len(embeds) > 4 else embeds
+    print(f"Embeddings: {embeds_trimmed} (size={len(embeds)})")
+
+
 def run_clip(client: OpenAI, model: str):
     """
     Start the server using:
@@ -145,6 +159,113 @@ def run_dse_qwen2_vl(client: OpenAI, model: str):
     print("Text embedding output:", response.data[0].embedding)
 
 
+def run_qwen3_vl(client: OpenAI, model: str):
+    """
+    Start the server using:
+
+    vllm serve Qwen/Qwen3-VL-Embedding-2B \
+        --runner pooling \
+        --max-model-len 8192
+    """
+
+    default_instruction = "Represent the user's input."
+
+    print("Text embedding output:")
+    response = create_chat_embeddings(
+        client,
+        messages=[
+            {
+                "role": "system",
+                "content": [
+                    {"type": "text", "text": default_instruction},
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": text},
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {"type": "text", "text": ""},
+                ],
+            },
+        ],
+        model=model,
+        encoding_format="float",
+        continue_final_message=True,
+        add_special_tokens=True,
+    )
+    print_embeddings(response.data[0].embedding)
+
+    print("Image embedding output:")
+    response = create_chat_embeddings(
+        client,
+        messages=[
+            {
+                "role": "system",
+                "content": [
+                    {"type": "text", "text": default_instruction},
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": ""},
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {"type": "text", "text": ""},
+                ],
+            },
+        ],
+        model=model,
+        encoding_format="float",
+        continue_final_message=True,
+        add_special_tokens=True,
+    )
+    print_embeddings(response.data[0].embedding)
+
+    print("Image+Text embedding output:")
+    response = create_chat_embeddings(
+        client,
+        messages=[
+            {
+                "role": "system",
+                "content": [
+                    {"type": "text", "text": default_instruction},
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {
+                        "type": "text",
+                        "text": f"{text}",
+                    },
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {"type": "text", "text": ""},
+                ],
+            },
+        ],
+        model=model,
+        encoding_format="float",
+        continue_final_message=True,
+        add_special_tokens=True,
+    )
+    print_embeddings(response.data[0].embedding)
+
+
 def run_siglip(client: OpenAI, model: str):
     """
     Start the server using:
@@ -213,7 +334,8 @@ def run_vlm2vec(client: OpenAI, model: str):
         encoding_format="float",
     )
 
-    print("Image embedding output:", response.data[0].embedding)
+    print("Image embedding output:")
+    print_embeddings(response.data[0].embedding)
 
     response = create_chat_embeddings(
         client,
@@ -233,7 +355,8 @@ def run_vlm2vec(client: OpenAI, model: str):
         encoding_format="float",
     )
 
-    print("Image+Text embedding output:", response.data[0].embedding)
+    print("Image+Text embedding output:")
+    print_embeddings(response.data[0].embedding)
 
     response = create_chat_embeddings(
         client,
@@ -249,11 +372,13 @@ def run_vlm2vec(client: OpenAI, model: str):
         encoding_format="float",
     )
 
-    print("Text embedding output:", response.data[0].embedding)
+    print("Text embedding output:")
+    print_embeddings(response.data[0].embedding)
 
 
 model_example_map = {
     "clip": run_clip,
+    "qwen3_vl": run_qwen3_vl,
     "dse_qwen2_vl": run_dse_qwen2_vl,
     "siglip": run_siglip,
     "vlm2vec": run_vlm2vec,
diff --git a/examples/pooling/score/cohere_rerank_client.py b/examples/pooling/score/cohere_rerank_online.py
similarity index 100%
rename from examples/pooling/score/cohere_rerank_client.py
rename to examples/pooling/score/cohere_rerank_online.py
diff --git a/examples/pooling/score/openai_cross_encoder_score_for_multimodal.py b/examples/pooling/score/openai_cross_encoder_score_for_multimodal.py
deleted file mode 100644
index 80ed2c27d..000000000
--- a/examples/pooling/score/openai_cross_encoder_score_for_multimodal.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-Example online usage of Score API.
-
-Run `vllm serve <model> --runner pooling` to start up the server in vLLM.
-"""
-
-import argparse
-import pprint
-
-import requests
-
-
-def post_http_request(prompt: dict, api_url: str) -> requests.Response:
-    headers = {"User-Agent": "Test Client"}
-    response = requests.post(api_url, headers=headers, json=prompt)
-    return response
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--host", type=str, default="localhost")
-    parser.add_argument("--port", type=int, default=8000)
-    parser.add_argument("--model", type=str, default="jinaai/jina-reranker-m0")
-    return parser.parse_args()
-
-
-def main(args):
-    api_url = f"http://{args.host}:{args.port}/score"
-    model_name = args.model
-
-    text_1 = "slm markdown"
-    text_2 = {
-        "content": [
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png"
-                },
-            },
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
-                },
-            },
-        ]
-    }
-    prompt = {"model": model_name, "text_1": text_1, "text_2": text_2}
-    score_response = post_http_request(prompt=prompt, api_url=api_url)
-    print("\nPrompt when text_1 is string and text_2 is a image list:")
-    pprint.pprint(prompt)
-    print("\nScore Response:")
-    pprint.pprint(score_response.json())
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    main(args)
diff --git a/examples/pooling/score/openai_reranker.py b/examples/pooling/score/rerank_api_online.py
similarity index 100%
rename from examples/pooling/score/openai_reranker.py
rename to examples/pooling/score/rerank_api_online.py
diff --git a/examples/pooling/score/openai_cross_encoder_score.py b/examples/pooling/score/score_api_online.py
similarity index 100%
rename from examples/pooling/score/openai_cross_encoder_score.py
rename to examples/pooling/score/score_api_online.py
diff --git a/examples/pooling/score/vision_rerank_api_online.py b/examples/pooling/score/vision_rerank_api_online.py
new file mode 100644
index 000000000..37a7decf3
--- /dev/null
+++ b/examples/pooling/score/vision_rerank_api_online.py
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+
+"""
+Example Python client for the multimodal rerank API, which is compatible with
+the Jina and Cohere re-rank APIs: https://jina.ai/reranker
+
+Run `vllm serve <model> --runner pooling` to start up the server in vLLM.
+e.g.
+    vllm serve jinaai/jina-reranker-m0 --runner pooling
+
+    vllm serve Qwen/Qwen3-VL-Reranker-2B \
+        --runner pooling \
+        --max-model-len 4096 \
+        --hf_overrides '{"architectures": ["Qwen3VLForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}' \
+        --chat-template examples/pooling/score/template/qwen3_vl_reranker.jinja
+"""
+
+import argparse
+import json
+
+import requests
+
+headers = {"accept": "application/json", "Content-Type": "application/json"}
+
+query = "A woman playing with her dog on a beach at sunset."
+documents = {
+    "content": [
+        {
+            "type": "text",
+            "text": (
+                "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, "  # noqa: E501
+                "as the dog offers its paw in a heartwarming display of companionship and trust."  # noqa: E501
+            ),
+        },
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
+            },
+        },
+    ]
+}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="localhost")
+    parser.add_argument("--port", type=int, default=8000)
+    return parser.parse_args()
+
+
+def main(args):
+    base_url = f"http://{args.host}:{args.port}"
+    models_url = base_url + "/v1/models"
+    rerank_url = base_url + "/rerank"
+
+    response = requests.get(models_url, headers=headers)
+    model = response.json()["data"][0]["id"]
+
+    data = {
+        "model": model,
+        "query": query,
+        "documents": documents,
+    }
+    response = requests.post(rerank_url, headers=headers, json=data)
+
+    # Check the response
+    if response.status_code == 200:
+        print("Request successful!")
+        print(json.dumps(response.json(), indent=2))
+    else:
+        print(f"Request failed with status code: {response.status_code}")
+        print(response.text)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/examples/pooling/score/vision_language_reranker.py b/examples/pooling/score/vision_reranker_offline.py
similarity index 100%
rename from examples/pooling/score/vision_language_reranker.py
rename to examples/pooling/score/vision_reranker_offline.py
diff --git a/examples/pooling/score/vision_score_api_online.py b/examples/pooling/score/vision_score_api_online.py
new file mode 100644
index 000000000..b4b4825ee
--- /dev/null
+++ b/examples/pooling/score/vision_score_api_online.py
@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# ruff: noqa: E501
+
+"""
+Example online usage of the Score API.
+
+Run `vllm serve <model> --runner pooling` to start up the server in vLLM.
+e.g.
+    vllm serve jinaai/jina-reranker-m0 --runner pooling
+
+    vllm serve Qwen/Qwen3-VL-Reranker-2B \
+        --runner pooling \
+        --max-model-len 4096 \
+        --hf_overrides '{"architectures": ["Qwen3VLForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}' \
+        --chat-template examples/pooling/score/template/qwen3_vl_reranker.jinja
+"""
+
+import argparse
+import json
+import pprint
+
+import requests
+
+headers = {"accept": "application/json", "Content-Type": "application/json"}
+
+text_1 = "slm markdown"
+text_2 = {
+    "content": [
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/handelsblatt-preview.png"
+            },
+        },
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": "https://raw.githubusercontent.com/jina-ai/multimodal-reranker-test/main/paper-11.png"
+            },
+        },
+    ]
+}
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="localhost")
+    parser.add_argument("--port", type=int, default=8000)
+    return parser.parse_args()
+
+
+def main(args):
+    base_url = f"http://{args.host}:{args.port}"
+    models_url = base_url + "/v1/models"
+    score_url = base_url + "/score"
+
+    response = requests.get(models_url, headers=headers)
+    model = response.json()["data"][0]["id"]
+
+    prompt = {"model": model, "text_1": text_1, "text_2": text_2}
+    response = requests.post(score_url, headers=headers, json=prompt)
+    print("\nPrompt when text_1 is a string and text_2 is an image list:")
+    pprint.pprint(prompt)
+    print("\nScore Response:")
+    print(json.dumps(response.json(), indent=2))
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
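
For a quick end-to-end check of the renamed `/rerank` endpoint, a minimal text-only client sketch is below. It is not part of the diff: the model name, host, and port are assumptions (any reranker served with `--runner pooling` works, e.g. `vllm serve BAAI/bge-reranker-v2-m3 --runner pooling`), and the loop assumes the Jina-style `results` array with `index` and `relevance_score` fields that the docs above state these endpoints are compatible with.

```python
# Sketch only: assumes a reranker is already served via
# `vllm serve <model> --runner pooling` on localhost:8000.
import requests

rerank_url = "http://localhost:8000/rerank"
headers = {"accept": "application/json", "Content-Type": "application/json"}

data = {
    "model": "BAAI/bge-reranker-v2-m3",  # assumed; match whatever model is served
    "query": "What is the capital of France?",
    "documents": [
        "The capital of Brazil is Brasilia.",
        "The capital of France is Paris.",
    ],
}

response = requests.post(rerank_url, headers=headers, json=data)
response.raise_for_status()

# Each result carries the index of a document and its relevance score.
for result in response.json()["results"]:
    print(result["index"], result["relevance_score"])
```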