From b91cb3fa5c40993a1e56ffb6915db9ffebf9aa0a Mon Sep 17 00:00:00 2001
From: Ricardo Decal
Date: Tue, 8 Jul 2025 02:09:06 -0700
Subject: [PATCH] [Docs] Improve documentation for Deepseek R1 on Ray Serve LLM (#20601)

Signed-off-by: Ricardo Decal
---
 examples/online_serving/ray_serve_deepseek.py | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/examples/online_serving/ray_serve_deepseek.py b/examples/online_serving/ray_serve_deepseek.py
index 9471563dd..d24b553df 100644
--- a/examples/online_serving/ray_serve_deepseek.py
+++ b/examples/online_serving/ray_serve_deepseek.py
@@ -1,13 +1,21 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
-Example to deploy DeepSeek R1 or V3 with Ray Serve LLM.
-See more details at:
-https://docs.ray.io/en/latest/serve/tutorials/serve-deepseek.html
-And see Ray Serve LLM documentation at:
-https://docs.ray.io/en/latest/serve/llm/serving-llms.html
+Deploy DeepSeek R1 or V3 with Ray Serve LLM.
 
-Run `python3 ray_serve_deepseek.py` to deploy the model.
+Ray Serve LLM is a scalable, production-grade model serving library built on
+the Ray distributed computing framework, with first-class support for the vLLM engine.
+
+Key features:
+- Automatic scaling, back-pressure, and load balancing across a Ray cluster.
+- Unified multi-node multi-model deployment.
+- An OpenAI-compatible HTTP API.
+- Multi-LoRA support with shared base models.
+
+Run `python3 ray_serve_deepseek.py` to launch an endpoint.
+
+Learn more in the official Ray Serve LLM documentation:
+https://docs.ray.io/en/latest/serve/llm/serving-llms.html
 """
 
 from ray import serve
@@ -16,9 +24,8 @@ from ray.serve.llm import LLMConfig, build_openai_app
 llm_config = LLMConfig(
     model_loading_config={
         "model_id": "deepseek",
-        # Since DeepSeek model is huge, it is recommended to pre-download
-        # the model to local disk, say /path/to/the/model and specify:
-        # model_source="/path/to/the/model"
+        # Pre-downloading the model to local storage is recommended since
+        # the model is large. Set model_source="/path/to/the/model".
         "model_source": "deepseek-ai/DeepSeek-R1",
     },
     deployment_config={
@@ -27,22 +34,22 @@ llm_config = LLMConfig(
             "max_replicas": 1,
         }
     },
-    # Change to the accelerator type of the node
+    # Set to the node's accelerator type.
     accelerator_type="H100",
     runtime_env={"env_vars": {"VLLM_USE_V1": "1"}},
-    # Customize engine arguments as needed (e.g. vLLM engine kwargs)
+    # Customize engine arguments as required (for example, vLLM engine kwargs).
     engine_kwargs={
         "tensor_parallel_size": 8,
         "pipeline_parallel_size": 2,
         "gpu_memory_utilization": 0.92,
         "dtype": "auto",
         "max_num_seqs": 40,
         "max_model_len": 16384,
         "enable_chunked_prefill": True,
         "enable_prefix_caching": True,
     },
 )
 
-# Deploy the application
+# Deploy the application.
 llm_app = build_openai_app({"llm_configs": [llm_config]})
 serve.run(llm_app)
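
The comment in model_loading_config recommends pre-downloading the weights.
A minimal sketch of one way to do that with huggingface_hub (the tool choice
is an assumption, and /path/to/the/model reuses the placeholder path from the
patch; substitute any local directory with enough disk):

    from huggingface_hub import snapshot_download

    # Fetch the DeepSeek-R1 weights once, then point model_source at the
    # local copy so replicas do not re-download from the Hub.
    snapshot_download(
        repo_id="deepseek-ai/DeepSeek-R1",
        local_dir="/path/to/the/model",  # placeholder path from the patch
    )

Once `python3 ray_serve_deepseek.py` is running, the app serves an
OpenAI-compatible API. A minimal client sketch, assuming Ray Serve's default
HTTP address http://localhost:8000 and the model_id "deepseek" configured
above; the api_key is a placeholder, since the local endpoint does not check
it unless authentication is configured:

    from openai import OpenAI

    # Point the standard OpenAI client at the local Ray Serve endpoint.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="placeholder")

    response = client.chat.completions.create(
        # Matches the model_id registered in model_loading_config.
        model="deepseek",
        messages=[{"role": "user", "content": "Hello from Ray Serve LLM!"}],
    )
    print(response.choices[0].message.content)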