[Doc] ruff format some Python examples (#26767)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-10-14 18:21:53 +08:00
parent 70b1b330e1
commit ef9676a1f1
20 changed files with 341 additions and 290 deletions
--- a/docs/configuration/conserving_memory.md
+++ b/docs/configuration/conserving_memory.md
@@ -11,8 +11,7 @@ The following code splits the model across 2 GPUs.
 ```python
 from vllm import LLM

-llm = LLM(model="ibm-granite/granite-3.1-8b-instruct",
-          tensor_parallel_size=2)
+llm = LLM(model="ibm-granite/granite-3.1-8b-instruct", tensor_parallel_size=2)
 ```

 !!! warning
@@ -43,9 +42,7 @@ and the maximum batch size (`max_num_seqs` option).
 ```python
 from vllm import LLM

-llm = LLM(model="adept/fuyu-8b",
-          max_model_len=2048,
-          max_num_seqs=2)
+llm = LLM(model="adept/fuyu-8b", max_model_len=2048, max_num_seqs=2)
 ```

 ## Reduce CUDA Graphs
@@ -78,8 +75,7 @@ You can disable graph capturing completely via the `enforce_eager` flag:
 ```python
 from vllm import LLM

-llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct",
-          enforce_eager=True)
+llm = LLM(model="meta-llama/Llama-3.1-8B-Instruct", enforce_eager=True)
 ```

 ## Adjust cache size
@@ -97,8 +93,10 @@ You can allow a smaller number of multi-modal items per prompt to reduce the mem
 from vllm import LLM

 # Accept up to 3 images and 1 video per prompt
-llm = LLM(model="Qwen/Qwen2.5-VL-3B-Instruct",
-          limit_mm_per_prompt={"image": 3, "video": 1})
+llm = LLM(
+    model="Qwen/Qwen2.5-VL-3B-Instruct",
+    limit_mm_per_prompt={"image": 3, "video": 1},
+)
 ```

 You can go a step further and disable unused modalities completely by setting its limit to zero.
@@ -108,8 +106,10 @@ For example, if your application only accepts image input, there is no need to a
 from vllm import LLM

 # Accept any number of images but no videos
-llm = LLM(model="Qwen/Qwen2.5-VL-3B-Instruct",
-          limit_mm_per_prompt={"video": 0})
+llm = LLM(
+    model="Qwen/Qwen2.5-VL-3B-Instruct",
+    limit_mm_per_prompt={"video": 0},
+)
 ```

 You can even run a multi-modal model for text-only inference:
@@ -118,8 +118,10 @@ You can even run a multi-modal model for text-only inference:
 from vllm import LLM

 # Don't accept images. Just text.
-llm = LLM(model="google/gemma-3-27b-it",
-          limit_mm_per_prompt={"image": 0})
+llm = LLM(
+    model="google/gemma-3-27b-it",
+    limit_mm_per_prompt={"image": 0},
+)
 ```

 ### Configurable options
@@ -173,14 +175,14 @@ Here are some examples:
 from vllm import LLM

 # Available for Qwen2-VL series models
-llm = LLM(model="Qwen/Qwen2.5-VL-3B-Instruct",
-          mm_processor_kwargs={
-              "max_pixels": 768 * 768,  # Default is 1280 * 28 * 28
-          })
+llm = LLM(
+    model="Qwen/Qwen2.5-VL-3B-Instruct",
+    mm_processor_kwargs={"max_pixels": 768 * 768},  # Default is 1280 * 28 * 28
+)

 # Available for InternVL series models
-llm = LLM(model="OpenGVLab/InternVL2-2B",
-          mm_processor_kwargs={
-              "max_dynamic_patch": 4,  # Default is 12
-          })
+llm = LLM(
+    model="OpenGVLab/InternVL2-2B",
+    mm_processor_kwargs={"max_dynamic_patch": 4},  # Default is 12
+)
 ```