[Doc] Update reproducibility doc and example (#18741)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-05-27 15:03:13 +08:00
parent 25a817f202
commit 753944fa9b
2 changed files with 47 additions and 43 deletions
--- a/examples/offline_inference/reproducibility.py
+++ b/examples/offline_inference/reproducibility.py
@@ -1,24 +1,22 @@
 # SPDX-License-Identifier: Apache-2.0
+"""
+Demonstrates how to achieve reproducibility in vLLM.
+
+Main article: https://docs.vllm.ai/en/latest/usage/reproducibility.html
+"""
+
 import os
+import random

 from vllm import LLM, SamplingParams

-# vLLM does not guarantee the reproducibility of the results by default,
-# for the sake of performance. You need to do the following to achieve
-# reproducible results:
-# 1. Turn off multiprocessing to make the scheduling deterministic.
-#    NOTE(woosuk): This is not needed and will be ignored for V0.
+# V1 only: Turn off multiprocessing to make the scheduling deterministic.
 os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
-# 2. Fix the global seed for reproducibility. The default seed is None, which is
+
+# V0 only: Set the global seed. The default seed is None, which is
 # not reproducible.
 SEED = 42

-# NOTE(woosuk): Even with the above two settings, vLLM only provides
-# reproducibility when it runs on the same hardware and the same vLLM version.
-# Also, the online serving API (`vllm serve`) does not support reproducibility
-# because it is almost impossible to make the scheduling deterministic in the
-# online serving setting.
-
 prompts = [
    "Hello, my name is",
    "The president of the United States is",
@@ -38,6 +36,11 @@ def main():
        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
        print("-" * 50)

+    # Generate a random number outside vLLM.
+    # The same number is printed on every run, which shows that vLLM has
+    # also modified the random state of the user code.
+    print(random.randint(0, 100))
+

 if __name__ == "__main__":
    main()