[Core][Doc] Default to multiprocessing for single-node distributed case (#5230)

Co-authored-by: Antoni Baum <antoni.baum@protonmail.com>
2024-06-11 11:10:41 -07:00
parent c4bd03c7c5
commit 99dac099ab
5 changed files with 31 additions and 14 deletions
--- a/tests/spec_decode/e2e/conftest.py
+++ b/tests/spec_decode/e2e/conftest.py
@@ -77,7 +77,11 @@ class AsyncLLM:
            swap_space=swap_space,
            enforce_eager=enforce_eager,
            max_seq_len_to_capture=max_seq_len_to_capture,
+            # Use Ray as the distributed backend for now, since we rely
+            # on engine_use_ray=True to avoid reinitializing CUDA in the
+            # same process (the driver worker).
            engine_use_ray=True,
+            distributed_executor_backend="ray",
            disable_custom_all_reduce=disable_custom_all_reduce,
            **kwargs,
        )