[Core] Optimize SPMD architecture with delta + serialization optimization (#7109)

2024-08-18 17:57:20 -07:00
parent 200a2ffa6b
commit ff7ec82c4d
36 changed files with 722 additions and 346 deletions
--- a/tests/distributed/test_basic_distributed_correctness.py
+++ b/tests/distributed/test_basic_distributed_correctness.py
@@ -22,7 +22,8 @@ TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")
@pytest.mark.skipif(cuda_device_count_stateless() < 2,
                    reason="Need at least 2 GPUs to run the test.")
@pytest.mark.parametrize(
-    "model, distributed_executor_backend, attention_backend, test_suite", [
+    "model, distributed_executor_backend, attention_backend, "
+    "test_suite", [
        ("facebook/opt-125m", "ray", "", "L4"),
        ("facebook/opt-125m", "mp", "", "L4"),
        ("meta-llama/Llama-2-7b-hf", "ray", "", "L4"),