diff --git a/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py b/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py
index 9f2309c76..cd13aca7e 100644
--- a/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py
+++ b/tests/entrypoints/weight_transfer/test_weight_transfer_llm.py
@@ -124,6 +124,8 @@ def test_init_weight_transfer_engine_calls_engine():
     if torch.cuda.device_count() < 1:
         pytest.skip("Need at least 1 GPU for this test")
 
+    # Run in-process so mock.patch works (spawn won't inherit the mock)
+    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
     # Enable insecure serialization to allow pickling functions for collective_rpc
     os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
 
@@ -171,6 +173,8 @@ def test_update_weights_calls_engine():
     if torch.cuda.device_count() < 1:
         pytest.skip("Need at least 1 GPU for this test")
 
+    # Run in-process so mock.patch works (spawn won't inherit the mock)
+    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
     # Enable insecure serialization to allow pickling functions for collective_rpc
     os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
 
@@ -228,6 +232,8 @@ def test_full_weight_transfer_flow():
     if torch.cuda.device_count() < 1:
         pytest.skip("Need at least 1 GPU for this test")
 
+    # Run in-process so mock.patch works (spawn won't inherit the mock)
+    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
     # Enable insecure serialization to allow pickling functions for collective_rpc
     os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
 