diff --git a/tests/distributed/test_weight_transfer.py b/tests/distributed/test_weight_transfer.py index 04747e732..b370721b3 100644 --- a/tests/distributed/test_weight_transfer.py +++ b/tests/distributed/test_weight_transfer.py @@ -456,11 +456,13 @@ class TestIPCWeightTransferUpdateInfoValidation: ipc_handles=ipc_handles, ) - def test_valid_update_info_from_pickled(self): + def test_valid_update_info_from_pickled(self, monkeypatch): """Test creating IPCWeightTransferUpdateInfo from pickled handles.""" if torch.cuda.device_count() < 1: pytest.skip("Need at least 1 GPU for this test") + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + dummy_tensor = torch.ones(10, 10, device="cuda:0") ipc_handle = reduce_tensor(dummy_tensor) gpu_uuid = str(torch.cuda.get_device_properties(0).uuid) @@ -477,6 +479,18 @@ class TestIPCWeightTransferUpdateInfoValidation: assert info.ipc_handles == ipc_handles assert info.ipc_handles_pickled is None + def test_pickled_requires_insecure_serialization_flag(self, monkeypatch): + """Test that pickled handles are rejected unless env flag is enabled.""" + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "0") + + with pytest.raises(ValueError, match="VLLM_ALLOW_INSECURE_SERIALIZATION=1"): + IPCWeightTransferUpdateInfo( + names=[], + dtype_names=[], + shapes=[], + ipc_handles_pickled=base64.b64encode(pickle.dumps([])).decode("utf-8"), + ) + def test_both_handles_and_pickled_raises(self): """Test that providing both ipc_handles and ipc_handles_pickled raises.""" if torch.cuda.device_count() < 1: @@ -556,11 +570,13 @@ class TestIPCEngineParsing: assert update_info.shapes == [[100, 100], [50]] assert len(update_info.ipc_handles) == 2 - def test_parse_update_info_pickled(self): + def test_parse_update_info_pickled(self, monkeypatch): """Test parsing update info with pickled IPC handles (HTTP path).""" if torch.cuda.device_count() < 1: pytest.skip("Need at least 1 GPU for this test") + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + config = WeightTransferConfig(backend="ipc") parallel_config = create_mock_parallel_config() engine = IPCWeightTransferEngine(config, parallel_config) diff --git a/vllm/distributed/weight_transfer/ipc_engine.py b/vllm/distributed/weight_transfer/ipc_engine.py index 2edbec625..85dd34553 100644 --- a/vllm/distributed/weight_transfer/ipc_engine.py +++ b/vllm/distributed/weight_transfer/ipc_engine.py @@ -12,6 +12,7 @@ import requests import torch from torch.multiprocessing.reductions import reduce_tensor +from vllm import envs from vllm.config.parallel import ParallelConfig from vllm.config.weight_transfer import WeightTransferConfig from vllm.distributed.weight_transfer.base import ( @@ -74,6 +75,13 @@ class IPCWeightTransferUpdateInfo(WeightTransferUpdateInfo): raise ValueError( "Cannot specify both `ipc_handles` and `ipc_handles_pickled`" ) + + if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION: + raise ValueError( + "Refusing to deserialize `ipc_handles_pickled` without " + "VLLM_ALLOW_INSECURE_SERIALIZATION=1" + ) + self.ipc_handles = pickle.loads(base64.b64decode(self.ipc_handles_pickled)) self.ipc_handles_pickled = None