[V1] Add VLLM_ALLOW_INSECURE_SERIALIZATION env var (#17490)

Signed-off-by: Russell Bryant <rbryant@redhat.com> Signed-off-by: Nick Hill <nhill@redhat.com> Co-authored-by: Nick Hill <nhill@redhat.com>
2025-05-08 01:34:02 -04:00
parent 998eea4a0e
commit 6930a41116
4 changed files with 170 additions and 115 deletions
--- a/tests/v1/test_serial_utils.py
+++ b/tests/v1/test_serial_utils.py
@@ -9,8 +9,8 @@ import pytest
 import torch

 from vllm.multimodal.inputs import (MultiModalBatchedField,
-                                    MultiModalFieldElem, MultiModalKwargs,
-                                    MultiModalKwargsItem,
+                                    MultiModalFieldElem, MultiModalFlatField,
+                                    MultiModalKwargs, MultiModalKwargsItem,
                                    MultiModalSharedField, NestedTensors)
 from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder

@@ -36,59 +36,62 @@ class MyType:
    empty_tensor: torch.Tensor


-def test_encode_decode():
+def test_encode_decode(monkeypatch: pytest.MonkeyPatch):
    """Test encode/decode loop with zero-copy tensors."""

-    obj = MyType(
-        tensor1=torch.randint(low=0,
-                              high=100,
-                              size=(1024, ),
-                              dtype=torch.int32),
-        a_string="hello",
-        list_of_tensors=[
-            torch.rand((1, 10), dtype=torch.float32),
-            torch.rand((3, 5, 4000), dtype=torch.float64),
-            torch.tensor(1984),  # test scalar too
-            # Make sure to test bf16 which numpy doesn't support.
-            torch.rand((3, 5, 1000), dtype=torch.bfloat16),
-            torch.tensor([float("-inf"), float("inf")] * 1024,
-                         dtype=torch.bfloat16),
-        ],
-        numpy_array=np.arange(512),
-        unrecognized=UnrecognizedType(33),
-        small_f_contig_tensor=torch.rand(5, 4).t(),
-        large_f_contig_tensor=torch.rand(1024, 4).t(),
-        small_non_contig_tensor=torch.rand(2, 4)[:, 1:3],
-        large_non_contig_tensor=torch.rand(1024, 512)[:, 10:20],
-        empty_tensor=torch.empty(0),
-    )
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

-    encoder = MsgpackEncoder(size_threshold=256)
-    decoder = MsgpackDecoder(MyType)
+        obj = MyType(
+            tensor1=torch.randint(low=0,
+                                  high=100,
+                                  size=(1024, ),
+                                  dtype=torch.int32),
+            a_string="hello",
+            list_of_tensors=[
+                torch.rand((1, 10), dtype=torch.float32),
+                torch.rand((3, 5, 4000), dtype=torch.float64),
+                torch.tensor(1984),  # test scalar too
+                # Make sure to test bf16 which numpy doesn't support.
+                torch.rand((3, 5, 1000), dtype=torch.bfloat16),
+                torch.tensor([float("-inf"), float("inf")] * 1024,
+                             dtype=torch.bfloat16),
+            ],
+            numpy_array=np.arange(512),
+            unrecognized=UnrecognizedType(33),
+            small_f_contig_tensor=torch.rand(5, 4).t(),
+            large_f_contig_tensor=torch.rand(1024, 4).t(),
+            small_non_contig_tensor=torch.rand(2, 4)[:, 1:3],
+            large_non_contig_tensor=torch.rand(1024, 512)[:, 10:20],
+            empty_tensor=torch.empty(0),
+        )

-    encoded = encoder.encode(obj)
+        encoder = MsgpackEncoder(size_threshold=256)
+        decoder = MsgpackDecoder(MyType)

-    # There should be the main buffer + 4 large tensor buffers
-    # + 1 large numpy array. "large" is <= 512 bytes.
-    # The two small tensors are encoded inline.
-    assert len(encoded) == 8
+        encoded = encoder.encode(obj)

-    decoded: MyType = decoder.decode(encoded)
+        # There should be the main buffer + 4 large tensor buffers
+        # + 1 large numpy array. "large" is <= 512 bytes.
+        # The two small tensors are encoded inline.
+        assert len(encoded) == 8

-    assert_equal(decoded, obj)
+        decoded: MyType = decoder.decode(encoded)

-    # Test encode_into case
+        assert_equal(decoded, obj)

-    preallocated = bytearray()
+        # Test encode_into case

-    encoded2 = encoder.encode_into(obj, preallocated)
+        preallocated = bytearray()

-    assert len(encoded2) == 8
-    assert encoded2[0] is preallocated
+        encoded2 = encoder.encode_into(obj, preallocated)

-    decoded2: MyType = decoder.decode(encoded2)
+        assert len(encoded2) == 8
+        assert encoded2[0] is preallocated

-    assert_equal(decoded2, obj)
+        decoded2: MyType = decoder.decode(encoded2)
+
+        assert_equal(decoded2, obj)


 class MyRequest(msgspec.Struct):
@@ -122,7 +125,7 @@ def test_multimodal_kwargs():
    total_len = sum(memoryview(x).cast("B").nbytes for x in encoded)

    # expected total encoding length, should be 44559, +-20 for minor changes
-    assert total_len >= 44539 and total_len <= 44579
+    assert 44539 <= total_len <= 44579
    decoded: MultiModalKwargs = decoder.decode(encoded).mm[0]
    assert all(nested_equal(d[k], decoded[k]) for k in d)

@@ -135,14 +138,15 @@ def test_multimodal_items_by_modality():
        "video",
        "v0",
        [torch.zeros(1000, dtype=torch.int8) for _ in range(4)],
-        MultiModalBatchedField(),
+        MultiModalFlatField(
+            [[slice(1, 2, 3), slice(4, 5, 6)], [slice(None, 2)]], 0),
    )
    e3 = MultiModalFieldElem("image", "i0", torch.zeros(1000,
                                                        dtype=torch.int32),
                             MultiModalSharedField(4))
-    e4 = MultiModalFieldElem("image", "i1", torch.zeros(1000,
-                                                        dtype=torch.int32),
-                             MultiModalBatchedField())
+    e4 = MultiModalFieldElem(
+        "image", "i1", torch.zeros(1000, dtype=torch.int32),
+        MultiModalFlatField([slice(1, 2, 3), slice(4, 5, 6)], 2))
    audio = MultiModalKwargsItem.from_elems([e1])
    video = MultiModalKwargsItem.from_elems([e2])
    image = MultiModalKwargsItem.from_elems([e3, e4])
@@ -161,7 +165,7 @@ def test_multimodal_items_by_modality():
    total_len = sum(memoryview(x).cast("B").nbytes for x in encoded)

    # expected total encoding length, should be 14255, +-20 for minor changes
-    assert total_len >= 14235 and total_len <= 14275
+    assert 14250 <= total_len <= 14300
    decoded: MultiModalKwargs = decoder.decode(encoded).mm[0]

    # check all modalities were recovered and do some basic sanity checks
@@ -178,8 +182,7 @@ def test_multimodal_items_by_modality():
 def nested_equal(a: NestedTensors, b: NestedTensors):
    if isinstance(a, torch.Tensor):
        return torch.equal(a, b)
-    else:
-        return all(nested_equal(x, y) for x, y in zip(a, b))
+    return all(nested_equal(x, y) for x, y in zip(a, b))


 def assert_equal(obj1: MyType, obj2: MyType):
@@ -199,11 +202,10 @@ def assert_equal(obj1: MyType, obj2: MyType):
    assert torch.equal(obj1.empty_tensor, obj2.empty_tensor)


-@pytest.mark.parametrize("allow_pickle", [True, False])
-def test_dict_serialization(allow_pickle: bool):
+def test_dict_serialization():
    """Test encoding and decoding of a generic Python object using pickle."""
-    encoder = MsgpackEncoder(allow_pickle=allow_pickle)
-    decoder = MsgpackDecoder(allow_pickle=allow_pickle)
+    encoder = MsgpackEncoder()
+    decoder = MsgpackDecoder()

    # Create a sample Python object
    obj = {"key": "value", "number": 42}
@@ -218,11 +220,10 @@ def test_dict_serialization(allow_pickle: bool):
    assert obj == decoded, "Decoded object does not match the original object."


-@pytest.mark.parametrize("allow_pickle", [True, False])
-def test_tensor_serialization(allow_pickle: bool):
+def test_tensor_serialization():
    """Test encoding and decoding of a torch.Tensor."""
-    encoder = MsgpackEncoder(allow_pickle=allow_pickle)
-    decoder = MsgpackDecoder(torch.Tensor, allow_pickle=allow_pickle)
+    encoder = MsgpackEncoder()
+    decoder = MsgpackDecoder(torch.Tensor)

    # Create a sample tensor
    tensor = torch.rand(10, 10)
@@ -238,11 +239,10 @@ def test_tensor_serialization(allow_pickle: bool):
        tensor, decoded), "Decoded tensor does not match the original tensor."


-@pytest.mark.parametrize("allow_pickle", [True, False])
-def test_numpy_array_serialization(allow_pickle: bool):
+def test_numpy_array_serialization():
    """Test encoding and decoding of a numpy array."""
-    encoder = MsgpackEncoder(allow_pickle=allow_pickle)
-    decoder = MsgpackDecoder(np.ndarray, allow_pickle=allow_pickle)
+    encoder = MsgpackEncoder()
+    decoder = MsgpackDecoder(np.ndarray)

    # Create a sample numpy array
    array = np.random.rand(10, 10)
@@ -268,26 +268,31 @@ class CustomClass:
        return isinstance(other, CustomClass) and self.value == other.value


-def test_custom_class_serialization_allowed_with_pickle():
+def test_custom_class_serialization_allowed_with_pickle(
+        monkeypatch: pytest.MonkeyPatch):
    """Test that serializing a custom class succeeds when allow_pickle=True."""
-    encoder = MsgpackEncoder(allow_pickle=True)
-    decoder = MsgpackDecoder(CustomClass, allow_pickle=True)

-    obj = CustomClass("test_value")
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
+        encoder = MsgpackEncoder()
+        decoder = MsgpackDecoder(CustomClass)

-    # Encode the custom class
-    encoded = encoder.encode(obj)
+        obj = CustomClass("test_value")

-    # Decode the custom class
-    decoded = decoder.decode(encoded)
+        # Encode the custom class
+        encoded = encoder.encode(obj)

-    # Verify the decoded object matches the original
-    assert obj == decoded, "Decoded object does not match the original object."
+        # Decode the custom class
+        decoded = decoder.decode(encoded)
+
+        # Verify the decoded object matches the original
+        assert obj == decoded, (
+            "Decoded object does not match the original object.")


 def test_custom_class_serialization_disallowed_without_pickle():
    """Test that serializing a custom class fails when allow_pickle=False."""
-    encoder = MsgpackEncoder(allow_pickle=False)
+    encoder = MsgpackEncoder()

    obj = CustomClass("test_value")