[V1] Add VLLM_ALLOW_INSECURE_SERIALIZATION env var (#17490)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Russell Bryant
2025-05-08 01:34:02 -04:00
committed by GitHub
parent 998eea4a0e
commit 6930a41116
4 changed files with 170 additions and 115 deletions

View File

@@ -9,8 +9,8 @@ import pytest
import torch
from vllm.multimodal.inputs import (MultiModalBatchedField,
MultiModalFieldElem, MultiModalKwargs,
MultiModalKwargsItem,
MultiModalFieldElem, MultiModalFlatField,
MultiModalKwargs, MultiModalKwargsItem,
MultiModalSharedField, NestedTensors)
from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
@@ -36,59 +36,62 @@ class MyType:
empty_tensor: torch.Tensor
def test_encode_decode():
def test_encode_decode(monkeypatch: pytest.MonkeyPatch):
"""Test encode/decode loop with zero-copy tensors."""
obj = MyType(
tensor1=torch.randint(low=0,
high=100,
size=(1024, ),
dtype=torch.int32),
a_string="hello",
list_of_tensors=[
torch.rand((1, 10), dtype=torch.float32),
torch.rand((3, 5, 4000), dtype=torch.float64),
torch.tensor(1984), # test scalar too
# Make sure to test bf16 which numpy doesn't support.
torch.rand((3, 5, 1000), dtype=torch.bfloat16),
torch.tensor([float("-inf"), float("inf")] * 1024,
dtype=torch.bfloat16),
],
numpy_array=np.arange(512),
unrecognized=UnrecognizedType(33),
small_f_contig_tensor=torch.rand(5, 4).t(),
large_f_contig_tensor=torch.rand(1024, 4).t(),
small_non_contig_tensor=torch.rand(2, 4)[:, 1:3],
large_non_contig_tensor=torch.rand(1024, 512)[:, 10:20],
empty_tensor=torch.empty(0),
)
with monkeypatch.context() as m:
m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
encoder = MsgpackEncoder(size_threshold=256)
decoder = MsgpackDecoder(MyType)
obj = MyType(
tensor1=torch.randint(low=0,
high=100,
size=(1024, ),
dtype=torch.int32),
a_string="hello",
list_of_tensors=[
torch.rand((1, 10), dtype=torch.float32),
torch.rand((3, 5, 4000), dtype=torch.float64),
torch.tensor(1984), # test scalar too
# Make sure to test bf16 which numpy doesn't support.
torch.rand((3, 5, 1000), dtype=torch.bfloat16),
torch.tensor([float("-inf"), float("inf")] * 1024,
dtype=torch.bfloat16),
],
numpy_array=np.arange(512),
unrecognized=UnrecognizedType(33),
small_f_contig_tensor=torch.rand(5, 4).t(),
large_f_contig_tensor=torch.rand(1024, 4).t(),
small_non_contig_tensor=torch.rand(2, 4)[:, 1:3],
large_non_contig_tensor=torch.rand(1024, 512)[:, 10:20],
empty_tensor=torch.empty(0),
)
encoded = encoder.encode(obj)
encoder = MsgpackEncoder(size_threshold=256)
decoder = MsgpackDecoder(MyType)
# There should be the main buffer + 4 large tensor buffers
# + 1 large numpy array. "large" is <= 512 bytes.
# The two small tensors are encoded inline.
assert len(encoded) == 8
encoded = encoder.encode(obj)
decoded: MyType = decoder.decode(encoded)
# There should be the main buffer + 4 large tensor buffers
# + 1 large numpy array. "large" is <= 512 bytes.
# The two small tensors are encoded inline.
assert len(encoded) == 8
assert_equal(decoded, obj)
decoded: MyType = decoder.decode(encoded)
# Test encode_into case
assert_equal(decoded, obj)
preallocated = bytearray()
# Test encode_into case
encoded2 = encoder.encode_into(obj, preallocated)
preallocated = bytearray()
assert len(encoded2) == 8
assert encoded2[0] is preallocated
encoded2 = encoder.encode_into(obj, preallocated)
decoded2: MyType = decoder.decode(encoded2)
assert len(encoded2) == 8
assert encoded2[0] is preallocated
assert_equal(decoded2, obj)
decoded2: MyType = decoder.decode(encoded2)
assert_equal(decoded2, obj)
class MyRequest(msgspec.Struct):
@@ -122,7 +125,7 @@ def test_multimodal_kwargs():
total_len = sum(memoryview(x).cast("B").nbytes for x in encoded)
# expected total encoding length, should be 44559, +-20 for minor changes
assert total_len >= 44539 and total_len <= 44579
assert 44539 <= total_len <= 44579
decoded: MultiModalKwargs = decoder.decode(encoded).mm[0]
assert all(nested_equal(d[k], decoded[k]) for k in d)
@@ -135,14 +138,15 @@ def test_multimodal_items_by_modality():
"video",
"v0",
[torch.zeros(1000, dtype=torch.int8) for _ in range(4)],
MultiModalBatchedField(),
MultiModalFlatField(
[[slice(1, 2, 3), slice(4, 5, 6)], [slice(None, 2)]], 0),
)
e3 = MultiModalFieldElem("image", "i0", torch.zeros(1000,
dtype=torch.int32),
MultiModalSharedField(4))
e4 = MultiModalFieldElem("image", "i1", torch.zeros(1000,
dtype=torch.int32),
MultiModalBatchedField())
e4 = MultiModalFieldElem(
"image", "i1", torch.zeros(1000, dtype=torch.int32),
MultiModalFlatField([slice(1, 2, 3), slice(4, 5, 6)], 2))
audio = MultiModalKwargsItem.from_elems([e1])
video = MultiModalKwargsItem.from_elems([e2])
image = MultiModalKwargsItem.from_elems([e3, e4])
@@ -161,7 +165,7 @@ def test_multimodal_items_by_modality():
total_len = sum(memoryview(x).cast("B").nbytes for x in encoded)
# expected total encoding length, should be 14255, +-20 for minor changes
assert total_len >= 14235 and total_len <= 14275
assert 14250 <= total_len <= 14300
decoded: MultiModalKwargs = decoder.decode(encoded).mm[0]
# check all modalities were recovered and do some basic sanity checks
@@ -178,8 +182,7 @@ def test_multimodal_items_by_modality():
def nested_equal(a: NestedTensors, b: NestedTensors):
if isinstance(a, torch.Tensor):
return torch.equal(a, b)
else:
return all(nested_equal(x, y) for x, y in zip(a, b))
return all(nested_equal(x, y) for x, y in zip(a, b))
def assert_equal(obj1: MyType, obj2: MyType):
@@ -199,11 +202,10 @@ def assert_equal(obj1: MyType, obj2: MyType):
assert torch.equal(obj1.empty_tensor, obj2.empty_tensor)
@pytest.mark.parametrize("allow_pickle", [True, False])
def test_dict_serialization(allow_pickle: bool):
def test_dict_serialization():
"""Test encoding and decoding of a generic Python object using pickle."""
encoder = MsgpackEncoder(allow_pickle=allow_pickle)
decoder = MsgpackDecoder(allow_pickle=allow_pickle)
encoder = MsgpackEncoder()
decoder = MsgpackDecoder()
# Create a sample Python object
obj = {"key": "value", "number": 42}
@@ -218,11 +220,10 @@ def test_dict_serialization(allow_pickle: bool):
assert obj == decoded, "Decoded object does not match the original object."
@pytest.mark.parametrize("allow_pickle", [True, False])
def test_tensor_serialization(allow_pickle: bool):
def test_tensor_serialization():
"""Test encoding and decoding of a torch.Tensor."""
encoder = MsgpackEncoder(allow_pickle=allow_pickle)
decoder = MsgpackDecoder(torch.Tensor, allow_pickle=allow_pickle)
encoder = MsgpackEncoder()
decoder = MsgpackDecoder(torch.Tensor)
# Create a sample tensor
tensor = torch.rand(10, 10)
@@ -238,11 +239,10 @@ def test_tensor_serialization(allow_pickle: bool):
tensor, decoded), "Decoded tensor does not match the original tensor."
@pytest.mark.parametrize("allow_pickle", [True, False])
def test_numpy_array_serialization(allow_pickle: bool):
def test_numpy_array_serialization():
"""Test encoding and decoding of a numpy array."""
encoder = MsgpackEncoder(allow_pickle=allow_pickle)
decoder = MsgpackDecoder(np.ndarray, allow_pickle=allow_pickle)
encoder = MsgpackEncoder()
decoder = MsgpackDecoder(np.ndarray)
# Create a sample numpy array
array = np.random.rand(10, 10)
@@ -268,26 +268,31 @@ class CustomClass:
return isinstance(other, CustomClass) and self.value == other.value
def test_custom_class_serialization_allowed_with_pickle():
def test_custom_class_serialization_allowed_with_pickle(
monkeypatch: pytest.MonkeyPatch):
"""Test that serializing a custom class succeeds when allow_pickle=True."""
encoder = MsgpackEncoder(allow_pickle=True)
decoder = MsgpackDecoder(CustomClass, allow_pickle=True)
obj = CustomClass("test_value")
with monkeypatch.context() as m:
m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
encoder = MsgpackEncoder()
decoder = MsgpackDecoder(CustomClass)
# Encode the custom class
encoded = encoder.encode(obj)
obj = CustomClass("test_value")
# Decode the custom class
decoded = decoder.decode(encoded)
# Encode the custom class
encoded = encoder.encode(obj)
# Verify the decoded object matches the original
assert obj == decoded, "Decoded object does not match the original object."
# Decode the custom class
decoded = decoder.decode(encoded)
# Verify the decoded object matches the original
assert obj == decoded, (
"Decoded object does not match the original object.")
def test_custom_class_serialization_disallowed_without_pickle():
"""Test that serializing a custom class fails when allow_pickle=False."""
encoder = MsgpackEncoder(allow_pickle=False)
encoder = MsgpackEncoder()
obj = CustomClass("test_value")