[V1] Support LLM.apply_model (#18465)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-09-20 15:14:35 +08:00
parent be874c0201
commit 3d9a1d2de5
17 changed files with 194 additions and 169 deletions
--- a/tests/quantization/test_modelopt.py
+++ b/tests/quantization/test_modelopt.py
@@ -11,16 +11,12 @@ import pytest
 import torch

 from tests.quantization.utils import is_quant_method_supported
-from vllm.platforms import current_platform


@pytest.fixture(scope="function", autouse=True)
-def use_v0_only(monkeypatch):
-    """
-    This module relies on V0 internals, so set VLLM_USE_V1=0.
-    """
-    if not current_platform.is_cpu():
-        monkeypatch.setenv('VLLM_USE_V1', '0')
+def enable_pickle(monkeypatch):
+    """`LLM.apply_model` requires pickling a function."""
+    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")


@pytest.mark.skipif(not is_quant_method_supported("modelopt"),