[V1] Fully Transparent Implementation of CPU Offloading (#15354)

Signed-off-by: youkaichao <youkaichao@gmail.com>
2025-03-31 20:22:34 +08:00
parent e7ae3bf3d6
commit 555aa21905
12 changed files with 148 additions and 25 deletions
--- a/tests/basic_correctness/test_cpu_offload.py
+++ b/tests/basic_correctness/test_cpu_offload.py
@@ -1,15 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0

-import pytest
-
 from ..utils import compare_two_settings


-@pytest.fixture(scope="function", autouse=True)
-def use_v0_only(monkeypatch):
-    monkeypatch.setenv('VLLM_USE_V1', '0')
-
-
 def test_cpu_offload():
    compare_two_settings("meta-llama/Llama-3.2-1B-Instruct", [],
                         ["--cpu-offload-gb", "1"])