[V1] Fully Transparent Implementation of CPU Offloading (#15354)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-03-31 20:22:34 +08:00
committed by GitHub
parent e7ae3bf3d6
commit 555aa21905
12 changed files with 148 additions and 25 deletions

View File

@@ -1,15 +1,8 @@
# SPDX-License-Identifier: Apache-2.0
import pytest
from ..utils import compare_two_settings
@pytest.fixture(scope="function", autouse=True)
def use_v0_only(monkeypatch):
monkeypatch.setenv('VLLM_USE_V1', '0')
def test_cpu_offload():
compare_two_settings("meta-llama/Llama-3.2-1B-Instruct", [],
["--cpu-offload-gb", "1"])