Files
vllm/tests/basic_correctness/test_cpu_offload.py
2026-02-13 08:11:26 -08:00

30 lines
824 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from ..utils import compare_two_settings
@pytest.mark.parametrize("disable_pin_memory", [False, True])
@pytest.mark.parametrize("disable_uva", [False, True])
def test_cpu_offload(disable_pin_memory, disable_uva):
    """Check that CPU weight offloading matches the no-offload baseline.

    Runs the comparison across all four combinations of the pin-memory and
    UVA offloading toggles, passed to the server via environment variables.
    """
    offload_env = {
        "VLLM_WEIGHT_OFFLOADING_DISABLE_PIN_MEMORY": str(int(disable_pin_memory)),
        "VLLM_WEIGHT_OFFLOADING_DISABLE_UVA": str(int(disable_uva)),
    }
    # CUDA graph capture is only compatible with UVA offloading, so fall
    # back to eager execution when UVA is disabled.
    offload_args = (
        ["--cpu-offload-gb", "1", "--enforce-eager"]
        if disable_uva
        else ["--cpu-offload-gb", "1"]
    )
    compare_two_settings(
        model="hmellor/tiny-random-LlamaForCausalLM",
        arg1=[],
        arg2=offload_args,
        env1=None,
        env2=offload_env,
    )