[Bug][CLI] Allow users to disable prefix caching explicitly (#10724)
Signed-off-by: rickyx <rickyx@anyscale.com>
This commit is contained in:
@@ -4,6 +4,7 @@ from vllm import envs
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
|
||||
if not envs.VLLM_USE_V1:
|
||||
pytest.skip(
|
||||
@@ -12,6 +13,24 @@ if not envs.VLLM_USE_V1:
|
||||
)
|
||||
|
||||
|
||||
def test_prefix_caching_from_cli():
    """Check that prefix caching can be toggled from the command line.

    Three scenarios are parsed with the same CLI parser:

    * no flag at all -- prefix caching is expected to be enabled,
    * ``--no-enable-prefix-caching`` -- prefix caching must be disabled,
    * ``--enable-prefix-caching`` -- prefix caching must be enabled.
    """
    cli_parser = EngineArgs.add_cli_args(FlexibleArgumentParser())

    # No flag given: the default must leave prefix caching enabled.
    default_ns = cli_parser.parse_args([])
    default_engine_args = EngineArgs.from_cli_args(args=default_ns)
    assert default_engine_args.enable_prefix_caching, (
        "V1 turns on prefix caching by default.")

    # The negated flag lets users switch prefix caching off explicitly.
    disabled_ns = cli_parser.parse_args(["--no-enable-prefix-caching"])
    disabled_engine_args = EngineArgs.from_cli_args(args=disabled_ns)
    assert not disabled_engine_args.enable_prefix_caching

    # The positive flag switches prefix caching on explicitly.
    enabled_ns = cli_parser.parse_args(["--enable-prefix-caching"])
    enabled_engine_args = EngineArgs.from_cli_args(args=enabled_ns)
    assert enabled_engine_args.enable_prefix_caching
|
||||
|
||||
|
||||
def test_defaults():
|
||||
engine_args = EngineArgs(model="facebook/opt-125m")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user