[Bugfix] Block manager v2 with preemption and lookahead slots (#8824)
This commit is contained in:
@@ -23,8 +23,10 @@ MODELS = [
|
||||
@pytest.fixture(scope="module", autouse=True)
|
||||
def check_settings():
|
||||
assert ENABLE_ARTIFICIAL_PREEMPT is True, (
|
||||
- "Use an env var VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1. "
|
||||
- "`VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest "
|
||||
+ "Use an env var VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1, "
|
||||
+ "VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1. "
|
||||
+ "`VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 "
|
||||
+ "VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 pytest "
|
||||
"tests/basic_correctness/test_preemption.py`")
|
||||
|
||||
|
||||
@@ -199,6 +201,7 @@ def test_swap(
|
||||
@pytest.mark.parametrize("dtype", ["float"])
|
||||
@pytest.mark.parametrize("max_tokens", [96])
|
||||
@pytest.mark.parametrize("beam_width", [4])
|
||||
+ @pytest.mark.parametrize("use_v2_block_manager", [True, False])
|
||||
def test_swap_infeasible(
|
||||
vllm_runner,
|
||||
example_prompts,
|
||||
@@ -207,6 +210,7 @@ def test_swap_infeasible(
|
||||
max_tokens: int,
|
||||
beam_width: int,
|
||||
worker_use_ray: bool,
|
||||
+ use_v2_block_manager: bool,
|
||||
) -> None:
|
||||
"""Verify infeasible swap request will be ignored."""
|
||||
BLOCK_SIZE = 16
|
||||
@@ -223,6 +227,7 @@ def test_swap_infeasible(
|
||||
num_gpu_blocks_override=prefill_blocks + decode_blocks,
|
||||
max_model_len=(prefill_blocks + decode_blocks) * BLOCK_SIZE,
|
||||
worker_use_ray=worker_use_ray,
|
||||
+ use_v2_block_manager=use_v2_block_manager,
|
||||
) as vllm_model:
|
||||
sampling_params = SamplingParams(n=beam_width,
|
||||
use_beam_search=True,
|
||||
|
||||
Reference in New Issue
Block a user