[Bugfix] Block manager v2 with preemption and lookahead slots (#8824)

2024-09-28 18:17:45 -07:00
parent d1537039ce
commit 5bf8789b2a
9 changed files with 133 additions and 116 deletions
--- a/tests/basic_correctness/test_preemption.py
+++ b/tests/basic_correctness/test_preemption.py
@@ -23,8 +23,10 @@ MODELS = [
@pytest.fixture(scope="module", autouse=True)
 def check_settings():
    assert ENABLE_ARTIFICIAL_PREEMPT is True, (
-        "Use an env var VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1. "
-        "`VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest "
+        "Use an env var VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1, "
+        "VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1. "
+        "`VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 "
+        "VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 pytest "
        "tests/basic_correctness/test_preemption.py`")


@@ -199,6 +201,7 @@ def test_swap(
@pytest.mark.parametrize("dtype", ["float"])
@pytest.mark.parametrize("max_tokens", [96])
@pytest.mark.parametrize("beam_width", [4])
+@pytest.mark.parametrize("use_v2_block_manager", [True, False])
 def test_swap_infeasible(
    vllm_runner,
    example_prompts,
@@ -207,6 +210,7 @@ def test_swap_infeasible(
    max_tokens: int,
    beam_width: int,
    worker_use_ray: bool,
+    use_v2_block_manager: bool,
 ) -> None:
    """Verify infeasible swap request will be ignored."""
    BLOCK_SIZE = 16
@@ -223,6 +227,7 @@ def test_swap_infeasible(
            num_gpu_blocks_override=prefill_blocks + decode_blocks,
            max_model_len=(prefill_blocks + decode_blocks) * BLOCK_SIZE,
            worker_use_ray=worker_use_ray,
+            use_v2_block_manager=use_v2_block_manager,
    ) as vllm_model:
        sampling_params = SamplingParams(n=beam_width,
                                         use_beam_search=True,