[BugFix] Make penalties and bad_words work with async scheduling (#26467)

Signed-off-by: Nick Hill <nhill@redhat.com>
2025-10-10 16:27:04 -07:00
parent eef921f45e
commit 5bc26c438d
4 changed files with 113 additions and 14 deletions
--- a/tests/v1/e2e/test_async_sched_and_preempt.py
+++ b/tests/v1/e2e/test_async_sched_and_preempt.py
@@ -28,9 +28,8 @@ def test_preempt_and_async_scheduling_e2e(monkeypatch: pytest.MonkeyPatch):
    sampling_param_tests: list[dict[str, Any]] = [
        dict(),
        # dict(min_tokens=20),
-        # TODO enable these with https://github.com/vllm-project/vllm/pull/26467.
-        # dict(repetition_penalty=0.1),
-        # dict(bad_words=[]),
+        dict(presence_penalty=-1.0),
+        dict(bad_words=["the", " the"]),
    ]

    default_params = dict(
@@ -42,9 +41,9 @@ def test_preempt_and_async_scheduling_e2e(monkeypatch: pytest.MonkeyPatch):
        m.setenv("VLLM_ATTENTION_BACKEND", "FLEX_ATTENTION")
        # m.setenv("VLLM_KERNEL_OVERRIDE_BATCH_INVARIANT", "1")

-        outputs = []
+        outputs: list[tuple[str, list]] = []
        for test_preemption in [False, True]:
-            for executor in ["uni", "mp"]:
+            for executor in ["mp", "uni"]:
                for async_scheduling in [False, True]:
                    cache_arg: dict[str, Any] = (
                        dict(num_gpu_blocks_override=32)
@@ -78,6 +77,21 @@ def test_preempt_and_async_scheduling_e2e(monkeypatch: pytest.MonkeyPatch):
                                    ),
                                )
                            )
+
+                        if not outputs:
+                            # Sanity check (first config only): each non-default
+                            # sampling-param set must change output vs. the default.
+                            for other_test, params in zip(
+                                results[1:], sampling_param_tests[1:]
+                            ):
+                                with pytest.raises(AssertionError):
+                                    check_outputs_equal(
+                                        outputs_0_lst=results[0],
+                                        outputs_1_lst=other_test,
+                                        name_0=f"baseline params={params}",
+                                        name_1=f"other params={params}",
+                                    )
+
                        outputs.append((test_config, results))

    baseline_config, baseline_tests = outputs[0]