[V1] Wrapper which plumbs request-level logits processors into vLLM batch-level logits processing (#23656)

Signed-off-by: Andrew Feldman <afeldman@redhat.com>
2025-09-02 22:52:51 -04:00
parent e32a0e8678
commit 136d853e65
6 changed files with 524 additions and 5 deletions
--- a/tests/v1/logits_processors/test_custom_offline.py
+++ b/tests/v1/logits_processors/test_custom_offline.py
@@ -15,6 +15,7 @@ from tests.v1.logits_processors.utils import (DUMMY_LOGITPROC_ARG,
                                              POOLING_MODEL_NAME, TEMP_GREEDY,
                                              CustomLogitprocSource,
                                              DummyLogitsProcessor,
+                                              WrappedPerReqLogitsProcessor,
                                              dummy_module)
 from tests.v1.logits_processors.utils import entry_points as fake_entry_points
 from tests.v1.logits_processors.utils import prompts
@@ -161,6 +162,38 @@ def test_custom_logitsprocs(monkeypatch,
    _run_test(kwargs, logitproc_loaded=True)


+@create_new_process_for_each_test()
+def test_custom_logitsprocs_req(monkeypatch):
+    """Test passing request-level logits processor to offline Python interface.
+
+    Wrap a request-level logits processor to create a batch-level logits
+    processor that has a well-defined behavior (mask out all tokens except one
+    `target_token`).
+
+    Construct an `LLM` instance which loads the wrapped logits processor. Pass
+    the custom logitproc as a class object.
+
+    Construct a reference `LLM` instance with no custom logitproc.
+
+    Pass in a batch of requests, 50% of which pass a `target_token` value
+    in through `SamplingParams.extra_args`, and 50% of which do not.
+
+    Validate that:
+    * Requests which do not activate the custom logitproc yield the same
+      results for both `LLM` instances
+    * Requests which activate the custom logitproc only output `target_token`
+
+    Args:
+      monkeypatch: for setting env vars
+    """
+
+    # Enable multiprocessing to test that logitproc info is passed to workers
+    monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "1")
+    random.seed(40)
+    _run_test({"logits_processors": [WrappedPerReqLogitsProcessor]},
+              logitproc_loaded=True)
+
+
@create_new_process_for_each_test()
@pytest.mark.parametrize("logitproc_source", [
    CustomLogitprocSource.LOGITPROC_SOURCE_ENTRYPOINT,