[Core] Support min_tokens with speculative decoding (#32642)

Signed-off-by: qianlihuang <yiliu.dong@qq.com>
Co-authored-by: qianlihuang <yiliu.dong@qq.com>
2026-02-27 01:31:28 +08:00
parent 99c7892c5b
commit d940607629
7 changed files with 75 additions and 11 deletions
--- a/tests/v1/logits_processors/test_custom_offline.py
+++ b/tests/v1/logits_processors/test_custom_offline.py
@@ -276,9 +276,12 @@ def test_rejects_custom_logitsprocs(
        monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "fork")

        llm = LLM(**llm_kwargs)
-        # Require that no logitsprocs have been loaded
+        # Require that no custom logitsprocs have been loaded
+        # (built-in processors may exist: MinTokensLogitsProcessor,
+        # LogitBiasLogitsProcessor, MinPLogitsProcessor)
        worker = llm.llm_engine.model_executor.driver_worker.worker
-        assert sum([1 for _ in worker.model_runner.input_batch.logitsprocs.all]) == 0
+        for proc in worker.model_runner.input_batch.logitsprocs.all:
+            assert not isinstance(proc, DummyLogitsProcessor)
        return

    if logitproc_source == CustomLogitprocSource.LOGITPROC_SOURCE_FQCN: