[Core] Support min_tokens with speculative decoding (#32642)

Signed-off-by: qianlihuang <yiliu.dong@qq.com>
Co-authored-by: qianlihuang <yiliu.dong@qq.com>
This commit is contained in:
Yiliu Dong
2026-02-27 01:31:28 +08:00
committed by GitHub
parent 99c7892c5b
commit d940607629
7 changed files with 75 additions and 11 deletions

View File

@@ -276,9 +276,12 @@ def test_rejects_custom_logitsprocs(
monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "fork")
llm = LLM(**llm_kwargs)
# Require that no logitsprocs have been loaded
# Require that no custom logitsprocs have been loaded
# (built-in processors may exist: MinTokensLogitsProcessor,
# LogitBiasLogitsProcessor, MinPLogitsProcessor)
worker = llm.llm_engine.model_executor.driver_worker.worker
assert sum([1 for _ in worker.model_runner.input_batch.logitsprocs.all]) == 0
for proc in worker.model_runner.input_batch.logitsprocs.all:
assert not isinstance(proc, DummyLogitsProcessor)
return
if logitproc_source == CustomLogitprocSource.LOGITPROC_SOURCE_FQCN: