[Async][Feat] Support applying penalties or bad_words with async scheduling + spec decoding (#30495)

Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
Signed-off-by: izhuhaoran <izhuhaoran@qq.com>
Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
zhrrr
2026-01-09 10:31:50 +08:00
committed by GitHub
parent a4ec0c5595
commit 8ff4a99566
4 changed files with 70 additions and 34 deletions

View File

@@ -165,23 +165,6 @@ class InputProcessor:
"are not yet supported with speculative decoding."
)
# Async scheduling + spec decode currently incompatible with some
# sampling parameters.
if (
self.vllm_config.speculative_config is not None
and self.vllm_config.scheduler_config.async_scheduling
and (
params.frequency_penalty != 0.0
or params.presence_penalty != 0.0
or params.repetition_penalty != 1.0
or params.bad_words_token_ids
)
):
raise ValueError(
"async scheduling with spec decoding doesn't yet support "
"penalties or bad words in sampling parameters."
)
def _validate_params(
self,
params: SamplingParams | PoolingParams,