[Async][Feat] support apply penalty or bad_words for async + spec (#30495)
Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com> Signed-off-by: izhuhaoran <izhuhaoran@qq.com> Signed-off-by: Nick Hill <nickhill123@gmail.com> Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -165,23 +165,6 @@ class InputProcessor:
|
||||
"are not yet supported with speculative decoding."
|
||||
)
|
||||
|
||||
# Async scheduling + spec decode currently incompatible with some
|
||||
# sampling parameters.
|
||||
if (
|
||||
self.vllm_config.speculative_config is not None
|
||||
and self.vllm_config.scheduler_config.async_scheduling
|
||||
and (
|
||||
params.frequency_penalty != 0.0
|
||||
or params.presence_penalty != 0.0
|
||||
or params.repetition_penalty != 1.0
|
||||
or params.bad_words_token_ids
|
||||
)
|
||||
):
|
||||
raise ValueError(
|
||||
"async scheduling with spec decoding doesn't yet support "
|
||||
"penalties or bad words in sampling parameters."
|
||||
)
|
||||
|
||||
def _validate_params(
|
||||
self,
|
||||
params: SamplingParams | PoolingParams,
|
||||
|
||||
Reference in New Issue
Block a user