[BugFix][V1] Fix overhead related to bad_words sampling when not in use (#14894)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-03-16 14:53:34 -07:00
committed by GitHub
parent f6137adbcb
commit fc1f67715d
3 changed files with 10 additions and 7 deletions

View File

@@ -124,8 +124,9 @@ def _construct_expected_sampling_metadata(
if req.sampling_params.allowed_token_ids:
allowed_token_ids_mask[index_in_input_batch][
req.sampling_params.allowed_token_ids] = True
bad_words_token_ids[
index_in_input_batch] = req.sampling_params.bad_words_token_ids
if req.sampling_params.bad_words_token_ids:
bad_words_token_ids[
index_in_input_batch] = req.sampling_params.bad_words_token_ids
return SamplingMetadata(
temperature=torch.tensor(temperature, dtype=torch.float,