[BugFix][V1] Fix overhead related to bad_words sampling when not in use (#14894)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-03-16 14:53:34 -07:00
committed by GitHub
parent f6137adbcb
commit fc1f67715d
3 changed files with 10 additions and 7 deletions

View File

@@ -324,8 +324,9 @@ class InputBatch:
 self.allowed_token_ids_mask_cpu_tensor[req_index][
     sampling_params.allowed_token_ids] = False
-self.bad_words_token_ids[
-    req_index] = sampling_params.bad_words_token_ids
+if sampling_params.bad_words_token_ids:
+    self.bad_words_token_ids[
+        req_index] = sampling_params.bad_words_token_ids
 # Add request lora ID
 if request.lora_request: