[V0][Bugfix] Fix parallel sampling performance regression when guided decoding is enabled (#17731)

Signed-off-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
Madeesh Kannan
2025-05-23 12:38:23 +02:00
committed by GitHub
parent 4ce64e2df4
commit e493e48524
4 changed files with 40 additions and 8 deletions

View File

@@ -1494,7 +1494,7 @@ class ParallelSampleSequenceGroup(SequenceGroupBase):
for i in range(original_params.n):
request_id_i = f"{request_id}_parallel_sample_{i}"
group.seq_id_to_index[request_id_i] = i
-params = copy.deepcopy(original_params)
+params = params.clone()
params.n = 1
if params.seed is not None:
params.seed += i