[Bugfix] Prevent IndexError for cached requests when pipeline parallelism is disabled (#20486)
Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
This commit is contained in:
@@ -635,6 +635,8 @@ class Scheduler(SchedulerInterface):
|
|||||||
token_ids = req.all_token_ids[req.num_computed_tokens:req.
|
token_ids = req.all_token_ids[req.num_computed_tokens:req.
|
||||||
num_computed_tokens + num_tokens]
|
num_computed_tokens + num_tokens]
|
||||||
new_token_ids.append(token_ids)
|
new_token_ids.append(token_ids)
|
||||||
|
else:
|
||||||
|
new_token_ids.append([])
|
||||||
new_block_ids.append(req_to_new_block_ids[req_id])
|
new_block_ids.append(req_to_new_block_ids[req_id])
|
||||||
num_computed_tokens.append(req.num_computed_tokens)
|
num_computed_tokens.append(req.num_computed_tokens)
|
||||||
# Because resumed_reqs is usually empty, it is more efficient to do
|
# Because resumed_reqs is usually empty, it is more efficient to do
|
||||||
|
|||||||
Reference in New Issue
Block a user