[Model Runner V2] Fix unintended CPU-GPU sync in make_dummy (#34667)
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
@@ -108,7 +108,7 @@ class InputBatch:
|
||||
query_start_loc_np = np.empty(num_reqs + 1, dtype=np.int32)
|
||||
query_start_loc_np[0] = 0
|
||||
np.cumsum(num_scheduled_tokens, out=query_start_loc_np[1:])
|
||||
- input_buffers.query_start_loc[0] = 0
|
||||
+ input_buffers.query_start_loc[:1] = 0
|
||||
torch.cumsum(
|
||||
seq_lens, dim=0, out=input_buffers.query_start_loc[1 : num_reqs + 1]
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user