[V1] Refactor num_computed_tokens logic (#15307)
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@@ -107,14 +107,33 @@ class RejectionSampler(nn.Module):
|
||||
@staticmethod
def parse_output(
    output_token_ids: torch.Tensor,
    ignored_req_idxs: list[int],
    vocab_size: int,
) -> list[list[int]]:
    """Convert the rejection sampler's output tensor into per-request lists.

    Args:
        output_token_ids: The sampled token IDs in shape
            [batch_size, max_spec_len + 1]. Rejected positions hold
            `PLACEHOLDER_TOKEN_ID` (written by the rejection sampler)
            and are dropped here.
        ignored_req_idxs: Row indices of requests that should not be
            sampled, usually because the request is still in the
            prefill phase. These rows always yield an empty list.
        vocab_size: The size of the vocabulary. Any token ID that is
            not strictly below this value is dropped.

    Returns:
        One list of token IDs per row of `output_token_ids`, in row
        order.
    """
    # Move to host memory once so filtering runs on a NumPy array.
    token_ids = output_token_ids.cpu().numpy()

    # A position survives only if it is neither the placeholder nor an
    # ID at or beyond the vocabulary size.
    keep = (token_ids != PLACEHOLDER_TOKEN_ID) & (token_ids < vocab_size)

    # Set membership keeps the per-row "ignored?" check O(1).
    skipped = set(ignored_req_idxs)

    parsed: list[list[int]] = []
    for req_idx, (row, row_keep) in enumerate(zip(token_ids, keep)):
        if req_idx in skipped:
            parsed.append([])
        else:
            parsed.append(row[row_keep].tolist())
    return parsed
|
||||
|
||||
Reference in New Issue
Block a user