[SpecDecode] Simplified alternative padded-speculation acceptance rate fix (#29845)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
@@ -306,10 +306,16 @@ def test_prepare_inputs_padded():
|
||||
|
||||
proposer = _create_proposer("eagle", num_speculative_tokens)
|
||||
|
||||
output_metadata, token_indices_to_sample = proposer.prepare_inputs_padded(
|
||||
common_attn_metadata, spec_decode_metadata, valid_sampled_tokens_count
|
||||
output_metadata, token_indices_to_sample, num_rejected_tokens_gpu = (
|
||||
proposer.prepare_inputs_padded(
|
||||
common_attn_metadata, spec_decode_metadata, valid_sampled_tokens_count
|
||||
)
|
||||
)
|
||||
|
||||
# Verify num_rejected_tokens_gpu is calculated correctly
|
||||
expected_num_rejected = torch.tensor([1, 0, 2], dtype=torch.int32, device=device)
|
||||
assert torch.equal(num_rejected_tokens_gpu, expected_num_rejected)
|
||||
|
||||
assert output_metadata.max_query_len == 3
|
||||
assert torch.equal(output_metadata.query_start_loc, expected_query_start_loc)
|
||||
assert torch.equal(token_indices_to_sample, expected_token_indices_to_sample)
|
||||
|
||||
Reference in New Issue
Block a user