[Attention][1/n] Remove usage of deprecated seq_lens_cpu and num_computed_tokens_cpu CommonAttentionMetadata properties (#31773)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
@@ -154,12 +154,12 @@ def create_and_prepopulate_kv_cache(
|
||||
MLA KV cache tensor
|
||||
"""
|
||||
batch_size = len(kv_c_contexts)
|
||||
-    seq_lens = common_attn_metadata.seq_lens_cpu
|
||||
+    seq_lens = common_attn_metadata.seq_lens.cpu()
|
||||
query_lens = (
|
||||
common_attn_metadata.query_start_loc_cpu[1:]
|
||||
- common_attn_metadata.query_start_loc_cpu[:-1]
|
||||
)
|
||||
-    context_lens = common_attn_metadata.num_computed_tokens_cpu
|
||||
+    context_lens = seq_lens - query_lens
|
||||
block_table = common_attn_metadata.block_table_tensor
|
||||
slot_mapping = common_attn_metadata.slot_mapping
|
||||
|
||||
|
||||
Reference in New Issue
Block a user