[Perf] Remove blocking copy in GDN Attention (#31167)
Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
This commit is contained in:
committed by GitHub
parent 5312a7284e
commit 85aff45e24
@@ -143,7 +143,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
 
 query_start_loc = m.query_start_loc
 context_lens = m.num_computed_tokens_cpu
-context_lens_tensor = context_lens.to(query_start_loc.device)
+context_lens_tensor = context_lens.to(query_start_loc.device, non_blocking=True)
 nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None
 
 if (
Reference in New Issue
Block a user