[V1] Add tree drafting tests for eagle spec decoding (#22705)
Signed-off-by: Giancarlo Delfin <gdelfin@meta.com>
This commit is contained in:
@@ -236,9 +236,9 @@ class TreeAttentionMetadataBuilder(
|
||||
# Use prefill for drafting at the root level.
|
||||
self.tree_attn_bias = torch.empty(0)
|
||||
else:
|
||||
- # Slice the tree attention bias for drafting.
|
||||
- query_len = common_attn_metadata.max_query_len
|
||||
- start, end = draft_index, draft_index + query_len
|
||||
+ # Slice the tree attention bias for drafting. Exclude
|
||||
+ # the root level.
|
||||
+ start, end = 1, 1 + common_attn_metadata.max_query_len
|
||||
self.tree_attn_bias = self.tree_attn_bias[start:end,
|
||||
start:end].contiguous()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user