[V1] Add tree drafting tests for eagle spec decoding (#22705)

Signed-off-by: Giancarlo Delfin <gdelfin@meta.com>
This commit is contained in:
Giancarlo Delfin
2025-08-13 04:11:28 -07:00
committed by GitHub
parent 3f52738dce
commit d94e3026de
4 changed files with 178 additions and 55 deletions

View File

@@ -236,9 +236,9 @@ class TreeAttentionMetadataBuilder(
# Use prefill for drafting at the root level.
self.tree_attn_bias = torch.empty(0)
else:
-# Slice the tree attention bias for drafting.
-query_len = common_attn_metadata.max_query_len
-start, end = draft_index, draft_index + query_len
+# Slice the tree attention bias for drafting. Exclude
+# the root level.
+start, end = 1, 1 + common_attn_metadata.max_query_len
self.tree_attn_bias = self.tree_attn_bias[start:end,
start:end].contiguous()