[V1][Spec Decode] Support multi-layer eagle draft model (#18030)

Signed-off-by: qizixi <qizixi@meta.com>
This commit is contained in:
qizixi
2025-05-24 02:45:34 -07:00
committed by GitHub
parent a859320575
commit c1e4a4052d
3 changed files with 45 additions and 9 deletions

View File

@@ -246,6 +246,9 @@ def test_propose(num_speculative_tokens):
# Assign the mock to the proposer
proposer.model = model_mock
# Assign draft attn_layer_names since load_model is not invoked
proposer.attn_layer_names = ["layer.0"]
# Create input tensors
cu_num_tokens = torch.tensor([0, seq_len_1, total_tokens],
dtype=torch.int32,