[Attention] Support multiple attention metadata builders per kv_cache_spec, and properly fix local attention when the hybrid KV cache is disabled (#21588)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
Lucas Wilkinson
2025-08-06 21:40:52 -04:00
committed by GitHub
parent f825c6bd22
commit 1dc8a70b6d
13 changed files with 369 additions and 213 deletions

View File

@@ -313,7 +313,8 @@ def test_propose(num_speculative_tokens, backend):
# Mock runner for attention metadata building
proposer.runner = mock.MagicMock()
-proposer.runner.attn_metadata_builders = [attn_metadata_builder]
+proposer.runner.attn_groups.append([mock.MagicMock()])
+proposer.runner.attn_groups[0][0].metadata_builder = attn_metadata_builder
result = proposer.propose(target_token_ids=target_token_ids,
target_positions=target_positions,