[v1][attention] Support Hybrid Allocator + FlashInfer (#21412)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
2025-07-29 18:45:29 -07:00
parent 0e36abf993
commit 555e7225bc
16 changed files with 85 additions and 57 deletions
--- a/tests/v1/spec_decode/test_eagle.py
+++ b/tests/v1/spec_decode/test_eagle.py
@@ -305,6 +305,7 @@ def test_propose(num_speculative_tokens):
        _Backend.FLASH_ATTN_VLLM_V1)
    attn_metadata_builder = attn_metadata_builder_cls(
        kv_cache_spec=create_standard_kv_cache_spec(proposer.vllm_config),
+        layer_names=proposer.attn_layer_names,
        vllm_config=proposer.vllm_config,
        device=device,
    )