Update FlashInfer to v0.2.9rc1 (#21485)

Signed-off-by: Weiliang Liu <weiliangl@nvidia.com>
This commit is contained in:
weiliang
2025-07-25 05:06:11 +08:00
committed by GitHub
parent a6c7fb8cff
commit 2dd72d23d9
3 changed files with 6 additions and 15 deletions

View File

@@ -678,15 +678,10 @@ class FlashInferImpl(AttentionImpl):
query=decode_query,
kv_cache=kv_cache_permute,
workspace_buffer=attn_metadata.workspace_buffer,
num_heads=self.num_heads,
num_kv_heads=self.num_kv_heads,
scale=self.scale,
block_tables=block_tables_decode,
seq_lens=seq_lens_decode,
block_size=attn_metadata.page_size,
max_seq_len=attn_metadata.max_seq_len,
kv_cache_dtype=self.kv_cache_dtype,
k_scale=layer._k_scale_float,
v_scale=layer._v_scale_float,
bmm1_scale=layer._k_scale_float * self.scale,
bmm2_scale=layer._v_scale_float,
))
return output_padded