[HMA] Fix corner case where hybrid page_size cannot be evenly divided (blk_size=64, tp=4) (#37467)

Signed-off-by: Chendi Xue <chendi.xue@intel.com>
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Chendi.Xue <chendi.xue@intel.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
This commit is contained in:
Chendi.Xue
2026-03-30 11:47:30 -05:00
committed by GitHub
parent b4a2f3ac36
commit 3b1dbaad4e
12 changed files with 220 additions and 186 deletions

View File

@@ -851,6 +851,7 @@ def test_hybrid_attention_mamba_tensor_shapes():
vllm_ctx = vllm_config.compilation_config.static_forward_context
runner = GPUModelRunner(vllm_config, DEVICE)
current_platform.update_block_size_for_backend(vllm_config)
kv_cache_spec = runner.get_kv_cache_spec()
available_memory = 5 * GiB_bytes
@@ -1306,6 +1307,7 @@ def test_mamba_cache_raises_when_max_num_seqs_exceeds_blocks():
assert fwd_context is not None
runner = GPUModelRunner(vllm_config, DEVICE)
current_platform.update_block_size_for_backend(vllm_config)
kv_cache_spec = runner.get_kv_cache_spec()
available_memory = 5 * GiB_bytes