[HMA] Fix corner case where the hybrid page_size cannot be evenly divided (blk_size=64, tp=4) (#37467)
Signed-off-by: Chendi Xue <chendi.xue@intel.com>
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Chendi.Xue <chendi.xue@intel.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
This commit is contained in:
@@ -851,6 +851,7 @@ def test_hybrid_attention_mamba_tensor_shapes():
|
||||
vllm_ctx = vllm_config.compilation_config.static_forward_context
|
||||
|
||||
runner = GPUModelRunner(vllm_config, DEVICE)
|
||||
current_platform.update_block_size_for_backend(vllm_config)
|
||||
kv_cache_spec = runner.get_kv_cache_spec()
|
||||
|
||||
available_memory = 5 * GiB_bytes
|
||||
@@ -1306,6 +1307,7 @@ def test_mamba_cache_raises_when_max_num_seqs_exceeds_blocks():
|
||||
assert fwd_context is not None
|
||||
|
||||
runner = GPUModelRunner(vllm_config, DEVICE)
|
||||
current_platform.update_block_size_for_backend(vllm_config)
|
||||
kv_cache_spec = runner.get_kv_cache_spec()
|
||||
|
||||
available_memory = 5 * GiB_bytes
|
||||
|
||||
Reference in New Issue
Block a user