[kv_offload+HMA][7/N]: Support register_kv_caches for hybrid models (#37853)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
This commit is contained in:
Or Ozeri
2026-03-27 08:38:33 +03:00
committed by GitHub
parent 999dfc1622
commit 7cc302dd87
13 changed files with 1476 additions and 758 deletions

View File

@@ -78,9 +78,10 @@ def get_kv_cache_layout():
return cache_layout
# Store `cache_layout` in the module-level `_KV_CACHE_LAYOUT_OVERRIDE` and
# clear the cache attached to `get_kv_cache_layout`.
# NOTE(review): two consecutive signatures follow. This looks like a diff
# rendering whose -/+ markers and indentation were lost; presumably the first
# line is the previous signature and the second (which also accepts None) is
# the replacement. Confirm against the repository before relying on either.
def set_kv_cache_layout(cache_layout: KVCacheLayoutType):
def set_kv_cache_layout(cache_layout: KVCacheLayoutType | None):
# Declared global so the assignment below rebinds the module-level name
# instead of creating a function-local variable.
global _KV_CACHE_LAYOUT_OVERRIDE
_KV_CACHE_LAYOUT_OVERRIDE = cache_layout
# Drop whatever get_kv_cache_layout has cached. Presumably that function is
# memoized (it exposes cache_clear) and would otherwise keep returning the
# layout computed before this override changed. TODO confirm.
get_kv_cache_layout.cache_clear()
@dataclass