[kv_offload+HMA][7/N]: Support register_kv_caches for hybrid models (#37853)
Signed-off-by: Or Ozeri <oro@il.ibm.com>
This commit is contained in:
@@ -78,9 +78,10 @@ def get_kv_cache_layout():
|
||||
return cache_layout
|
||||
|
||||
|
||||
def set_kv_cache_layout(cache_layout: KVCacheLayoutType):
|
||||
def set_kv_cache_layout(cache_layout: KVCacheLayoutType | None):
|
||||
global _KV_CACHE_LAYOUT_OVERRIDE
|
||||
_KV_CACHE_LAYOUT_OVERRIDE = cache_layout
|
||||
get_kv_cache_layout.cache_clear()
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user