[Kernel] Simplify get_kv_cache_layout and cache use_trtllm_attention env-dependent bit (#22735)
Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
@@ -248,19 +248,23 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
|
||||
|
||||
@functools.lru_cache
|
||||
def get_kv_cache_layout():
|
||||
# Format specified by the code.
|
||||
global _KV_CACHE_LAYOUT_OVERRIDE
|
||||
# Override with format specified by the user.
|
||||
|
||||
if _KV_CACHE_LAYOUT_OVERRIDE is not None:
|
||||
cache_layout = _KV_CACHE_LAYOUT_OVERRIDE
|
||||
logger.info_once("`_KV_CACHE_LAYOUT_OVERRIDE` variable detected. " \
|
||||
"Setting KV cache layout to %s.", cache_layout)
|
||||
return cache_layout
|
||||
|
||||
# Format specified by the user.
|
||||
cache_layout = envs.VLLM_KV_CACHE_LAYOUT
|
||||
# When neither the user nor the override specified a layout, get default
|
||||
if cache_layout is None:
|
||||
if envs.VLLM_USE_TRTLLM_ATTENTION:
|
||||
cache_layout = "HND"
|
||||
else:
|
||||
cache_layout = get_kv_connector_cache_layout()
|
||||
cache_layout = get_kv_connector_cache_layout()
|
||||
else:
|
||||
logger.info_once("`VLLM_KV_CACHE_LAYOUT` environment variable " \
|
||||
"detected. Setting KV cache layout to %s.", cache_layout)
|
||||
if _KV_CACHE_LAYOUT_OVERRIDE is not None:
|
||||
cache_layout = _KV_CACHE_LAYOUT_OVERRIDE
|
||||
return cache_layout
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user