[Bugfix][Minor] Fix potential NameError in mamba backend selector and misc typos (#35886)
Signed-off-by: Li <chuali@amd.com>
This commit is contained in:
committed by
GitHub
parent
e054f152fa
commit
cb2263218e
@@ -749,7 +749,10 @@ class FlexAttentionMetadataBuilder(AttentionMetadataBuilder[FlexAttentionMetadat
|
||||
prefix_kv_lens = None
|
||||
suffix_kv_lens = None
|
||||
if use_cascade:
|
||||
raise NotImplementedError("Not yet my friend")
|
||||
raise NotImplementedError(
|
||||
"Cascade prefix attention is not yet implemented "
|
||||
"for FlexAttention backend"
|
||||
)
|
||||
|
||||
block_size = self.kv_cache_spec.block_size
|
||||
max_possible_seq_len = self.model_config.max_model_len
|
||||
|
||||
@@ -253,7 +253,7 @@ def make_local_attention_virtual_batches(
|
||||
# seqlens_q_local = [2, 2, 1, 4, 4, 1, 4, 1]
|
||||
#
|
||||
# First Get batched arange. (E.g., [2, 4, 2] -> [0, 1, 0, 1, 2, 3, 0, 1])
|
||||
# (TODO: max a utility to share this code with _prepare_inputs)
|
||||
# (TODO: make a utility to share this code with _prepare_inputs)
|
||||
# arange step 1. [2, 4, 2] -> [2, 6, 8]
|
||||
cu_num_blocks = np.cumsum(local_blocks)
|
||||
virtual_batches = cu_num_blocks[-1]
|
||||
|
||||
@@ -149,8 +149,8 @@ def _cached_get_mamba_attn_backend(
|
||||
selected_backend = MambaAttentionBackendEnum[backend_name]
|
||||
except KeyError as e:
|
||||
raise ValueError(
|
||||
f"Invalid mamba attention backend type: '{backend_name}'. Valid "
|
||||
f"backends are: {list(MambaAttentionBackendEnum.__members__.keys())}"
|
||||
f"Invalid mamba attention backend type: '{mamba_type}'. Valid "
|
||||
f"types are: {list(MAMBA_TYPE_TO_BACKEND_MAP.keys())}"
|
||||
) from e
|
||||
|
||||
mamba_attn_backend = selected_backend.get_class()
|
||||
|
||||
Reference in New Issue
Block a user