[Mypy] Fix mypy for vllm/model_executor (except vllm/model_executor/layers) (#37904)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -96,6 +96,7 @@ def sparse_attn_indexer(
topk_indices_buffer[: hidden_states.shape[0]] = -1
if has_prefill:
prefill_metadata = attn_metadata.prefill
assert prefill_metadata is not None

# Get the full shared workspace buffers once (will allocate on first use)
workspace_manager = current_workspace_manager()
@@ -170,6 +171,8 @@ def sparse_attn_indexer(

if has_decode:
decode_metadata = attn_metadata.decode
assert decode_metadata is not None
# kv_cache shape [
# kv_cache size requirement [num_block, block_size, n_head, head_dim],
# we only have [num_block, block_size, head_dim],
kv_cache = kv_cache.unsqueeze(-2)

Reference in New Issue
Block a user