fix some typos (#24071)
Signed-off-by: co63oc <co63oc@users.noreply.github.com>
@@ -167,7 +167,7 @@ class FlashAttentionMetadataBuilder(
     # work for mixed prefill-decode and uniform-decode. But for non-spec decodes
     # the graphs would not work for mixed prefill-decode; sorta the inverse
     # of UNIFORM_SINGLE_TOKEN_DECODE.
-    # Theres probably a better way to describe this using `AttentionCGSupport`
+    # There's probably a better way to describe this using `AttentionCGSupport`
     # but for now just set it to `UNIFORM_BATCH` to get use to drop down
     # to FULL_AND_PIECEWISE.
     # TODO(luka, lucas): audit FA2 as part of:
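For context on the comment in this hunk, here is a minimal sketch of the mechanism it describes: a metadata builder advertises a cudagraph support level, and declaring the conservative `UNIFORM_BATCH` level makes the runner drop down to FULL_AND_PIECEWISE capture instead of assuming mixed prefill-decode batches are graph-safe. The enum member names mirror vLLM's `AttentionCGSupport`, but the class body and attribute name here are assumptions for illustration, not the actual source.

from enum import Enum, auto

class AttentionCGSupport(Enum):
    # Member names follow vLLM's AttentionCGSupport; ordering/values
    # here are illustrative assumptions.
    NEVER = auto()                        # no cudagraph support
    UNIFORM_SINGLE_TOKEN_DECODE = auto()  # graphs only for 1-token decode batches
    UNIFORM_BATCH = auto()                # graphs only for uniform batches
    ALWAYS = auto()                       # graphs for any batch shape

class FlashAttentionMetadataBuilderSketch:
    # Hypothetical stand-in for the real builder class: advertising
    # UNIFORM_BATCH is what gets the runner to fall back to
    # FULL_AND_PIECEWISE capture for mixed prefill-decode batches.
    cudagraph_support = AttentionCGSupport.UNIFORM_BATCH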
@@ -291,7 +291,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
     paged_kv_indices_buffer=paged_kv_indices,
     paged_kv_last_page_len_buffer=paged_kv_last_page_len,
     # Tensor cores are enabled by default because the perf would be
-    # atleast as good as cuda cores for all attention ops in latest
+    # at least as good as cuda cores for all attention ops in latest
     # gpus.
     use_tensor_cores=True,
 )
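For context on the second hunk, a hedged sketch of a call like the one shown, assuming FlashInfer's `BatchDecodeWithPagedKVCacheWrapper` constructor. The workspace size, buffer sizes, and batch limit are illustrative placeholders, not values taken from vLLM.

import torch
import flashinfer

# Illustrative sizes; vLLM computes these from the model/engine config.
workspace = torch.empty(128 * 1024 * 1024, dtype=torch.uint8, device="cuda")
max_batch = 256
max_pages = 32768

decode_wrapper = flashinfer.BatchDecodeWithPagedKVCacheWrapper(
    workspace,
    "NHD",
    use_cuda_graph=True,
    # Persistent index buffers are required when capturing cudagraphs,
    # matching the paged_kv_* arguments in the hunk above.
    paged_kv_indptr_buffer=torch.empty(
        max_batch + 1, dtype=torch.int32, device="cuda"
    ),
    paged_kv_indices_buffer=torch.empty(
        max_pages, dtype=torch.int32, device="cuda"
    ),
    paged_kv_last_page_len_buffer=torch.empty(
        max_batch, dtype=torch.int32, device="cuda"
    ),
    # Per the comment being fixed: tensor cores are at least as fast as
    # CUDA cores for these attention ops on recent GPUs, so enable them.
    use_tensor_cores=True,
)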