[Bugfix][CI] fix typos (#34934)
Signed-off-by: 1195343015 <1195343015@qq.com>
Signed-off-by: Jiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
@@ -174,7 +174,7 @@ class CPUAttentionMetadataBuilder(AttentionMetadataBuilder[CPUAttentionMetadata]
 query_start_loc = query_start_loc[: num_decodes + 1]
 block_table_tensor = block_table_tensor[:num_decodes]

-sheduler_metadata = ops.cpu_attn_get_scheduler_metadata(
+scheduler_metadata = ops.cpu_attn_get_scheduler_metadata(
     num_reqs=num_reqs,
     num_heads=self.num_heads,
     num_kv_heads=self.num_kv_heads,
@@ -197,7 +197,7 @@ class CPUAttentionMetadataBuilder(AttentionMetadataBuilder[CPUAttentionMetadata]
     seq_lens=seq_lens,
     block_table=block_table_tensor,
     slot_mapping=slot_mapping,
-    scheduler_metadata=sheduler_metadata,
+    scheduler_metadata=scheduler_metadata,
     causal=causal,
     use_sdpa_prefill=self.use_sdpa_prefill,
     num_decode_tokens=num_decode_tokens,
@@ -383,7 +383,7 @@ class BaseMambaAttentionMetadataBuilder(AttentionMetadataBuilder[M], abc.ABC):
 # Return a tensor of shape (#requests, #max blocks)
 state_indices_tensor = common_attn_metadata.block_table_tensor
-# Additional cache-related varaiables:
+# Additional cache-related variables:
 mamba_block_size = self.kv_cache_spec.block_size
 (
     block_idx_last_computed_token,
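The hunk above treats the block table as a (num_requests, max_blocks) tensor and reads a per-layer mamba_block_size. A standalone sketch of how a last-computed-token position could map to a block index under those shapes follows; the arithmetic and names other than the ones in the hunk are illustrative assumptions, not the vLLM code path.

import torch

num_requests, max_blocks, mamba_block_size = 3, 8, 16
# Stand-in for common_attn_metadata.block_table_tensor: one row of block ids per request.
state_indices_tensor = torch.arange(num_requests * max_blocks).reshape(num_requests, max_blocks)

# Illustrative assumption: the block holding a request's last computed token
# is floor(token_index / block_size).
last_computed_token_idx = torch.tensor([0, 17, 40])
block_idx_last_computed_token = last_computed_token_idx // mamba_block_size
print(block_idx_last_computed_token.tolist())  # [0, 1, 2]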
@@ -49,14 +49,14 @@ if TYPE_CHECKING:
 logger = init_logger(__name__)

-# For FP8 sparse attention we have two impelementations:
+# For FP8 sparse attention we have two implementations:
 # 1. Mixed batch mode: use the FP8 decode kernel for both prefill and decode this is
 #    done by treating all tokens as single batch.
 # 2. Separate prefill and decode mode: use the BF16 prefill kernel for prefill
 #    (upconverting the FP8 cache to BF16 then calling the prefill kernel) and using
 #    the FP8 decode kernel for decode.
 # Currently we use #1 when the number of heads per rank is low (i.e. TP) since the BF16
-# prefill kernel requires padding the numer of heads to 128 while the decode does not
+# prefill kernel requires padding the number of heads to 128 while the decode does not
 # so when the per ranke head count is below MIN_HEADS_FOR_BF16_PREFILL we use the mixed
 # batch mode (#2).
 MIN_HEADS_FOR_BF16_PREFILL = 32
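The comment block in this hunk describes a head-count-based dispatch between the two FP8 sparse attention modes. A minimal sketch of that decision follows; the helper name and call pattern are hypothetical and not taken from this diff, only the MIN_HEADS_FOR_BF16_PREFILL = 32 constant is.

MIN_HEADS_FOR_BF16_PREFILL = 32  # value from the hunk above

def use_mixed_batch_mode(num_heads_per_rank: int) -> bool:
    # Hypothetical helper: with few heads per rank (e.g. under high TP),
    # padding heads up to 128 for the BF16 prefill kernel is wasteful,
    # so route everything through the FP8 decode kernel (mixed batch mode).
    return num_heads_per_rank < MIN_HEADS_FOR_BF16_PREFILL

# Example: 128 total heads sharded over TP=8 ranks -> 16 heads per rank.
print(use_mixed_batch_mode(128 // 8))  # True -> mixed batch mode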
@@ -126,7 +126,7 @@ class FlashMLASparseBackend(AttentionBackend):
     cache_dtype_str: str = "auto",
 ) -> tuple[int, ...]:
     if cache_dtype_str == "fp8_ds_mla":
-        # custom storage fromat is 656 bytes
+        # custom storage format is 656 bytes
         # see FlashMLA readme.md for details
         return (num_blocks, block_size, 656)
     else:
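The 656-byte figure in the hunk above is asserted, not derived, in the diff. One plausible accounting is sketched below; treat the exact split as an assumption about the fp8_ds_mla entry layout rather than a fact recorded by this commit.

# Assumed per-token entry layout for the fp8_ds_mla cache (illustrative only;
# the diff itself only records the 656-byte total):
kv_lora_rank = 512       # latent dims stored as FP8 -> 1 byte each
scale_bytes = 16         # per-tile FP32 scales
qk_rope_head_dim = 64    # rope dims kept in BF16 -> 2 bytes each

entry_bytes = kv_lora_rank * 1 + scale_bytes + qk_rope_head_dim * 2
print(entry_bytes)  # 656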
@@ -370,7 +370,7 @@ class AiterFlashAttentionMetadata:
 slot_mapping: torch.Tensor
 block_table: torch.Tensor

-# prefill and deocde split
+# prefill and decode split
 num_decodes: int
 num_decode_tokens: int
 num_prefills: int
@@ -1099,7 +1099,7 @@ class AiterFlashAttentionImpl(AttentionImpl):
 extend_tokens_slice = slice(
     num_decode_tokens, num_decode_tokens + num_extend_tokens
 )
-extend_querys = query[extend_tokens_slice]
+extend_queries = query[extend_tokens_slice]
 extend_keys = key[extend_tokens_slice]
 extend_values = value[extend_tokens_slice]
 extend_outputs = output[extend_tokens_slice]
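The renamed extend_queries sits in code that splits a packed token batch into decode and extend (prefill) parts by slicing. A self-contained sketch of that pattern with made-up shapes follows; none of the surrounding vLLM metadata is modeled.

import torch

num_decode_tokens, num_extend_tokens, num_heads, head_dim = 4, 12, 8, 64
query = torch.randn(num_decode_tokens + num_extend_tokens, num_heads, head_dim)

# Decode tokens are packed first, extend (prefill) tokens follow.
decode_queries = query[:num_decode_tokens]
extend_tokens_slice = slice(num_decode_tokens, num_decode_tokens + num_extend_tokens)
extend_queries = query[extend_tokens_slice]

print(decode_queries.shape)  # torch.Size([4, 8, 64])
print(extend_queries.shape)  # torch.Size([12, 8, 64])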
@@ -1110,7 +1110,7 @@ class AiterFlashAttentionImpl(AttentionImpl):
     v_scale = attn_metadata.v_scale
     self.extend_forward(
         attn_metadata=attn_metadata,
-        query=extend_querys,
+        query=extend_queries,
         key=extend_keys,
         value=extend_values,
         key_cache=key_cache,