diff --git a/tests/kernels/moe/test_cutedsl_moe.py b/tests/kernels/moe/test_cutedsl_moe.py
index af1a34d17..66a97b48b 100644
--- a/tests/kernels/moe/test_cutedsl_moe.py
+++ b/tests/kernels/moe/test_cutedsl_moe.py
@@ -139,7 +139,7 @@ def prepare_inputs(
masked_m.append(mask.sum())
masked_m = torch.tensor(masked_m, dtype=torch.int32)
- # Intialize the hidden_states_3d with ones instead of empty to avoid nan
+ # Initialize the hidden_states_3d with ones instead of empty to avoid nan
# issue.
hidden_states_3d = torch.ones(
(num_experts, max(masked_m), hidden_states.shape[1]), dtype=hidden_states.dtype
diff --git a/tests/kernels/moe/test_moe_align_block_size.py b/tests/kernels/moe/test_moe_align_block_size.py
index 980c72661..4165df37c 100644
--- a/tests/kernels/moe/test_moe_align_block_size.py
+++ b/tests/kernels/moe/test_moe_align_block_size.py
@@ -334,7 +334,7 @@ def test_batched_moe_align_block_size(
ref_expert_ids = torch.empty((Msum // block_size,), dtype=torch.int32)
ref_num_tokens_post_pad = torch.empty((1,), dtype=torch.int32)
- # Intialize
+ # Initialize
sentinel = E * max_tokens_per_batch
ref_sorted_ids.fill_(sentinel)
ref_expert_ids.fill_(-1)
diff --git a/tests/reasoning/test_hunyuan_reasoning_parser.py b/tests/reasoning/test_hunyuan_reasoning_parser.py
index 32e753d2a..493f33f95 100644
--- a/tests/reasoning/test_hunyuan_reasoning_parser.py
+++ b/tests/reasoning/test_hunyuan_reasoning_parser.py
@@ -12,7 +12,7 @@ START_REASONING = "\n"
START_RESPONSE = "\n\n\n"
END_RESPONSE = "\n"
-NO_REASONING_QUICK_THROUGHT = {
+NO_REASONING_QUICK_THOUGHT = {
"output": f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501
"reasoning": None,
"content": "This is the rest",
@@ -81,7 +81,7 @@ TEST_CASES = [
NO_REASONING,
id="no_reasoning",
),
- pytest.param(False, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick"),
+ pytest.param(False, NO_REASONING_QUICK_THOUGHT, id="no_reasoning_quick"),
pytest.param(
False,
MULTIPLE_LINES,
@@ -117,7 +117,7 @@ TEST_CASES = [
NO_REASONING,
id="no_reasoning_streaming",
),
- pytest.param(True, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick_stream"),
+ pytest.param(True, NO_REASONING_QUICK_THOUGHT, id="no_reasoning_quick_stream"),
pytest.param(
True,
MULTIPLE_LINES,
diff --git a/vllm/model_executor/layers/fused_moe/oracle/fp8.py b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
index bc0fc9a88..b94e4637e 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
@@ -333,7 +333,7 @@ def select_fp8_moe_backend(
# TODO(rob): per discussion with TPU team, we need a way to register
# MoE backends by OOT plugins, rather than having an explicit list
- # of AVAILBLE_BACKENDS. Enabling returning `Fp8MoeBackend.NONE` is
+ # of AVAILABLE_BACKENDS. Allowing `Fp8MoeBackend.NONE` to be returned is
# a temporary measure until these register APIs are complete.
if current_platform.is_cuda() or current_platform.is_rocm():
raise NotImplementedError(
diff --git a/vllm/v1/attention/ops/flashmla.py b/vllm/v1/attention/ops/flashmla.py
index 01ced4aba..aa667570a 100644
--- a/vllm/v1/attention/ops/flashmla.py
+++ b/vllm/v1/attention/ops/flashmla.py
@@ -52,8 +52,8 @@ def is_flashmla_dense_supported() -> tuple[bool, str | None]:
"""
Return: is_supported_flag, unsupported_reason (optional).
"""
- is_availble, maybe_reason = _is_flashmla_available()
- if not is_availble:
+ is_available, maybe_reason = _is_flashmla_available()
+ if not is_available:
return False, maybe_reason
if not current_platform.is_device_capability_family(90):
return False, "FlashMLA Dense is only supported on Hopper devices."
@@ -64,8 +64,8 @@ def is_flashmla_sparse_supported() -> tuple[bool, str | None]:
"""
Return: is_supported_flag, unsupported_reason (optional).
"""
- is_availble, maybe_reason = _is_flashmla_available()
- if not is_availble:
+ is_available, maybe_reason = _is_flashmla_available()
+ if not is_available:
return False, maybe_reason
if not (
current_platform.is_device_capability_family(90)