diff --git a/tests/kernels/moe/test_cutedsl_moe.py b/tests/kernels/moe/test_cutedsl_moe.py index af1a34d17..66a97b48b 100644 --- a/tests/kernels/moe/test_cutedsl_moe.py +++ b/tests/kernels/moe/test_cutedsl_moe.py @@ -139,7 +139,7 @@ def prepare_inputs( masked_m.append(mask.sum()) masked_m = torch.tensor(masked_m, dtype=torch.int32) - # Intialize the hidden_states_3d with ones instead of empty to avoid nan + # Initialize the hidden_states_3d with ones instead of empty to avoid nan # issue. hidden_states_3d = torch.ones( (num_experts, max(masked_m), hidden_states.shape[1]), dtype=hidden_states.dtype diff --git a/tests/kernels/moe/test_moe_align_block_size.py b/tests/kernels/moe/test_moe_align_block_size.py index 980c72661..4165df37c 100644 --- a/tests/kernels/moe/test_moe_align_block_size.py +++ b/tests/kernels/moe/test_moe_align_block_size.py @@ -334,7 +334,7 @@ def test_batched_moe_align_block_size( ref_expert_ids = torch.empty((Msum // block_size,), dtype=torch.int32) ref_num_tokens_post_pad = torch.empty((1,), dtype=torch.int32) - # Intialize + # Initialize sentinel = E * max_tokens_per_batch ref_sorted_ids.fill_(sentinel) ref_expert_ids.fill_(-1) diff --git a/tests/reasoning/test_hunyuan_reasoning_parser.py b/tests/reasoning/test_hunyuan_reasoning_parser.py index 32e753d2a..493f33f95 100644 --- a/tests/reasoning/test_hunyuan_reasoning_parser.py +++ b/tests/reasoning/test_hunyuan_reasoning_parser.py @@ -12,7 +12,7 @@ START_REASONING = "\n" START_RESPONSE = "\n\n\n" END_RESPONSE = "\n" -NO_REASONING_QUICK_THROUGHT = { +NO_REASONING_QUICK_THOUGHT = { "output": f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501 "reasoning": None, "content": "This is the rest", @@ -81,7 +81,7 @@ TEST_CASES = [ NO_REASONING, id="no_reasoning", ), - pytest.param(False, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick"), + pytest.param(False, NO_REASONING_QUICK_THOUGHT, id="no_reasoning_quick"), pytest.param( False, MULTIPLE_LINES, @@ -117,7 +117,7 @@ TEST_CASES = [ NO_REASONING, id="no_reasoning_streaming", ), - pytest.param(True, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick_stream"), + pytest.param(True, NO_REASONING_QUICK_THOUGHT, id="no_reasoning_quick_stream"), pytest.param( True, MULTIPLE_LINES, diff --git a/vllm/model_executor/layers/fused_moe/oracle/fp8.py b/vllm/model_executor/layers/fused_moe/oracle/fp8.py index bc0fc9a88..b94e4637e 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py +++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py @@ -333,7 +333,7 @@ def select_fp8_moe_backend( # TODO(rob): per discussion with TPU team, we need a way to register # MoE backends by OOT plugins, rather than having an explicit list - # of AVAILBLE_BACKENDS. Enabling returning `Fp8MoeBackend.NONE` is + # of AVAILABLE_BACKENDS. Enabling returning `Fp8MoeBackend.NONE` is # a temporary measure until these register APIs are complete. if current_platform.is_cuda() or current_platform.is_rocm(): raise NotImplementedError( diff --git a/vllm/v1/attention/ops/flashmla.py b/vllm/v1/attention/ops/flashmla.py index 01ced4aba..aa667570a 100644 --- a/vllm/v1/attention/ops/flashmla.py +++ b/vllm/v1/attention/ops/flashmla.py @@ -52,8 +52,8 @@ def is_flashmla_dense_supported() -> tuple[bool, str | None]: """ Return: is_supported_flag, unsupported_reason (optional). """ - is_availble, maybe_reason = _is_flashmla_available() - if not is_availble: + is_available, maybe_reason = _is_flashmla_available() + if not is_available: return False, maybe_reason if not current_platform.is_device_capability_family(90): return False, "FlashMLA Dense is only supported on Hopper devices." @@ -64,8 +64,8 @@ def is_flashmla_sparse_supported() -> tuple[bool, str | None]: """ Return: is_supported_flag, unsupported_reason (optional). """ - is_availble, maybe_reason = _is_flashmla_available() - if not is_availble: + is_available, maybe_reason = _is_flashmla_available() + if not is_available: return False, maybe_reason if not ( current_platform.is_device_capability_family(90)