diff --git a/tests/kernels/moe/modular_kernel_tools/common.py b/tests/kernels/moe/modular_kernel_tools/common.py index 6078ce44c..537dcae4e 100644 --- a/tests/kernels/moe/modular_kernel_tools/common.py +++ b/tests/kernels/moe/modular_kernel_tools/common.py @@ -258,16 +258,16 @@ class Config: f"{self.fe_supported_types()}." ) - # Check block quanization support - is_block_quatized = self.quant_block_shape is not None - if is_block_quatized and self.quant_dtype is None: + # Check block quantization support + is_block_quantized = self.quant_block_shape is not None + if is_block_quantized and self.quant_dtype is None: return False, "No block quantization support." - if is_block_quatized and not self.is_block_quant_supported(): + if is_block_quantized and not self.is_block_quant_supported(): return False, "Mismatched block quantization support." # deep_gemm only works with block-quantized - if self.needs_deep_gemm() and not is_block_quatized: + if self.needs_deep_gemm() and not is_block_quantized: return False, "Needs DeepGEMM but not block quantized." # Check dependencies (turn into asserts?) diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py index c9ab24fd4..a4b6d3598 100644 --- a/tests/quantization/test_fp8.py +++ b/tests/quantization/test_fp8.py @@ -217,7 +217,7 @@ def test_scaled_fp8_quant(dtype) -> None: ref_y, inv_scale = ops.scaled_fp8_quant(x, None) ref_y = per_tensor_dequantize(ref_y, inv_scale, dtype) - # Reference dynamic quantizaton + # Reference dynamic quantization y = quantize_ref(x, inv_scale) torch.testing.assert_close(ref_y, per_tensor_dequantize(y, inv_scale, dtype)) diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index bcda46421..56c9ca361 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -389,7 +389,7 @@ def should_use_deepgemm_for_fp8_linear( # Verify DeepGEMM N/K dims requirements # NOTE: Also synchronized with test_w8a8_block_fp8_deep_gemm_matmul - # test inside kernels/quatization/test_block_fp8.py + # test inside kernels/quantization/test_block_fp8.py N_MULTIPLE = 64 K_MULTIPLE = 128