diff --git a/vllm/_aiter_ops.py b/vllm/_aiter_ops.py
index 0eae279ac..03e3bb759 100644
--- a/vllm/_aiter_ops.py
+++ b/vllm/_aiter_ops.py
@@ -761,7 +761,7 @@ class rocm_aiter_ops:
 
     @classmethod
     @if_aiter_supported
-    def is_linear_fp8_enaled(cls) -> bool:
+    def is_linear_fp8_enabled(cls) -> bool:
         return cls.is_linear_enabled()
 
     @classmethod
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
index ee99572f5..758a54c10 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
@@ -61,7 +61,7 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
             )
 
         self.cutlass_block_fp8_supported = cutlass_block_fp8_supported()
-        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enaled()
+        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enabled()
 
         if self.weight_block_size is not None:
             assert not self.is_static_input_scheme
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 1f770d6d8..d19b20798 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -414,7 +414,7 @@ class Fp8LinearMethod(LinearMethodBase):
         if vllm_is_batch_invariant():
             self.use_marlin = False
 
-        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enaled()
+        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enabled()
         self.use_deep_gemm = is_deep_gemm_supported()
 
         self.weight_block_size = self.quant_config.weight_block_size
diff --git a/vllm/model_executor/layers/quantization/input_quant_fp8.py b/vllm/model_executor/layers/quantization/input_quant_fp8.py
index a5db086fb..7994c838a 100644
--- a/vllm/model_executor/layers/quantization/input_quant_fp8.py
+++ b/vllm/model_executor/layers/quantization/input_quant_fp8.py
@@ -51,7 +51,7 @@ class QuantFP8(CustomOp):
         self.column_major_scales = column_major_scales
         self.use_ue8m0 = use_ue8m0
 
-        self.use_aiter = rocm_aiter_ops.is_linear_fp8_enaled()
+        self.use_aiter = rocm_aiter_ops.is_linear_fp8_enabled()
 
         self.is_group_quant = group_shape.is_per_group()
         if self.is_group_quant:
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 5892639eb..b95287906 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -408,7 +408,7 @@ class RocmPlatform(Platform):
         parallel_config = vllm_config.parallel_config
         is_eager_execution = compilation_config == CUDAGraphMode.NONE
         use_aiter_rms_norm = rocm_aiter_ops.is_rmsnorm_enabled()
-        use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enaled()
+        use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enabled()
 
         if compilation_config.cudagraph_mode.has_full_cudagraphs():
             # decode context parallel does not support full cudagraphs