diff --git a/vllm/envs.py b/vllm/envs.py index 8c6eef3e7..02fcd998a 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -106,7 +106,7 @@ if TYPE_CHECKING: VLLM_ROCM_USE_AITER_MLA: bool = True VLLM_ROCM_USE_AITER_MHA: bool = True VLLM_ROCM_USE_AITER_FP4_ASM_GEMM: bool = False - VLLM_ROCM_USE_AITER_TRITON_ROPE: bool = True + VLLM_ROCM_USE_AITER_TRITON_ROPE: bool = False VLLM_ROCM_USE_AITER_FP8BMM: bool = True VLLM_ROCM_USE_AITER_FP4BMM: bool = True VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION: bool = False @@ -949,9 +949,9 @@ environment_variables: dict[str, Callable[[], Any]] = { os.getenv("VLLM_ROCM_USE_AITER_FP4_ASM_GEMM", "False").lower() in ("true", "1") ), # Whether to use aiter rope. - # By default is enabled. + # By default is disabled. "VLLM_ROCM_USE_AITER_TRITON_ROPE": lambda: ( - os.getenv("VLLM_ROCM_USE_AITER_TRITON_ROPE", "True").lower() in ("true", "1") + os.getenv("VLLM_ROCM_USE_AITER_TRITON_ROPE", "False").lower() in ("true", "1") ), # Whether to use aiter triton fp8 bmm kernel # By default is enabled. diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index ab4c3e074..94675e3c9 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -592,7 +592,6 @@ class RocmPlatform(Platform): use_aiter_rms_norm = rocm_aiter_ops.is_rmsnorm_enabled() use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enabled() use_aiter_fused_se = rocm_aiter_ops.is_fusion_moe_shared_experts_enabled() - use_aiter_triton_rope = rocm_aiter_ops.is_triton_rotary_embed_enabled() # Aiter rms norm perform best when CUDA Graph capture is enabled. if ( use_aiter_rms_norm @@ -619,9 +618,9 @@ class RocmPlatform(Platform): and "-grouped_topk" not in compilation_config.custom_ops ): compilation_config.custom_ops.append("+grouped_topk") - # Enable rotary embedding when using AITER if its not disabled by user + # Enable rotary embedding customop when using AITER if not disabled by user if ( - use_aiter_triton_rope + rocm_aiter_ops.is_enabled() and "+rotary_embedding" not in compilation_config.custom_ops and "-rotary_embedding" not in compilation_config.custom_ops ):