From 180fba653ead2274bfcd3951a275f4d6cf9ade04 Mon Sep 17 00:00:00 2001 From: Divakar Verma <137818590+divakar-amd@users.noreply.github.com> Date: Wed, 21 Jan 2026 12:41:11 -0600 Subject: [PATCH] [ROCm] fix import for on_gfx9 (#32783) Signed-off-by: Divakar Verma --- .../model_executor/layers/fused_moe/fused_batched_moe.py | 9 ++++++++- vllm/model_executor/layers/fused_moe/fused_moe.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py index 8e45c0e41..509bacfbc 100644 --- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py @@ -913,7 +913,14 @@ class BatchedTritonExperts(mk.FusedMoEPermuteExpertsUnpermute): activation_key: QuantKey | None, ) -> bool: p = current_platform - device_supports_fp8 = (p.is_rocm() and p.rocm.on_gfx9()) or ( + if p.is_rocm(): + from vllm.platforms.rocm import on_gfx9 + + is_rocm_on_gfx9 = on_gfx9() + else: + is_rocm_on_gfx9 = False + + device_supports_fp8 = is_rocm_on_gfx9 or ( p.is_cuda() and p.has_device_capability((8, 9)) ) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 7e7d59fb9..669a6e74b 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -1922,7 +1922,14 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute): activation_key: QuantKey | None, ) -> bool: p = current_platform - device_supports_fp8 = (p.is_rocm() and p.rocm.on_gfx9()) or ( + if p.is_rocm(): + from vllm.platforms.rocm import on_gfx9 + + is_rocm_on_gfx9 = on_gfx9() + else: + is_rocm_on_gfx9 = False + + device_supports_fp8 = is_rocm_on_gfx9 or ( p.is_cuda() and p.has_device_capability((8, 9)) )