diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py index def1ec9dc..8c8439dec 100644 --- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py @@ -381,7 +381,6 @@ class AiterExperts(mk.FusedMoEPermuteExpertsUnpermute): # TODO(rob): rocm_aiter_fused_experts uses self.quant_config's # a_scales for static quantization. Update this to fit better # with the interface once all quant integrations are complete. - assert a2_scale == self.quant_config.a2_scale if expert_tokens_meta is not None: num_local_tokens = expert_tokens_meta.expert_num_tokens