[ROCm] Improve error handling while loading quantized model on gfx120… (#31715)

Signed-off-by: brian033 <85883730+brian033@users.noreply.github.com>
Co-authored-by: TJian <tunjian.tan@embeddedllm.com>
2026-01-15 20:16:00 +08:00
parent 28459785ff
commit b89275d018
1 changed file with 5 additions and 1 deletion
--- a/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py
@@ -153,7 +153,11 @@ try:
        fake_impl=gemm_with_dynamic_quant_fake,
        dispatch_key=current_platform.dispatch_key,
    )
-except (ImportError, AttributeError):
+except (ImportError, AttributeError, RuntimeError):
    logger.warning(
        "AITER is not found or QuarkOCP_MX is not supported on the current "
        "platform. QuarkOCP_MX quantization will not be available."
    )
    dynamic_mxfp4_quant = gemm_afp4wfp4 = None