[Feature] Add support for MoE models in the calibration-free RTN-based quantization (#20766)

Signed-off-by: Alex Kogan <alex.kogan@oracle.com>
2025-07-25 21:09:34 -04:00
parent f1b286b2fb
commit 7ae75fa6d0
2 changed files with 201 additions and 38 deletions
--- a/tests/quantization/test_rtn.py
+++ b/tests/quantization/test_rtn.py
@@ -8,7 +8,10 @@ import pytest

 from tests.quantization.utils import is_quant_method_supported

-MODELS = ["microsoft/Phi-3-mini-4k-instruct"]
+MODELS = [
+    "microsoft/Phi-3-mini-4k-instruct",  # dense model
+    "ai21labs/Jamba-tiny-dev",  # MoE model
+]


@pytest.mark.skipif(not is_quant_method_supported("rtn"),