[Feature] Add support for MoE models in the calibration-free RTN-based quantization (#20766)

Signed-off-by: Alex Kogan <alex.kogan@oracle.com>
This commit is contained in:
Alex Kogan
2025-07-25 21:09:34 -04:00
committed by GitHub
parent f1b286b2fb
commit 7ae75fa6d0
2 changed files with 201 additions and 38 deletions

View File

@@ -8,7 +8,10 @@ import pytest
from tests.quantization.utils import is_quant_method_supported
-MODELS = ["microsoft/Phi-3-mini-4k-instruct"]
+MODELS = [
+    "microsoft/Phi-3-mini-4k-instruct",  # dense model
+    "ai21labs/Jamba-tiny-dev",  # MoE model
+]
@pytest.mark.skipif(not is_quant_method_supported("rtn"),