[Quant][Perf] Use moe_wna16 kernel by default for MoEs with many experts (#13236)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-02-14 15:53:42 -05:00
committed by GitHub
parent c9e2d644e7
commit 5e5c8e091e
4 changed files with 39 additions and 26 deletions

View File

@@ -12,7 +12,7 @@ MODEL_NAME = os.environ.get("MODEL_NAME",
 "robertgshaw2/zephyr-7b-beta-channelwise-gptq")
 REVISION = os.environ.get("REVISION", "main")
 QUANTIZATION = os.environ.get("QUANTIZATION", "gptq_marlin")
-MIN_CAPABILITY = os.environ.get("MIN_CAPABILITY", "89")
+MIN_CAPABILITY = os.environ.get("MIN_CAPABILITY", "80")
 @pytest.mark.skipif(