[Quant][Perf] Use moe_wna16 kernel by default for MoEs with many experts (#13236)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-02-14 15:53:42 -05:00
committed by GitHub
parent c9e2d644e7
commit 5e5c8e091e
4 changed files with 39 additions and 26 deletions

View File

@@ -12,7 +12,7 @@ MODEL_NAME = os.environ.get("MODEL_NAME",
 "robertgshaw2/zephyr-7b-beta-channelwise-gptq")
 REVISION = os.environ.get("REVISION", "main")
 QUANTIZATION = os.environ.get("QUANTIZATION", "gptq_marlin")
-MIN_CAPABILITY = os.environ.get("MIN_CAPABILITY", "89")
+MIN_CAPABILITY = os.environ.get("MIN_CAPABILITY", "80")
 @pytest.mark.skipif(