# Evaluation config: one model plus the settings used when scoring it.
# NOTE(review): the per-field notes below are inferred from the key names
# only; confirm them against the code that consumes this file.

# Identifier of the model under test.
model_name: nm-testing/Qwen3-30B-A3B-MXFP4A16

# Presumably the minimum accuracy the run must reach to pass; TODO confirm
# how the consumer compares against it (inclusive bound, tolerance, etc.).
accuracy_threshold: 0.88

# Presumably the number of evaluation questions to run; verify with consumer.
num_questions: 1319

# Presumably the number of few-shot examples per prompt; verify with consumer.
num_fewshot: 5

# Extra command-line flags, kept as one string. Quoted because the value
# starts with "-", which a plain scalar could not safely begin with here.
server_args: "--enforce-eager --max-model-len 4096"