model_name: "nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16"
accuracy_threshold: 0.45
num_questions: 1319
num_fewshot: 5
max_model_len: 4096
# Duo stream incompatabilbe with this model: https://github.com/vllm-project/vllm/issues/28220
env:
  VLLM_DISABLE_SHARED_EXPERTS_STREAM: "1"