# Evaluation settings for the quantized Qwen1.5-MoE chat checkpoint named below.
model_name: "nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16"
accuracy_threshold: 0.45
num_questions: 1319
num_fewshot: 5
max_model_len: 4096

# Duo stream is incompatible with this model, hence the override below.
# See: https://github.com/vllm-project/vllm/issues/28220
env:
  # Kept as a quoted string so it is passed through as text, not an integer.
  VLLM_DISABLE_SHARED_EXPERTS_STREAM: "1"