10 lines
309 B
YAML
10 lines
309 B
YAML
# TODO(rob): enable this config (every setting below is currently commented out).
# model_name: "amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV"
# accuracy_threshold: 0.62
# num_questions: 1319
# num_fewshot: 5
# server_args: "--enforce-eager --max-model-len 8192 --tensor-parallel-size 2"
# env:
#   VLLM_USE_FLASHINFER_MOE_FP8: "1"
#   VLLM_FLASHINFER_MOE_BACKEND: "throughput"