Signed-off-by: Robert Shaw <robshaw@redhat.com> Co-authored-by: Robert Shaw <robshaw@redhat.com>
10 lines
309 B
YAML
10 lines
309 B
YAML
# TODO(rob): enable; all settings below stay commented out until then.
# model_name: "amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV"
# accuracy_threshold: 0.62
# num_questions: 1319
# num_fewshot: 5
# server_args: "--enforce-eager --max-model-len 8192 --tensor-parallel-size 2"
# env:
#   VLLM_USE_FLASHINFER_MOE_FP8: "1"
#   VLLM_FLASHINFER_MOE_BACKEND: "throughput"