Files
vllm/.buildkite/lm-eval-harness/configs/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8.yaml
2026-02-03 08:52:49 -05:00

20 lines
460 B
YAML

model_name: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8"
tasks:
- name: "gsm8k"
metrics:
- name: "exact_match,strict-match"
value: 0.7142
- name: "exact_match,flexible-extract"
value: 0.4579
env_vars:
VLLM_USE_FLASHINFER_MOE_FP8: "1"
VLLM_FLASHINFER_MOE_BACKEND: "throughput"
limit: 1319
num_fewshot: 5
max_model_len: 262144
kv_cache_dtype: fp8
enforce_eager: false
apply_chat_template: true
fewshot_as_multiturn: true
trust_remote_code: true