diff --git a/.buildkite/lm-eval-harness/configs/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.yaml b/.buildkite/lm-eval-harness/configs/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.yaml new file mode 100644 index 000000000..c1dbaef62 --- /dev/null +++ b/.buildkite/lm-eval-harness/configs/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.yaml @@ -0,0 +1,15 @@ +model_name: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16" +tasks: +- name: "gsm8k" + metrics: + - name: "exact_match,strict-match" + value: 0.695 + - name: "exact_match,flexible-extract" + value: 0.447 +limit: 1319 +num_fewshot: 5 +max_model_len: 262144 +enforce_eager: false +apply_chat_template: true +fewshot_as_multiturn: true +trust_remote_code: true diff --git a/.buildkite/lm-eval-harness/configs/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8.yaml b/.buildkite/lm-eval-harness/configs/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8.yaml new file mode 100644 index 000000000..a87328fcd --- /dev/null +++ b/.buildkite/lm-eval-harness/configs/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8.yaml @@ -0,0 +1,19 @@ +model_name: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8" +tasks: +- name: "gsm8k" + metrics: + - name: "exact_match,strict-match" + value: 0.7142 + - name: "exact_match,flexible-extract" + value: 0.4579 +env_vars: + VLLM_USE_FLASHINFER_MOE_FP8: "1" + VLLM_FLASHINFER_MOE_BACKEND: "throughput" +limit: 1319 +num_fewshot: 5 +max_model_len: 262144 +kv_cache_dtype: fp8 +enforce_eager: false +apply_chat_template: true +fewshot_as_multiturn: true +trust_remote_code: true diff --git a/.buildkite/lm-eval-harness/configs/models-large-hopper.txt b/.buildkite/lm-eval-harness/configs/models-large-hopper.txt index 5552391d9..2b6c0b5e6 100644 --- a/.buildkite/lm-eval-harness/configs/models-large-hopper.txt +++ b/.buildkite/lm-eval-harness/configs/models-large-hopper.txt @@ -1 +1,2 @@ Qwen3-235B-A22B-Instruct-2507-FP8.yaml +NVIDIA-Nemotron-3-Nano-30B-A3B-FP8.yaml diff --git a/.buildkite/lm-eval-harness/configs/models-large.txt 
b/.buildkite/lm-eval-harness/configs/models-large.txt index 37eeac85c..385031b74 100644 --- a/.buildkite/lm-eval-harness/configs/models-large.txt +++ b/.buildkite/lm-eval-harness/configs/models-large.txt @@ -3,3 +3,4 @@ Meta-Llama-3-70B-Instruct.yaml Mixtral-8x7B-Instruct-v0.1.yaml Qwen2-57B-A14-Instruct.yaml DeepSeek-V2-Lite-Chat.yaml +NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.yaml diff --git a/tests/config/base_model_arch_groundtruth.json b/tests/config/base_model_arch_groundtruth.json index 3401198ad..81534886d 100644 --- a/tests/config/base_model_arch_groundtruth.json +++ b/tests/config/base_model_arch_groundtruth.json @@ -355,5 +355,22 @@ "is_deepseek_mla": true, "is_multimodal_model": false, "dtype": "torch.float32" + }, + "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": { + "architectures": [ + "NemotronHForCausalLM" + ], + "model_type": "nemotron_h", + "text_model_type": "nemotron_h", + "hidden_size": 2688, + "total_num_hidden_layers": 52, + "total_num_attention_heads": 32, + "head_size": 128, + "vocab_size": 131072, + "total_num_kv_heads": 2, + "num_experts": 128, + "is_deepseek_mla": false, + "is_multimodal_model": false, + "dtype": "torch.bfloat16" } } diff --git a/tests/config/test_model_arch_config.py b/tests/config/test_model_arch_config.py index f28ed1733..fbae31331 100644 --- a/tests/config/test_model_arch_config.py +++ b/tests/config/test_model_arch_config.py @@ -14,6 +14,7 @@ from vllm.transformers_utils.model_arch_config_convertor import ( BASE_TRUST_REMOTE_CODE_MODELS = { "nvidia/Llama-3_3-Nemotron-Super-49B-v1", + "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "XiaomiMiMo/MiMo-7B-RL", # Excluded: Not available online right now # "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1",