[cherry-pick][Bugfix] Disable monolithic TRTLLM MoE for Renormalize routing (#37591) (#37605)
Signed-off-by: khluu <khluu000@gmail.com>
This commit is contained in:
8
tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-DEP2.yaml
Normal file
8
tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-DEP2.yaml
Normal file
@@ -0,0 +1,8 @@
model_name: "Qwen/Qwen3.5-35B-A3B"
accuracy_threshold: 0.86
num_questions: 1319
num_fewshot: 5
server_args: >-
  --max-model-len 4096
  --data-parallel-size 2
  --enable-expert-parallel
9
tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-FP8-DEP2.yaml
Normal file
9
tests/evals/gsm8k/configs/Qwen3.5-35B-A3B-FP8-DEP2.yaml
Normal file
@@ -0,0 +1,9 @@
model_name: "Qwen/Qwen3.5-35B-A3B-FP8"
accuracy_threshold: 0.86
num_questions: 1319
num_fewshot: 5
server_args: >-
  --max-model-len 4096
  --data-parallel-size 2
  --enable-expert-parallel
  --kv-cache-dtype fp8
2
tests/evals/gsm8k/configs/models-qwen35-blackwell.txt
Normal file
2
tests/evals/gsm8k/configs/models-qwen35-blackwell.txt
Normal file
@@ -0,0 +1,2 @@
Qwen3.5-35B-A3B-DEP2.yaml
Qwen3.5-35B-A3B-FP8-DEP2.yaml
Reference in New Issue
Block a user