diff --git a/tests/test_moe_nan_b200.py b/tests/test_moe_nan_b200.py
index 385bfb51..d25431a1 100644
--- a/tests/test_moe_nan_b200.py
+++ b/tests/test_moe_nan_b200.py
@@ -27,7 +27,7 @@
 MODEL = "/root/nvidia-meeting/DeepSeek-V4-Pro-NVFP4"
 DEV = "cuda:0"
 H = 7168
-INTERMEDIATE = 18432 # DeepSeek-V4 MoE intermediate
+INTERMEDIATE = 3072 # DeepSeek-V4 MoE intermediate
 NUM_EXPERTS = 384
 TOPK = 6
 EPS = 1e-6