diff --git a/tests/test_pipeline_real_weights.py b/tests/test_pipeline_real_weights.py index 30687b8d..cf2ce97c 100644 --- a/tests/test_pipeline_real_weights.py +++ b/tests/test_pipeline_real_weights.py @@ -14,6 +14,7 @@ NUM_TOKENS = 8 TOP_K = 6 SWIGLU_LIMIT = 10.0 DEVICE = "cuda" +MAX_NUM_TOKENS = 8192 # match vLLM config def load_layer_tensors(model_dir, layer_idx): @@ -126,7 +127,7 @@ def main(): runner = CuTeDSLMoERunner( num_experts=NUM_EXPERTS, hidden_size=HIDDEN_SIZE, - intermediate_size=INTERMEDIATE_SIZE, max_num_tokens=NUM_TOKENS, + intermediate_size=INTERMEDIATE_SIZE, max_num_tokens=MAX_NUM_TOKENS, top_k=TOP_K, device=DEVICE, ) runner.l1_fp4 = l1_fp4; runner.l1_sf = l1_sf; runner.l1_gs = l1_gs