From b7acac5e4e1109b1b4083ab1b1422d332d1938cb Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 17 May 2026 21:22:30 +0000 Subject: [PATCH] Call _ensure_stacked() before using runner buffers --- tests/test_pipeline_real_weights.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_pipeline_real_weights.py b/tests/test_pipeline_real_weights.py index 3ee66e87..71c2f76f 100644 --- a/tests/test_pipeline_real_weights.py +++ b/tests/test_pipeline_real_weights.py @@ -172,6 +172,7 @@ def main(): intermediate_size=INTERMEDIATE_SIZE, max_num_tokens=NUM_TOKENS, top_k=TOP_K, device=DEVICE, ) + runner._ensure_stacked() # Just use the runner's scale assembly l1_gsa = torch.full((NUM_EXPERTS,), l1_gs, dtype=torch.float32, device=DEVICE) l1_scale_a = runner._assemble_scales_cudagraph_safe(