From 5a98cc6d906f9e331fd604eb23de96ebf672d156 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 6 Jun 2026 07:29:33 +0000 Subject: [PATCH] =?UTF-8?q?Store=20pre-cached=20norm=20weights=20on=20self?= =?UTF-8?q?=20to=20prevent=20GC=20during=20graph=20replay=20=E2=80=94=20ro?= =?UTF-8?q?ot=20cause=20of=20all-zeros=20replay=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- single_shot_inference.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/single_shot_inference.py b/single_shot_inference.py index 2cc733e8..efe9fe06 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -283,6 +283,11 @@ class CUDAGraphDecoder: if kvn is not None: kv_norm_dev[li] = kvn.to(dev, torch.float32) if kvn.device != torch.device(dev) or kvn.dtype != torch.float32 else kvn + self.attn_norm_dev = attn_norm_dev + self.ffn_norm_dev = ffn_norm_dev + self.q_norm_dev = q_norm_dev + self.kv_norm_dev = kv_norm_dev + # Verify all MoE/SE buffers are allocated (swizzled buffers must exist before capture) for li in range(self.n_layers): moe = moe_runners.get(li)