From be476b2ce2fa08dc9ef02b56c33bfe6c2edc60ca Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 1 Jun 2026 00:00:07 +0000 Subject: [PATCH] router: catch CuTeDSL warmup failures fast, don't let MLIR errors slow down init --- dsv4/ops/router.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dsv4/ops/router.py b/dsv4/ops/router.py index ba992535..8d0c38d3 100644 --- a/dsv4/ops/router.py +++ b/dsv4/ops/router.py @@ -36,11 +36,15 @@ def warmup_router_compilation(router) -> None: """ if router.mode == "dense": # Dummy forward at small N triggers decode-path compile. + # CuTeDSL fused kernel is WIP — falls through to prefill path. dummy = torch.zeros( 1, router.hidden_size, dtype=torch.bfloat16, device=router.device, ) - router._run_dense_impl(dummy) + try: + router._run_dense_impl(dummy) + except Exception: + pass # CuTeDSL kernel not yet working; prefill path is fine else: dummy = torch.zeros(1, dtype=torch.int32, device=router.device) router._run_hash_impl(dummy)