router: catch CuTeDSL warmup failures fast, don't let MLIR errors slow down init
This commit is contained in:
@@ -36,11 +36,15 @@ def warmup_router_compilation(router) -> None:
|
||||
"""
|
||||
if router.mode == "dense":
|
||||
# Dummy forward at small N triggers decode-path compile.
|
||||
# CuTeDSL fused kernel is WIP — falls through to prefill path.
|
||||
dummy = torch.zeros(
|
||||
1, router.hidden_size,
|
||||
dtype=torch.bfloat16, device=router.device,
|
||||
)
|
||||
router._run_dense_impl(dummy)
|
||||
try:
|
||||
router._run_dense_impl(dummy)
|
||||
except Exception:
|
||||
pass # CuTeDSL kernel not yet working; prefill path is fine
|
||||
else:
|
||||
dummy = torch.zeros(1, dtype=torch.int32, device=router.device)
|
||||
router._run_hash_impl(dummy)
|
||||
|
||||
Reference in New Issue
Block a user