diff --git a/single_shot_inference.py b/single_shot_inference.py
index b5e3a9a7..bd23b8cd 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -1359,9 +1359,7 @@ def main():
         se.set_fused_swiglu(True)
         # EAGERLY process shared expert weights
         se._ensure_initialized()
-        # BF16 fallback for shared expert — dequantize NVFP4 weights to BF16
         se._use_runtime_gsa = True
-        se.enable_bf16_fallback()  # sets _fused_swiglu=False, pre-materializes BF16 weights
         se_runners[li] = se
         if (li+1) % 10 == 0: print(f"  Built {li+1}/{n_layers} MoE layers")
         torch.cuda.empty_cache()