From 5d975d00d92ca6867a457dbdfa997d615c08e4f2 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Sat, 16 May 2026 06:09:22 +0000
Subject: [PATCH] feat: tqdm progress bar for expert weight loading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

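Replaces the heartbeat prints (one line every 256 weight loads) with a
clean tqdm bar. The bar is created on the first weight load and advanced
once per load call; its total is an estimate (num_local_experts * 20,
i.e. ~20 tensors per expert):
  Loading Native NVFP4 Expert Weights: 50%|██████████░░| 480/960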
---
 vllm/patches/deepseek_v4.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py
index 2e1b4e70..2bbe2db7 100644
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -223,6 +223,7 @@ class DeepseekV4MegaMoEExperts(nn.Module):
     """
     _cutedsl_runner: 'CuTeDSLMoERunner | None' = None
     _weight_load_count: int = 0
+    _weight_load_tqdm: 'tqdm | None' = None
 
     # NVFP4 E2M1 lookup table (positive values, sign from bit 3)
     E2M1_LUT = [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]
@@ -352,11 +353,15 @@ class DeepseekV4MegaMoEExperts(nn.Module):
         shard_id: str,
         expert_id: int,
     ) -> bool:
-        # Heartbeat: print every 256 weight loads so k8s/docker
-        # don't think the pod is dead during GPU upload
+        # Progress bar for k8s/docker liveness during GPU upload
+        if DeepseekV4MegaMoEExperts._weight_load_count == 0:
+            DeepseekV4MegaMoEExperts._weight_load_tqdm = tqdm(
+                total=self.num_local_experts * 20,  # ~20 tensors per expert
+                desc="  Loading Native NVFP4 Expert Weights",
+                unit="tensor",
+            )
         DeepseekV4MegaMoEExperts._weight_load_count += 1
-        if DeepseekV4MegaMoEExperts._weight_load_count % 256 == 1:
-            print(f"  Loading expert weights... ({DeepseekV4MegaMoEExperts._weight_load_count})", flush=True)
+        DeepseekV4MegaMoEExperts._weight_load_tqdm.update(1)
 
         local_expert_id = self._map_global_expert_id(expert_id)
         if local_expert_id == -1: