From 5d975d00d92ca6867a457dbdfa997d615c08e4f2 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Sat, 16 May 2026 06:09:22 +0000
Subject: [PATCH] feat: tqdm progress bar for expert weight loading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces heartbeat prints with a clean tqdm bar:
Loading Native NVFP4 Expert Weights: 50%|██████████░░| 480/960
---
 vllm/patches/deepseek_v4.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py
index 2e1b4e70..2bbe2db7 100644
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -223,6 +223,7 @@ class DeepseekV4MegaMoEExperts(nn.Module):
     """
     _cutedsl_runner: 'CuTeDSLMoERunner | None' = None
     _weight_load_count: int = 0
+    _weight_load_tqdm: 'tqdm | None' = None
 
     # NVFP4 E2M1 lookup table (positive values, sign from bit 3)
     E2M1_LUT = [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]
@@ -352,11 +353,15 @@ class DeepseekV4MegaMoEExperts(nn.Module):
         shard_id: str,
         expert_id: int,
     ) -> bool:
-        # Heartbeat: print every 256 weight loads so k8s/docker
-        # don't think the pod is dead during GPU upload
+        # Progress bar for k8s/docker liveness during GPU upload
+        if DeepseekV4MegaMoEExperts._weight_load_count == 0:
+            DeepseekV4MegaMoEExperts._weight_load_tqdm = tqdm(
+                total=self.num_local_experts * 20, # ~20 tensors per expert
+                desc=" Loading Native NVFP4 Expert Weights",
+                unit="tensor",
+            )
         DeepseekV4MegaMoEExperts._weight_load_count += 1
-        if DeepseekV4MegaMoEExperts._weight_load_count % 256 == 1:
-            print(f" Loading expert weights... ({DeepseekV4MegaMoEExperts._weight_load_count})", flush=True)
+        DeepseekV4MegaMoEExperts._weight_load_tqdm.update(1)
 
         local_expert_id = self._map_global_expert_id(expert_id)
         if local_expert_id == -1: