diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index 50785954..c5e05f9d 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -429,7 +429,7 @@ class DeepseekV4MegaMoEExperts(nn.Module): l2_fp4, l2_sf, l2_gs = [], [], [] from tqdm import tqdm - for e in tqdm(range(self.num_local_experts), desc=" NVFP4 experts", unit="exp"): + for e in tqdm(range(self.num_local_experts), desc=" uint8→NVFP4 experts", unit="exp"): # ── L1: gate + up (fused) ── gate_w = self.w13_weight.data[e, :self.intermediate_size] # (intermediate, hidden//2) uint8 up_w = self.w13_weight.data[e, self.intermediate_size:] # (intermediate, hidden//2) uint8 @@ -1623,7 +1623,7 @@ class DeepseekV4Model(nn.Module): _shard_index = self._build_shard_index("/model") if os.path.isdir("/model") else None from tqdm import tqdm - for layer_idx, layer in tqdm(enumerate(self.layers), total=len(self.layers), desc=" NVFP4 convert", unit="layer"): + for layer_idx, layer in tqdm(enumerate(self.layers), total=len(self.layers), desc=" NVFP4→FP8/BF16 convert", unit="layer"): attn = layer.attn # FP8 conversion: only wo_a