From 5ea5b579c326cdeb2a16da99f910e25b0f0ff1c6 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 12 May 2026 07:24:36 +0000 Subject: [PATCH] Trim banner, no code changes --- patches/deepseek_v4.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/patches/deepseek_v4.py b/patches/deepseek_v4.py index d22acd0..b829199 100644 --- a/patches/deepseek_v4.py +++ b/patches/deepseek_v4.py @@ -9,28 +9,8 @@ import os as _os _git_commit = _os.popen("git -C /root/nvidia-meeting/deepseek-v4-quant rev-parse --short HEAD 2>/dev/null || echo 'unknown'").read().strip() print(f""" {'='*70} - DeepSeek V4 NVFP4 Patch - {'='*70} - Commit: {_git_commit} - Loaded: {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')} - Node: {_os.uname().nodename} - - Architecture: - wo_a → FP8 + DeepGEMM block scale (BMM einsum) - wq_b/wo_b → BF16 (UnquantizedLinearMethod) - fused_wqa → BF16 (stacked q_a + kv, dequantized from NVFP4) - compressor → BF16 (reconstructed from separate kv_proj+gate_proj) - shared_exp → FP8 (Fp8LinearMethod, DeepGEMM) - MoE experts → NVFP4 (FusedMoE, FLASHINFER_TRTLLM) — NOT converted - - Bugs fixed: - #1 DeepGEMM sf.dim() — block scale format (deepgemm_post_process) - #2 fused_skip_regex — q_b/o_a/o_b scales no longer skipped - #3 input_scale — removed from weight dequant (activations only) - #4 compressor indexer — sub_path for .indexer keys - #5 block scale dtype — must be float32, not float8_e4m3fn - #6 block scale values — torch.full(fp8_scale) not torch.ones - #7 UE8M0 block scale — REVERTED: checkpoint scales are standard NVFP4 UE4M3, not E8M0 + DeepSeek V4 NVFP4 Patch (commit {_git_commit}) + Loaded: {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')} {'='*70} """) # ==============================================================================