Trim banner, no code changes

2026-05-12 07:24:36 +00:00
parent 74af9984f6
commit 5ea5b579c3
1 changed file with 2 additions and 22 deletions
--- a/patches/deepseek_v4.py
+++ b/patches/deepseek_v4.py
@@ -9,28 +9,8 @@ import os as _os
 _git_commit = _os.popen("git -C /root/nvidia-meeting/deepseek-v4-quant rev-parse --short HEAD 2>/dev/null || echo 'unknown'").read().strip()
 print(f"""
 {'='*70}
-  DeepSeek V4 NVFP4 Patch
-  {'='*70}
-  Commit:   {_git_commit}
-  Loaded:   {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}
-  Node:     {_os.uname().nodename}
-
-  Architecture:
-    wo_a        → FP8 + DeepGEMM block scale (BMM einsum)
-    wq_b/wo_b   → BF16 (UnquantizedLinearMethod)
-    fused_wqa   → BF16 (stacked q_a + kv, dequantized from NVFP4)
-    compressor  → BF16 (reconstructed from separate kv_proj+gate_proj)
-    shared_exp  → FP8 (Fp8LinearMethod, DeepGEMM)
-    MoE experts → NVFP4 (FusedMoE, FLASHINFER_TRTLLM) — NOT converted
-  
-  Bugs fixed:
-    #1 DeepGEMM sf.dim() — block scale format (deepgemm_post_process)
-    #2 fused_skip_regex — q_b/o_a/o_b scales no longer skipped
-    #3 input_scale — removed from weight dequant (activations only)
-    #4 compressor indexer — sub_path for .indexer keys
-    #5 block scale dtype — must be float32, not float8_e4m3fn
-    #6 block scale values — torch.full(fp8_scale) not torch.ones
-    #7 UE8M0 block scale — REVERTED: checkpoint scales are standard NVFP4 UE4M3, not E8M0
+  DeepSeek V4 NVFP4 Patch  (commit {_git_commit})
+  Loaded: {_dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}
 {'='*70}
 """)
 # ==============================================================================