- Patch fixes iter_weights_for_calibration() for DeepseekV4Experts (ModuleList of quantizers vs. a single quantizer)
- Run script uses the official NVIDIA hf_ptq.py with the FP8 source weights
- Documents flags to avoid (--low_memory_mode, wrong argument names)
26 lines
996 B
Bash
Executable File
26 lines
996 B
Bash
Executable File
#!/bin/bash
#
# DeepSeek V4 Pro FP8 → NVFP4 via NVIDIA ModelOpt
#
# Activates the project virtualenv, changes into the ModelOpt llm_ptq example
# directory and launches scripts/huggingface_example.sh with the NVFP4
# quantization settings listed at the bottom of this file.
#
# Run from: /root/nvidia-meeting/modelopt-repo/examples/llm_ptq
# (the script cd's there itself, so it can be started from any directory).
#
# Environment overrides (the defaults reproduce the original fixed setup):
#   PTQ_DIR    llm_ptq example directory that contains scripts/
#   VENV_DIR   virtualenv root; "${VENV_DIR}/bin/activate" is sourced
#   MODEL_DIR  source weights passed to --model
#
# Prerequisites:
#   - modelopt 0.45.0+ from git:
#       pip install "nvidia-modelopt[hf] @ git+https://github.com/NVIDIA/Model-Optimizer.git"
#   - transformers 5.8.0.dev0:
#       pip install git+https://github.com/huggingface/transformers.git
#   - kernels:
#       pip install -U kernels
#   - Patch modelopt:
#       cp patches/quant_module_patched.py \
#          <venv>/lib/python3.10/site-packages/modelopt/torch/quantization/nn/modules/quant_module.py
#
# Source weights: /root/nvidia-meeting/DeepSeek-V4-Pro-FP8

# Abort on the first failing command; a failing pipeline stage also counts.
set -eo pipefail

readonly PTQ_DIR="${PTQ_DIR:-/root/nvidia-meeting/modelopt-repo/examples/llm_ptq}"
readonly VENV_DIR="${VENV_DIR:-/root/nvidia-meeting/venv}"
readonly MODEL_DIR="${MODEL_DIR:-/root/nvidia-meeting/DeepSeek-V4-Pro-FP8}"

# The launcher path below is relative, so the working directory matters.
cd -- "${PTQ_DIR}"

# shellcheck disable=SC1091 — the activate script lives outside this repo.
source "${VENV_DIR}/bin/activate"

# nounset is switched on only after activation: some virtualenv activate
# scripts read variables that may be unset.
set -u

# Arguments for scripts/huggingface_example.sh, kept in an array so every
# value stays a single word even if a path contains whitespace.
# NOTE(review): the change description for this script lists --low_memory_mode
# as a flag to avoid; do not add it here without confirming.
ptq_args=(
  --model "${MODEL_DIR}"
  --quant nvfp4
  --tp 8
  --calib 256
  --kv_cache_quant fp8_cast
  --trust_remote_code
  --use_seq_device_map
)

# PYTORCH_CUDA_ALLOC_CONF is set for this one command only.
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
  bash scripts/huggingface_example.sh "${ptq_args[@]}"