#!/bin/bash
# DeepSeek V4 Pro FP8 → NVFP4 via NVIDIA ModelOpt
# Run from: /root/nvidia-meeting/modelopt-repo/examples/llm_ptq
#
# Prerequisites:
#   - modelopt 0.45.0+ from git:
#       pip install "nvidia-modelopt[hf] @ git+https://github.com/NVIDIA/Model-Optimizer.git"
#   - transformers 5.8.0.dev0:
#       pip install git+https://github.com/huggingface/transformers.git
#   - kernels:
#       pip install -U kernels
#   - Patch modelopt:
#       cp patches/quant_module_patched.py /lib/python3.10/site-packages/modelopt/torch/quantization/nn/modules/quant_module.py
#     NOTE(review): the destination looks like it is missing a venv/prefix
#     directory in front of /lib — confirm the intended site-packages path.
#
# Source weights: /root/nvidia-meeting/DeepSeek-V4-Pro-FP8
#
# Optional environment overrides (the defaults reproduce the paths above):
#   DSV4_PTQ_DIR    directory containing scripts/huggingface_example.sh
#   DSV4_VENV_DIR   Python virtualenv to activate before running
#   DSV4_MODEL_DIR  value passed to --model

set -euo pipefail

readonly ptq_dir="${DSV4_PTQ_DIR:-/root/nvidia-meeting/modelopt-repo/examples/llm_ptq}"
readonly venv_dir="${DSV4_VENV_DIR:-/root/nvidia-meeting/venv}"
readonly model_dir="${DSV4_MODEL_DIR:-/root/nvidia-meeting/DeepSeek-V4-Pro-FP8}"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Fail early with a readable message instead of a bare shell error.
[[ -d "${ptq_dir}" ]] || die "PTQ example directory not found: ${ptq_dir}"
[[ -f "${venv_dir}/bin/activate" ]] || die "virtualenv not found: ${venv_dir}"

# The example script is invoked by a relative path, so run from its directory.
cd -- "${ptq_dir}" || die "cannot cd to ${ptq_dir}"
[[ -f scripts/huggingface_example.sh ]] \
  || die "scripts/huggingface_example.sh not found under ${ptq_dir}"

# Activate scripts from some virtualenv versions read unset variables, so
# relax -u only for the duration of the source.
set +u
# shellcheck source=/dev/null
source "${venv_dir}/bin/activate"
set -u

# Arguments forwarded verbatim to the ModelOpt example script.
ptq_args=(
  --model "${model_dir}"
  --quant nvfp4
  --tp 8
  --calib 256
  --kv_cache_quant fp8_cast
  --trust_remote_code
  --use_seq_device_map
)

# PYTORCH_CUDA_ALLOC_CONF is set for this one command only (not exported
# to the rest of the shell session).
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
  bash scripts/huggingface_example.sh "${ptq_args[@]}"