nvfp4_full: pass HF_TOKEN env var for gated calibration dataset

This commit is contained in:
2026-05-08 13:33:45 +00:00
parent 3d38e1d5cd
commit 36e1342270

View File

@@ -40,6 +40,9 @@ KV_CACHE_QUANT = "fp8_cast"
# 2.8TB RAM is enough for the 3TB model (with memory-mapped loading)
EXTRA_FLAGS = "--trust_remote_code --use_seq_device_map --gpu_max_mem_percentage 0.7"
# HF token for gated calibration datasets (nvidia/Nemotron-Post-Training-Dataset-v2).
# SECURITY: the token is read from the caller's environment and must never be
# committed as a literal. Any token that has appeared in version control is
# exposed to everyone with read access: revoke it in the Hugging Face account
# settings and issue a new one.
# Usage: `export HF_TOKEN=<your token>` before launching this script.
import os  # local import: only needed for the HF_TOKEN lookup below

# Falls back to "" when the variable is unset so the command template that
# interpolates HF_TOKEN still renders; presumably the gated dataset download
# then fails with an authentication error (verify against the calibration script).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Output dir follows modelopt convention: <model>_<quant>_kv_<kv_quant>
# We override the model name to make the strategy clear
OUTPUT_NAME = f"DeepSeek-V4-Pro_NVFP4-{QUANT}_kv_{KV_CACHE_QUANT}"
@@ -50,6 +53,7 @@ LOG_FILE = f"/root/nvidia-meeting/modelopt_{QUANT}.log"
# ── Run ─────────────────────────────────────────────────────────────────────
cmd = f"""cd {SCRIPT_DIR} && \\
. /root/nvidia-meeting/venv/bin/activate && \\
HF_TOKEN={HF_TOKEN} \\
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \\
bash scripts/huggingface_example.sh \\
--model {MODEL} \\