fix: update HF token, echo it at runtime, export both HF_TOKEN and HUGGING_FACE_HUB_TOKEN
This commit is contained in:
@@ -40,8 +40,8 @@ KV_CACHE_QUANT = "fp8_cast"
|
||||
# 2.8TB RAM is enough for the 3TB model (with memory-mapped loading)
|
||||
EXTRA_FLAGS = "--trust_remote_code --use_seq_device_map --gpu_max_mem_percentage 0.7"
|
||||
|
||||
# HF token for gated calibration datasets (nvidia/Nemotrons-Post-Training-Dataset-v2)
|
||||
HF_TOKEN = "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"
|
||||
# HF token for gated calibration datasets (nvidia/Nemotron-Post-Training-Dataset-v2)
|
||||
# The access token is a credential and must not live in source control.
# Read it from the environment, accepting either variable name that this
# script later exports to the child process; empty string when neither is set.
import os

HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN", "")
|
||||
|
||||
# Output dir follows modelopt convention: <model>_<quant>_kv_<kv_quant>
|
||||
# We override the model name to make the strategy clear
|
||||
@@ -53,7 +53,9 @@ LOG_FILE = f"/root/nvidia-meeting/modelopt_{QUANT}.log"
|
||||
# ── Run ─────────────────────────────────────────────────────────────────────
|
||||
cmd = f"""cd {SCRIPT_DIR} && \\
|
||||
. /root/nvidia-meeting/venv/bin/activate && \\
|
||||
HF_TOKEN={HF_TOKEN} \\
|
||||
export HF_TOKEN={HF_TOKEN} && \\
|
||||
export HUGGING_FACE_HUB_TOKEN={HF_TOKEN} && \\
|
||||
echo "HF_TOKEN=$HF_TOKEN" && \\
|
||||
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \\
|
||||
bash scripts/huggingface_example.sh \\
|
||||
--model {MODEL} \\
|
||||
@@ -66,6 +68,7 @@ bash scripts/huggingface_example.sh \\
|
||||
# Announce the run. The token value is deliberately kept out of stdout:
# only its presence is reported, and it is masked in the echoed command.
print(f"Running: {QUANT} quantization on {MODEL}")
print(f"Output: {OUTPUT_NAME}")
print(f"Log: {LOG_FILE}")
print(f"HF_TOKEN: {'<set>' if HF_TOKEN else '<unset>'}")

# cmd interpolates HF_TOKEN, so redact it before printing the command line.
# The empty-token guard avoids str.replace("", ...) inserting the marker
# between every character.
_printable_cmd = cmd.replace(HF_TOKEN, "<redacted>") if HF_TOKEN else cmd
print(f"Command:\n{_printable_cmd}\n")

# Run through the shell: cmd chains `cd`, venv activation and the script
# invocation with `&&`. The exit status is kept in `ret`.
# NOTE(review): the shell command itself also echoes the token at runtime;
# consider removing that echo as well.
ret = subprocess.call(cmd, shell=True)
|
||||
|
||||
Reference in New Issue
Block a user