#!/bin/bash
#
# Build the DeepGEMM NVFP4 mega_moe kernel inside the running vLLM container.
#
# Usage: bash build_nvfp4.sh [CACHE_BUSTER_VALUE]
#
# Arguments:
#   CACHE_BUSTER_VALUE  Optional label (default: 1). It is only echoed in the
#                       progress output; the clone directory is always removed
#                       and re-cloned regardless of its value.
#
# Steps (each runs through `docker exec` in $CONTAINER):
#   1. remove any previous clone at $REPO_DIR
#   2. clone branch $REPO_BRANCH of $REPO_URL into $REPO_DIR
#   3. run `python3 setup.py build_ext --inplace` with the extra include
#      directories prepended to CPATH
#   4. import deep_gemm from the clone directory as a smoke test
#
# Exits non-zero as soon as any step fails.

set -euo pipefail

readonly CONTAINER='nvidia-meeting-vllm-1'
readonly REPO_DIR='/root/DeepGEMM-nvfp4'
readonly REPO_URL='https://sweetapi.com/biondizzle/DeepGEMM.git'
readonly REPO_BRANCH='nvfp4-mega-moe'

# Header directories prepended to CPATH for the build step (colon-separated).
readonly EXTRA_INCLUDE_PATH='/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include'

#######################################
# Run a command inside the target container.
# Globals:   CONTAINER (read)
# Arguments: the command and its arguments, passed through unchanged
# Returns:   the exit status of `docker exec`
#######################################
in_container() {
  docker exec "${CONTAINER}" "$@"
}

#######################################
# Clean, clone, build and verify DeepGEMM inside the container.
# Globals:   REPO_DIR, REPO_URL, REPO_BRANCH, EXTRA_INCLUDE_PATH (read)
# Arguments: $1 - optional cache-buster label (default: 1), only echoed
# Outputs:   progress banners and the tools' output on stdout
#######################################
main() {
  local cache_buster="${1:-1}"

  echo "=== Cleaning old clone (CACHE_BUSTER=${cache_buster}) ==="
  # ${REPO_DIR:?} aborts instead of running `rm -rf` on an empty path.
  in_container rm -rf -- "${REPO_DIR:?}"

  echo "=== Cloning ${REPO_BRANCH} branch ==="
  in_container git clone -b "${REPO_BRANCH}" "${REPO_URL}" "${REPO_DIR}"

  echo "=== Building DeepGEMM ==="
  # The snippet is single-quoted so that nothing is expanded on the host;
  # values are handed over as positional parameters ($1, $2) and CPATH is
  # expanded by the shell inside the container. ${CPATH:+:$CPATH} appends the
  # container's existing CPATH only when it is non-empty, so no empty element
  # (which the compiler would treat as the current directory) is introduced.
  in_container bash -c '
    set -e
    export CPATH="$1${CPATH:+:$CPATH}"
    cd "$2"
    python3 setup.py build_ext --inplace 2>&1
  ' _ "${EXTRA_INCLUDE_PATH}" "${REPO_DIR}"

  echo "=== Verifying ==="
  # Import from inside the clone directory, where build_ext --inplace ran.
  in_container bash -c '
    set -e
    cd "$1"
    python3 -c "import deep_gemm; print(\"DeepGEMM NVFP4 build OK\")" 2>&1
  ' _ "${REPO_DIR}"

  echo "=== DONE ==="
}

main "$@"