#!/bin/bash
# Build the DeepGEMM NVFP4 mega_moe kernel inside the running vLLM container.
#
# Usage: bash build_nvfp4.sh [CACHE_BUSTER_VALUE]
#
# Arguments:
#   CACHE_BUSTER_VALUE  Optional, defaults to 1. It is logged and exported into
#                       the environment of the clone command; it does not
#                       change what is cloned or built.
#
# Steps, each executed in the container through `docker exec`:
#   1. remove any previous clone at REPO_DIR
#   2. clone REPO_BRANCH of REPO_URL into REPO_DIR
#   3. build the extension in place with `setup.py build_ext --inplace`
#   4. check that `import deep_gemm` succeeds from REPO_DIR
#
# The script stops at the first failing step and exits non-zero.
# Requires docker on the host, and rm, git, bash and python3 in the container.

set -euo pipefail

readonly CACHE_BUSTER="${1:-1}"
readonly CONTAINER="nvidia-meeting-vllm-1"
readonly REPO_DIR="/root/DeepGEMM-nvfp4"
readonly REPO_URL="https://sweetapi.com/biondizzle/DeepGEMM.git"
readonly REPO_BRANCH="nvfp4-mega-moe"
# Colon-separated header directories prepended to CPATH for the build.
readonly EXTRA_INCLUDES="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include"

echo "=== Cleaning old clone (CACHE_BUSTER=$CACHE_BUSTER) ==="
# ${REPO_DIR:?} aborts instead of running `rm -rf` on an empty path.
docker exec "$CONTAINER" rm -rf -- "${REPO_DIR:?}"

echo "=== Cloning nvfp4-mega-moe branch ==="
# CACHE_BUSTER is passed as an environment variable rather than being spliced
# into a shell string, so an arbitrary argument value cannot inject commands.
docker exec -e "CACHE_BUSTER=$CACHE_BUSTER" "$CONTAINER" \
  git clone -b "$REPO_BRANCH" "$REPO_URL" "$REPO_DIR"

echo "=== Building DeepGEMM ==="
# Host values reach the container shell as positional parameters ($1, $2), so
# the single-quoted program below is never subject to host-side expansion.
# CPATH is expanded inside the container; the ':' separator is only added when
# CPATH is already non-empty, to avoid creating an empty path element.
docker exec "$CONTAINER" bash -c '
  export CPATH="$1${CPATH:+:$CPATH}" &&
  cd "$2" &&
  python3 setup.py build_ext --inplace 2>&1
' _ "$EXTRA_INCLUDES" "$REPO_DIR"

echo "=== Verifying ==="
# Import from inside REPO_DIR, where the in-place build left the extension.
docker exec "$CONTAINER" bash -c '
  cd "$1" &&
  python3 -c "import deep_gemm; print(\"DeepGEMM NVFP4 build OK\")" 2>&1
' _ "$REPO_DIR"

echo "=== DONE ==="