Files
DeepGEMM/build_nvfp4.sh

24 lines
983 B
Bash

#!/bin/bash
# Build DeepGEMM NVFP4 mega_moe kernel inside the running vLLM container.
#
# Usage: bash build_nvfp4.sh [CACHE_BUSTER_VALUE]
#
#   CACHE_BUSTER_VALUE  Optional tag (default: 1). It is echoed in the log and
#                       handed to the clone step, where it is assigned to a
#                       shell variable that nothing reads.
#
# Every step runs through `docker exec` in ${CONTAINER}:
#   1. delete any previous clone at ${REPO_DIR}
#   2. clone the nvfp4-mega-moe branch
#   3. build the extension in place with extra header dirs on CPATH
#   4. import deep_gemm as a smoke test
#
# The script aborts on the first failing step (set -e).
set -euo pipefail

readonly CACHE_BUSTER=${1:-1}
readonly CONTAINER=nvidia-meeting-vllm-1
readonly REPO_DIR=/root/DeepGEMM-nvfp4
# Header directories prepended to CPATH for the build step.
readonly INCLUDE_DIRS='/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include'

#######################################
# Run a bash snippet inside the container.
# Values are handed over as positional parameters ($1, $2, ...) instead of
# being spliced into the command string, so the container shell never
# re-parses them as code.
# Globals:   CONTAINER (read)
# Arguments: $1 - bash snippet (write it single-quoted)
#            $2.. - values exposed to the snippet as $1, $2, ...
# Returns:   exit status of `docker exec`
#######################################
in_container() {
  local snippet=$1
  shift
  # The literal "bash" after the snippet becomes $0 inside the container shell.
  docker exec "${CONTAINER}" bash -c "${snippet}" bash "$@"
}

echo "=== Cleaning old clone (CACHE_BUSTER=$CACHE_BUSTER) ==="
# ${1:?} refuses to run rm -rf with an empty path.
in_container 'rm -rf -- "${1:?}"' "${REPO_DIR}"

echo "=== Cloning nvfp4-mega-moe branch ==="
# The trailing CACHE_BUSTER assignment has no effect on the clone; it is kept
# only to preserve the original command shape.
in_container \
  'git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git "$1" && CACHE_BUSTER=$2' \
  "${REPO_DIR}" "${CACHE_BUSTER}"

echo "=== Building DeepGEMM ==="
# ${CPATH:+:$CPATH} appends the container's existing CPATH only when it is
# non-empty; a bare trailing ':' would add the current directory to the
# compiler's include search path.
in_container \
  'export CPATH="$2${CPATH:+:$CPATH}" && cd "$1" && python3 setup.py build_ext --inplace 2>&1' \
  "${REPO_DIR}" "${INCLUDE_DIRS}"

echo "=== Verifying ==="
in_container \
  'cd "$1" && python3 -c "import deep_gemm; print(\"DeepGEMM NVFP4 build OK\")" 2>&1' \
  "${REPO_DIR}"

echo "=== DONE ==="