diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh
new file mode 100644
index 0000000..dd47555
--- /dev/null
+++ b/.github/scripts/build.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Build the wheel with setup.py and tag its file name with the CUDA, PyTorch
+# and C++11 ABI variant it was built for.
+#
+# Required env: WHEEL_CUDA_VERSION, MATRIX_TORCH_VERSION, CXX11_ABI, GITHUB_ENV
+# Optional env: MATRIX_CUDA_VERSION, DG_USE_LOCAL_VERSION, LD_LIBRARY_PATH
+#
+# Contract with the calling workflows: the LAST line written to stdout is the
+# build exit code (124 means the 5h timeout fired). A failed or timed-out
+# build command does not by itself make this script exit non-zero; the caller
+# decides what to do with the reported code.
+
+set -exuo pipefail
+
+# Callers run `build.sh | tail -n 1`, which discards everything on stdout but
+# the last line. Keep the real stdout on fd 3 for the exit code and send all
+# build output to stderr so it stays visible in the job log.
+exec 3>&1 1>&2
+
+EXIT_CODE=0
+
+# Print the exit code as the last stdout line on every exit path, including
+# when `set -e` aborts the script early (e.g. a failing pip install).
+report_exit_code() {
+  local status=$?
+  if [[ "${EXIT_CODE}" -eq 0 ]]; then
+    EXIT_CODE=${status}
+  fi
+  echo "${EXIT_CODE}" >&3
+}
+trap report_exit_code EXIT
+
+# We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
+# https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
+# However this still fails so I am using a newer version of setuptools
+pip install setuptools==68.0.0
+pip install ninja packaging wheel
+export PATH="/usr/local/cuda/bin:/usr/local/nvidia/bin:/usr/local/nvidia/lib64:${PATH}"
+# LD_LIBRARY_PATH may be unset; expand it safely so `set -u` does not abort.
+export LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+
+# Limit MAX_JOBS otherwise the github runner goes OOM.
+# MATRIX_CUDA_VERSION may be unset, so default it instead of tripping `set -u`
+# inside a command substitution (which would leave MAX_JOBS empty).
+if [[ "${MATRIX_CUDA_VERSION:-}" == "129" ]]; then
+  export MAX_JOBS=1
+else
+  export MAX_JOBS=2
+fi
+export NVCC_THREADS=2
+
+export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
+export DG_USE_LOCAL_VERSION="${DG_USE_LOCAL_VERSION:-0}"
+
+# 5h timeout since GH allows max 6h and we want some buffer
+timeout 5h python setup.py bdist_wheel --dist-dir=dist || EXIT_CODE=$?
+
+if [[ "${EXIT_CODE}" -eq 0 ]]; then
+  tmpname="cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${CXX11_ABI}"
+
+  # Glob instead of parsing `ls`, and insist on exactly one wheel so the
+  # rename below cannot mix up files.
+  shopt -s nullglob
+  wheels=(dist/*.whl)
+  shopt -u nullglob
+  if (( ${#wheels[@]} != 1 )); then
+    echo "Expected exactly one wheel in dist/, found ${#wheels[@]}" >&2
+    exit 1
+  fi
+
+  # Insert "+<tag>" in front of the second "-" of the wheel file name.
+  built_wheel="${wheels[0]##*/}"
+  wheel_name="$(sed "s/-/+${tmpname}-/2" <<<"${built_wheel}")"
+  mv -- "dist/${built_wheel}" "dist/${wheel_name}"
+  echo "wheel_name=${wheel_name}" >> "${GITHUB_ENV}"
+fi
diff --git a/.github/scripts/check_for_ngc_images.sh b/.github/scripts/check_for_ngc_images.sh
new file mode 100644
index 0000000..9e17601
--- /dev/null
+++ b/.github/scripts/check_for_ngc_images.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# Probe the registry for the NGC PyTorch images of the last MONTHS_TO_CHECK
+# months and write the image references that exist to ngc_images.json as a
+# JSON array of strings.
+
+set -euo pipefail
+
+# Configuration
+BASE_IMAGE="nvcr.io/nvidia/pytorch"
+TAG_SUFFIX="-py3"
+MONTHS_TO_CHECK=7 # Check current month and previous 6 months (total 7)
+OUTPUT_FILE="ngc_images.json"
+
+# Initialize an array to store existing tags
+EXISTING_TAGS=()
+
+echo "Checking for existence of the last ${MONTHS_TO_CHECK} NGC PyTorch images: ${BASE_IMAGE}:YY.MM${TAG_SUFFIX}"
+echo "---------------------------------------------------------------------"
+
+# First day of the current month, computed once so every iteration uses the
+# same reference date.
+MONTH_START=$(date +%Y-%m-01)
+
+# Loop through the last N months
+for ((i = 0; i < MONTHS_TO_CHECK; i++)); do
+  # Calculate the YY.MM part of the tag for the month i months back
+  TARGET_DATE=$(date -d "${MONTH_START} -${i} months" +%y.%m)
+
+  # Construct the tag and the full image reference
+  IMAGE_TAG="${TARGET_DATE}${TAG_SUFFIX}"
+  FULL_IMAGE="${BASE_IMAGE}:${IMAGE_TAG}"
+
+  echo "Checking: ${FULL_IMAGE}"
+
+  # Use 'docker manifest inspect' to check for image existence without pulling.
+  if docker manifest inspect "${FULL_IMAGE}" > /dev/null 2>&1; then
+    echo "✅ EXISTS: Found."
+    EXISTING_TAGS+=("${FULL_IMAGE}")
+  else
+    echo "❌ MISSING: Not found."
+  fi
+done
+
+echo "---------------------------------------------------------------------"
+
+## JSON Output Generation
+if (( ${#EXISTING_TAGS[@]} == 0 )); then
+  # Nothing found: emit a real empty array. Feeding an empty list through
+  # split("\n") | .[:-1] would produce [""] instead.
+  JSON_STRING="[]"
+elif command -v jq &> /dev/null; then
+  # One JSON string per tag, then slurp them into a single array.
+  JSON_STRING=$(printf '%s\n' "${EXISTING_TAGS[@]}" | jq -R . | jq -s .)
+else
+  echo "WARNING: 'jq' is not installed. Building the JSON array by hand." >&2
+  echo "Found Tags: ${EXISTING_TAGS[*]}"
+  # Tags are built from BASE_IMAGE, a YY.MM date and TAG_SUFFIX, so they need
+  # no JSON escaping.
+  JSON_STRING=$(printf '"%s", ' "${EXISTING_TAGS[@]}")
+  JSON_STRING="[${JSON_STRING%, }]"
+fi
+
+echo "Generated JSON String of Existing Tags:"
+echo "${JSON_STRING}"
+
+echo "---"
+echo "Check complete."
+
+echo "${JSON_STRING}" > "${OUTPUT_FILE}"
diff --git a/.github/scripts/test.sh b/.github/scripts/test.sh
new file mode 100644
index 0000000..0211897
--- /dev/null
+++ b/.github/scripts/test.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+#
+# Smoke test: install the wheel(s) found in dist/ and import the package.
+
+set -exuo pipefail
+
+pip install dist/*.whl
+python -c "import deep_gemm; print(deep_gemm.__version__)"
diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml
index cff8013..25596d6 100644
--- a/.github/workflows/_build.yml
+++ b/.github/workflows/_build.yml
@@ -53,6 +53,11 @@ jobs:
           ref: ${{ inputs.release-version }}
           submodules: recursive
 
+      - name: Checkout build scripts
+        uses: actions/checkout@v4
+        with:
+          path: build-scripts/
+
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
@@ -82,7 +87,7 @@ jobs:
 
       - name: Install CUDA ${{ inputs.cuda-version }}
         if: ${{ inputs.cuda-version != 'cpu' }}
-        uses: Jimver/cuda-toolkit@v0.2.26
+        uses: Jimver/cuda-toolkit@v0.2.28
         id: cuda-toolkit
         with:
           cuda: ${{ inputs.cuda-version }}
@@ -109,8 +114,8 @@ jobs:
           # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
           # This code is ugly, maybe there's a better way to do this.
           export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
-            minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118, '2.8': 126}[env['MATRIX_TORCH_VERSION']]; \
-            maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128, '2.8': 129}[env['MATRIX_TORCH_VERSION']]; \
+            minv = {'2.1': 121, '2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118, '2.8': 126}[env['MATRIX_TORCH_VERSION']]; \
+            maxv = {'2.1': 121, '2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128, '2.8': 129}[env['MATRIX_TORCH_VERSION']]; \
             print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \
           )
           if [[ ${{ inputs.torch-version }} == *"dev"* ]]; then
@@ -156,39 +161,24 @@ jobs:
 
       - name: Build wheel
         id: build_wheel
+        env:
+          CXX11_ABI: ${{ inputs.cxx11_abi }}
+          MATRIX_TORCH_VERSION: ${{ env.MATRIX_TORCH_VERSION}}
+          WHEEL_CUDA_VERSION: ${{ env.WHEEL_CUDA_VERSION }}
+          MATRIX_PYTHON_VERSION: ${{ env.MATRIX_PYTHON_VERSION }}
+          DG_USE_LOCAL_VERSION: ${{ inputs.use-local-version && '1' || '0' }}
         run: |
-          # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
-          # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
-          # However this still fails so I'm using a newer version of setuptools
-          pip install setuptools==75.8.0
-          pip install ninja packaging wheel
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          # Limit MAX_JOBS otherwise the github runner goes OOM
-          # nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM
-
-          export MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2)
-          export NVCC_THREADS=2
-          export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
-          export DG_USE_LOCAL_VERSION=${{ inputs.use-local-version && '1' || '0' }}
-
-          # 5h timeout since GH allows max 6h and we want some buffer
-          EXIT_CODE=0
-          timeout 5h python setup.py bdist_wheel --dist-dir=dist || EXIT_CODE=$?
-
-          if [ $EXIT_CODE -eq 0 ]; then
-            tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ inputs.cxx11_abi }}
-            wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
-            ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
-            echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
-          fi
+          EXIT_CODE=$(bash build-scripts/.github/scripts/build.sh | tail -n 1)
 
           # Store exit code in GitHub env for later steps
           echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
 
-          # Do not fail the job if timeout killed the build
           exit $EXIT_CODE
 
+      - name: Log Built Wheels
+        run: |
+          ls dist
+
       - name: Log build logs after timeout
         if: always() && steps.build_wheel.outputs.build_exit_code == 124
         run: |
diff --git a/.github/workflows/_build_in_container.yml b/.github/workflows/_build_in_container.yml
new file mode 100644
index 0000000..00cdb5b
--- /dev/null
+++ b/.github/workflows/_build_in_container.yml
@@ -0,0 +1,149 @@
+name: ~Build wheel template
+
+on:
+  workflow_call:
+    inputs:
+      runs-on:
+        description: "The runner to use for the build"
+        required: true
+        type: string
+      container-image:
+        description: "Container image"
+        required: true
+        type: string
+      upload-to-release:
+        description: "Upload the wheel to the release named by release-version"
+        required: false
+        type: boolean
+        default: false
+      release-version:
+        description: "Git ref to check out; also the tag of the release to upload to"
+        required: false
+        type: string
+
+defaults:
+  run:
+    shell: bash -x -e -u -o pipefail {0}
+
+jobs:
+  build-wheel:
+    runs-on: ${{ inputs.runs-on }}
+    name: Build wheel (${{ inputs.container-image }})
+    steps:
+      - name: Move /var/lib/docker/
+        run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"
+
+      - name: Maximize build space
+        uses: easimon/maximize-build-space@master
+        with:
+          root-reserve-mb: 5120
+          temp-reserve-mb: 32
+          swap-size-mb: 10240
+          remove-dotnet: "true"
+          remove-android: "true"
+          remove-haskell: "true"
+          remove-codeql: "true"
+          build-mount-path: "/var/lib/docker/"
+
+      - name: Restore /var/lib/docker/
+        run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"
+
+      - name: Checkout source
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.release-version }}
+          submodules: recursive
+
+      - name: Checkout build scripts
+        uses: actions/checkout@v4
+        with:
+          path: build-scripts/
+
+      - name: Build
+        run: |
+          echo "Free space:"
+          df -h
+
+      - name: Pull the container
+        run: docker pull ${{ inputs.container-image }}
+
+      - name: Set CUDA and PyTorch versions
+        run: |
+          # script.sh runs inside the container and appends the versions found
+          # there to GITHUB_ENV so that later steps can read them.
+          cat <<'EOF' > script.sh
+          #!/bin/bash
+
+          set -exuo pipefail
+
+          echo "MATRIX_CUDA_VERSION=$(echo "$CUDA_VERSION" | awk -F. '{print $1 $2}')" >> "$GITHUB_ENV"
+          echo "MATRIX_TORCH_VERSION=$NVIDIA_PYTORCH_VERSION" >> "$GITHUB_ENV"
+          echo "WHEEL_CUDA_VERSION=$(echo "$CUDA_VERSION" | awk -F. '{print $1}')" >> "$GITHUB_ENV"
+          echo "MATRIX_PYTHON_VERSION=$(python -c "import sys; print('{}{}'.format(sys.version_info[0], sys.version_info[1]))")" >> "$GITHUB_ENV"
+          echo "CXX11_ABI=$(python -c 'import torch; print(str(torch._C._GLIBCXX_USE_CXX11_ABI).upper())')" >> "$GITHUB_ENV"
+
+          cat "$GITHUB_ENV"
+          EOF
+
+          docker run \
+            --rm \
+            --shm-size=64g \
+            --workdir /workspace \
+            --volume "$(pwd):/workspace" \
+            --volume "$GITHUB_ENV:$GITHUB_ENV" \
+            -e GITHUB_ENV="$GITHUB_ENV" \
+            ${{ inputs.container-image }} bash /workspace/script.sh
+
+      - name: Build wheel
+        id: build_wheel
+        env:
+          CXX11_ABI: ${{ env.CXX11_ABI }}
+          MATRIX_CUDA_VERSION: ${{ env.MATRIX_CUDA_VERSION }}
+          MATRIX_TORCH_VERSION: ${{ env.MATRIX_TORCH_VERSION }}
+          WHEEL_CUDA_VERSION: ${{ env.WHEEL_CUDA_VERSION }}
+          MATRIX_PYTHON_VERSION: ${{ env.MATRIX_PYTHON_VERSION }}
+        run: |
+          EXIT_CODE=$(docker run \
+            --rm \
+            --shm-size=64g \
+            --workdir /workspace \
+            --volume "$(pwd):/workspace" \
+            --volume "$GITHUB_ENV:$GITHUB_ENV" \
+            -e PIP_CONSTRAINT= \
+            -e GITHUB_ENV="$GITHUB_ENV" \
+            -e CXX11_ABI="$CXX11_ABI" \
+            -e MATRIX_CUDA_VERSION="$MATRIX_CUDA_VERSION" \
+            -e MATRIX_TORCH_VERSION="$MATRIX_TORCH_VERSION" \
+            -e WHEEL_CUDA_VERSION="$WHEEL_CUDA_VERSION" \
+            -e MATRIX_PYTHON_VERSION="$MATRIX_PYTHON_VERSION" \
+            ${{ inputs.container-image }} bash /workspace/build-scripts/.github/scripts/build.sh | tail -n 1)
+
+          # build.sh reports the build status as its last stdout line. Expose it
+          # to later steps and fail this step when the build failed or timed out.
+          echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
+          exit "$EXIT_CODE"
+
+      - name: Log Built Wheels
+        run: |
+          ls dist
+
+      - name: Get Release with tag
+        id: get_current_release
+        if: inputs.upload-to-release
+        uses: joutvhu/get-release@v1
+        with:
+          tag_name: ${{ inputs.release-version }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Upload Release Asset
+        id: upload_release_asset
+        if: inputs.upload-to-release
+        uses: actions/upload-release-asset@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
+          asset_path: ./dist/${{env.wheel_name}}
+          asset_name: ${{env.wheel_name}}
+          asset_content_type: application/*
diff --git a/.github/workflows/build_in_container.yml b/.github/workflows/build_in_container.yml
new file mode 100644
index 0000000..7056e14
--- /dev/null
+++ b/.github/workflows/build_in_container.yml
@@ -0,0 +1,34 @@
+name: Build wheels in a container
+
+on:
+  workflow_dispatch:
+    inputs:
+      runs-on:
+        description: "The runner to use for the build"
+        required: true
+        type: string
+        default: ubuntu-22.04
+      container-image:
+        description: "Container image"
+        required: true
+        type: string
+      upload-to-release:
+        description: "Upload the wheel to the release named by release-version"
+        required: false
+        type: boolean
+        default: false
+      release-version:
+        description: "Git ref to check out; also the tag of the release to upload to"
+        required: false
+        type: string
+
+  push:
+
+jobs:
+  build-wheels:
+    uses: ./.github/workflows/_build_in_container.yml
+    with:
+      runs-on: ${{ inputs.runs-on || 'ubuntu-22.04' }}
+      container-image: ${{ inputs.container-image || 'nvcr.io/nvidia/pytorch:25.06-py3' }}
+      upload-to-release: ${{ inputs.upload-to-release || false }}
+      release-version: ${{ inputs.release-version || 'v2.2.5' }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index a7b3e6b..9886ffe 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -41,9 +41,9 @@ jobs:
         # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
         # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
         os: [ubuntu-22.04]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
-        torch-version: ["2.4.0", "2.5.1", "2.6.0", "2.7.1", "2.8.0"]
-        cuda-version: ["12.9.1"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"]
+        cuda-version: ["12.9.1", "13.0.0"]
         # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
         # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
         # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
@@ -51,9 +51,14 @@ jobs:
         cxx11_abi: ["FALSE", "TRUE"]
         exclude:
           # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
-          # Pytorch < 2.5 does not support Python 3.13
           - torch-version: "2.4.0"
             python-version: "3.13"
+        include:
+          - os: "ubuntu-22.04"
+            cxx11_abi: "FALSE"
+            cuda-version: "12.1.0"
+            python-version: "3.8"
+            torch-version: "2.1.0"
     uses: ./.github/workflows/_build.yml
     with:
       runs-on: ${{ matrix.os }}
@@ -64,6 +69,37 @@ jobs:
       release-version: ${{ needs.setup_release.outputs.release-version }}
       upload-to-release: true
       use-local-version: false
+
+  check_for_ngc_images:
+    runs-on: ubuntu-latest
+    outputs:
+      images: ${{ steps.check_for_ngc_images.outputs.IMAGES }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Check for NGC PyTorch images
+        id: check_for_ngc_images
+        run: |
+          bash ./.github/scripts/check_for_ngc_images.sh
+          echo "IMAGES=$(jq -c . ngc_images.json)" | tee -a "$GITHUB_OUTPUT"
+
+  build_ngc_wheels:
+    name: Build Wheel for NGC PyTorch
+    needs: [setup_release, check_for_ngc_images]
+    # An empty matrix vector is an error, so skip the job when no image was found.
+    if: needs.check_for_ngc_images.outputs.images != '[]'
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-22.04]
+        container-image: ${{ fromJson(needs.check_for_ngc_images.outputs.images) }}
+    uses: ./.github/workflows/_build_in_container.yml
+    with:
+      runs-on: ${{ matrix.os }}
+      container-image: ${{ matrix.container-image }}
+      release-version: ${{ needs.setup_release.outputs.release-version }}
+      upload-to-release: true
 
   publish_package:
     name: Publish package
diff --git a/setup.py b/setup.py
index 01a72eb..38e891c 100644
--- a/setup.py
+++ b/setup.py
@@ -84,7 +84,14 @@ def get_platform():
 
 def get_wheel_url():
     torch_version = parse(torch.__version__)
-    torch_version = f'{torch_version.major}.{torch_version.minor}'
+    # When NVIDIA_PRODUCT_NAME says this is a PyTorch container, use its
+    # NVIDIA_PYTORCH_VERSION; fall back to torch's major.minor if it is unset.
+    ngc_torch_version = os.environ.get("NVIDIA_PYTORCH_VERSION")
+    if os.environ.get("NVIDIA_PRODUCT_NAME", "") == "PyTorch" and ngc_torch_version:
+        torch_version = ngc_torch_version
+    else:
+        torch_version = f'{torch_version.major}.{torch_version.minor}'
+
     python_version = f'cp{sys.version_info.major}{sys.version_info.minor}'
     platform_name = get_platform()
     deep_gemm_version = get_package_version()