ci: Fixes for pre-built wheels (#214)
* build: Allow NGC builds Signed-off-by: oliver könig <okoenig@nvidia.com> * reduce grid Signed-off-by: oliver könig <okoenig@nvidia.com> * update grid Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * upgrade cuda action Signed-off-by: oliver könig <okoenig@nvidia.com> * remove test Signed-off-by: oliver könig <okoenig@nvidia.com> * py3.8 Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * exclude Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * torch-version Signed-off-by: oliver könig <okoenig@nvidia.com> * py3.8/torch2.1/cuda12.3 Signed-off-by: oliver könig <okoenig@nvidia.com> * Update publish.yml * fix grid Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * cuda11.8 Signed-off-by: oliver könig <okoenig@nvidia.com> * no hopper for 118 Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> --------- Signed-off-by: oliver könig <okoenig@nvidia.com>
This commit is contained in:
31
.github/scripts/build.sh
vendored
Normal file
31
.github/scripts/build.sh
vendored
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#!/bin/bash
#
# Build the wheel with `python setup.py bdist_wheel` and report the build's
# exit status as the LAST line of stdout; the calling workflow reads it with
# `| tail -n 1`.
#
# Environment:
#   MATRIX_CUDA_VERSION   optional; compared against "129" to pick MAX_JOBS
#   WHEEL_CUDA_VERSION    embedded in the wheel file name (cu<...>)
#   MATRIX_TORCH_VERSION  embedded in the wheel file name (torch<...>)
#   CXX11_ABI             embedded in the wheel file name (cxx11abi<...>)
#   DG_USE_LOCAL_VERSION  optional, defaults to 0
#   GITHUB_ENV            file that receives `wheel_name=<renamed wheel>`

set -euxo pipefail

# Reserve the original stdout (fd 3) for the exit-code line and send all other
# output (pip, setup.py, ...) to stderr. Otherwise the caller's `tail -n 1`
# throws the whole build log away.
exec 3>&1 1>&2

# If the script aborts early (set -e / set -u), still hand a non-zero status to
# the caller through the reserved descriptor instead of leaving it empty.
trap 'rc=$?; if [[ "${rc}" -ne 0 ]]; then echo "${rc}" >&3; fi' EXIT

# We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
# https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
# However this still fails so I am using a newer version of setuptools
pip install setuptools==68.0.0
pip install ninja packaging wheel

export PATH="/usr/local/cuda/bin:/usr/local/nvidia/bin:/usr/local/nvidia/lib64:${PATH}"
# LD_LIBRARY_PATH may be unset; do not trip `set -u` and do not leave a
# trailing ':' (which would add the current directory to the search path).
export LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Limit MAX_JOBS otherwise the github runner goes OOM.
# MATRIX_CUDA_VERSION is optional: when it is not provided we use 2 jobs.
if [[ "${MATRIX_CUDA_VERSION:-}" == "129" ]]; then
  export MAX_JOBS=1
else
  export MAX_JOBS=2
fi
export NVCC_THREADS=2

export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
export DG_USE_LOCAL_VERSION="${DG_USE_LOCAL_VERSION:-0}"

# 5h timeout since GH allows max 6h and we want some buffer
EXIT_CODE=0
timeout 5h python setup.py bdist_wheel --dist-dir=dist || EXIT_CODE=$?

if [[ "${EXIT_CODE}" -eq 0 ]]; then
  tmpname="cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${CXX11_ABI}"

  # Glob instead of parsing `ls`; the rename below only makes sense for a
  # single wheel, so fail loudly on anything else.
  wheels=(dist/*whl)
  if [[ "${#wheels[@]}" -ne 1 || ! -e "${wheels[0]}" ]]; then
    echo "Expected exactly one wheel in dist/, found: ${wheels[*]}" >&2
    exit 1
  fi

  # Insert "+<tmpname>" in front of the second '-' of the wheel file name.
  wheel_name=$(basename -- "${wheels[0]}" | sed "s/-/+${tmpname}-/2")
  mv -- "${wheels[0]}" "dist/${wheel_name}"
  echo "wheel_name=${wheel_name}" >> "${GITHUB_ENV}"
fi

# Last (and only) line on the real stdout: the build's exit status.
echo "${EXIT_CODE}" >&3
|
||||||
65
.github/scripts/check_for_ngc_images.sh
vendored
Normal file
65
.github/scripts/check_for_ngc_images.sh
vendored
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#!/bin/bash
#
# Probe the registry for the monthly NGC PyTorch images of the last
# MONTHS_TO_CHECK months and write the references that exist to
# ngc_images.json as a JSON array of strings.
#
# Uses `date -d` (GNU date syntax) and `docker manifest inspect`; jq is used
# for JSON formatting when it is installed.

# Configuration
BASE_IMAGE="nvcr.io/nvidia/pytorch"
TAG_SUFFIX="-py3"
MONTHS_TO_CHECK=7 # Check current month and previous 6 months (total 7)

#######################################
# Print the given arguments as a JSON array of strings.
# Arguments: zero or more strings
# Outputs:   the JSON array on stdout ("[]" when called without arguments);
#            a warning on stderr when jq is not installed
#######################################
tags_to_json() {
  local tag escaped json sep

  if command -v jq > /dev/null 2>&1; then
    # printf emits a single empty line when there are no arguments, and the
    # input always ends in a newline, so filter out empty entries instead of
    # only trimming the last element (which produced [""] for an empty list).
    printf '%s\n' "$@" | jq -R -s 'split("\n") | map(select(length > 0))'
    return
  fi

  echo "WARNING: 'jq' is not installed. Building the JSON array without it." >&2
  json="["
  sep=""
  for tag in "$@"; do
    escaped=${tag//\\/\\\\}
    escaped=${escaped//\"/\\\"}
    json+="${sep}\"${escaped}\""
    sep=","
  done
  printf '%s]\n' "${json}"
}

# Initialize an array to store existing tags
EXISTING_TAGS=()

echo "Checking for existence of the last ${MONTHS_TO_CHECK} NGC PyTorch images: ${BASE_IMAGE}:YY.MM${TAG_SUFFIX}"
echo "---------------------------------------------------------------------"

# Anchor on the first day of the current month so that "-N months" never
# lands on a day that does not exist in the target month.
MONTH_START=$(date +%Y-%m-01)

# Loop through the last N months
for ((i = 0; i < MONTHS_TO_CHECK; i++)); do
  # Calculate target month and year
  TARGET_DATE=$(date -d "${MONTH_START} -${i} months" +%y.%m)

  # Construct the tag and the full image reference
  IMAGE_TAG="${TARGET_DATE}${TAG_SUFFIX}"
  FULL_IMAGE="${BASE_IMAGE}:${IMAGE_TAG}"

  echo "Checking: ${FULL_IMAGE}"

  # Use 'docker manifest inspect' to check for image existence without pulling.
  # Any failure of the command (its output is discarded) counts as "missing".
  if docker manifest inspect "${FULL_IMAGE}" > /dev/null 2>&1; then
    echo "✅ EXISTS: Found."
    EXISTING_TAGS+=("${FULL_IMAGE}")
  else
    echo "❌ MISSING: Not found."
  fi
done

echo "---------------------------------------------------------------------"

## JSON Output Generation
JSON_STRING=$(tags_to_json "${EXISTING_TAGS[@]}")

echo "Generated JSON String of Existing Tags:"
echo "${JSON_STRING}"

echo "---"
echo "Check complete."

printf '%s\n' "${JSON_STRING}" > ngc_images.json
|
||||||
6
.github/scripts/test.sh
vendored
Normal file
6
.github/scripts/test.sh
vendored
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
#
# Smoke test for a freshly built wheel: install it, then import the package.

# Abort on errors, unset variables and failing pipeline stages; trace commands.
set -euxo pipefail

# Install whatever matches dist/*.whl.
pip install dist/*.whl

# The import fails (and with it the script) if the wheel is not usable.
python -c "import deep_gemm; print(deep_gemm.__version__)"
|
||||||
48
.github/workflows/_build.yml
vendored
48
.github/workflows/_build.yml
vendored
@@ -53,6 +53,11 @@ jobs:
|
|||||||
ref: ${{ inputs.release-version }}
|
ref: ${{ inputs.release-version }}
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Checkout build scripts
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
path: build-scripts/
|
||||||
|
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
@@ -82,7 +87,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Install CUDA ${{ inputs.cuda-version }}
|
- name: Install CUDA ${{ inputs.cuda-version }}
|
||||||
if: ${{ inputs.cuda-version != 'cpu' }}
|
if: ${{ inputs.cuda-version != 'cpu' }}
|
||||||
uses: Jimver/cuda-toolkit@v0.2.26
|
uses: Jimver/cuda-toolkit@v0.2.28
|
||||||
id: cuda-toolkit
|
id: cuda-toolkit
|
||||||
with:
|
with:
|
||||||
cuda: ${{ inputs.cuda-version }}
|
cuda: ${{ inputs.cuda-version }}
|
||||||
@@ -109,8 +114,8 @@ jobs:
|
|||||||
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
|
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
|
||||||
# This code is ugly, maybe there's a better way to do this.
|
# This code is ugly, maybe there's a better way to do this.
|
||||||
export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
|
export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
|
||||||
minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118, '2.8': 126}[env['MATRIX_TORCH_VERSION']]; \
|
minv = {'2.1': 121, '2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118, '2.8': 126}[env['MATRIX_TORCH_VERSION']]; \
|
||||||
maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128, '2.8': 129}[env['MATRIX_TORCH_VERSION']]; \
|
maxv = {'2.1': 121, '2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128, '2.8': 129}[env['MATRIX_TORCH_VERSION']]; \
|
||||||
print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \
|
print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \
|
||||||
)
|
)
|
||||||
if [[ ${{ inputs.torch-version }} == *"dev"* ]]; then
|
if [[ ${{ inputs.torch-version }} == *"dev"* ]]; then
|
||||||
@@ -156,39 +161,24 @@ jobs:
|
|||||||
|
|
||||||
- name: Build wheel
|
- name: Build wheel
|
||||||
id: build_wheel
|
id: build_wheel
|
||||||
|
env:
|
||||||
|
CXX11_ABI: ${{ inputs.cxx11_abi }}
|
||||||
|
MATRIX_TORCH_VERSION: ${{ env.MATRIX_TORCH_VERSION}}
|
||||||
|
WHEEL_CUDA_VERSION: ${{ env.WHEEL_CUDA_VERSION }}
|
||||||
|
MATRIX_PYTHON_VERSION: ${{ env.MATRIX_PYTHON_VERSION }}
|
||||||
|
DG_USE_LOCAL_VERSION: ${{ inputs.use-local-version && '1' || '0' }}
|
||||||
run: |
|
run: |
|
||||||
# We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
|
EXIT_CODE=$(bash build-scripts/.github/scripts/build.sh | tail -n 1)
|
||||||
# https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
|
|
||||||
# However this still fails so I'm using a newer version of setuptools
|
|
||||||
pip install setuptools==75.8.0
|
|
||||||
pip install ninja packaging wheel
|
|
||||||
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
|
|
||||||
export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
|
||||||
# Limit MAX_JOBS otherwise the github runner goes OOM
|
|
||||||
# nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM
|
|
||||||
|
|
||||||
export MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "129" ] && echo 1 || echo 2)
|
|
||||||
export NVCC_THREADS=2
|
|
||||||
export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
|
|
||||||
export DG_USE_LOCAL_VERSION=${{ inputs.use-local-version && '1' || '0' }}
|
|
||||||
|
|
||||||
# 5h timeout since GH allows max 6h and we want some buffer
|
|
||||||
EXIT_CODE=0
|
|
||||||
timeout 5h python setup.py bdist_wheel --dist-dir=dist || EXIT_CODE=$?
|
|
||||||
|
|
||||||
if [ $EXIT_CODE -eq 0 ]; then
|
|
||||||
tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ inputs.cxx11_abi }}
|
|
||||||
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
|
|
||||||
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
|
|
||||||
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Store exit code in GitHub env for later steps
|
# Store exit code in GitHub env for later steps
|
||||||
echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
|
echo "build_exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
# Do not fail the job if timeout killed the build
|
|
||||||
exit $EXIT_CODE
|
exit $EXIT_CODE
|
||||||
|
|
||||||
|
- name: Log Built Wheels
|
||||||
|
run: |
|
||||||
|
ls dist
|
||||||
|
|
||||||
- name: Log build logs after timeout
|
- name: Log build logs after timeout
|
||||||
if: always() && steps.build_wheel.outputs.build_exit_code == 124
|
if: always() && steps.build_wheel.outputs.build_exit_code == 124
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
139
.github/workflows/_build_in_container.yml
vendored
Normal file
139
.github/workflows/_build_in_container.yml
vendored
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
# Reusable workflow: build the wheel inside a given container image and,
# optionally, upload it to an existing GitHub release.
name: ~Build wheel template

on:
  workflow_call:
    inputs:
      runs-on:
        description: "The runner to use for the build"
        required: true
        type: string
      container-image:
        description: "Container image"
        required: true
        type: string
      upload-to-release:
        description: "Whether to upload the built wheel to the release"
        required: false
        type: boolean
        default: false
      release-version:
        description: "Release tag to check out and upload the wheel to"
        required: false
        type: string

defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

jobs:
  build-wheel:
    runs-on: ${{ inputs.runs-on }}
    name: Build wheel (${{ inputs.container-image }})
    steps:
      # /var/lib/docker is parked in the workspace while the build space is
      # re-mounted at that path, and moved back afterwards.
      - name: Move /var/lib/docker/
        run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"

      - name: Maximize build space
        uses: easimon/maximize-build-space@master
        with:
          root-reserve-mb: 5120
          temp-reserve-mb: 32
          swap-size-mb: 10240
          remove-dotnet: "true"
          remove-android: "true"
          remove-haskell: "true"
          remove-codeql: "true"
          build-mount-path: "/var/lib/docker/"

      - name: Restore /var/lib/docker/
        run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"

      - name: Checkout source
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.release-version }}
          submodules: recursive

      # Second checkout without `ref`, placed in build-scripts/, so the build
      # script is available even when the source checkout above is at a
      # different ref.
      - name: Checkout build scripts
        uses: actions/checkout@v4
        with:
          path: build-scripts/

      - name: Show free disk space
        run: |
          echo "Free space:"
          df -h

      - name: Pull the container
        run: docker pull ${{ inputs.container-image }}

      # Runs a small probe script inside the container and exports the
      # detected versions to later steps through $GITHUB_ENV.
      - name: Set CUDA and PyTorch versions
        run: |
          cat <<'EOF' > script.sh
          #!/bin/bash

          set -eoxu pipefail

          echo "MATRIX_CUDA_VERSION=$(echo $CUDA_VERSION | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
          echo "MATRIX_TORCH_VERSION=$NVIDIA_PYTORCH_VERSION" >> $GITHUB_ENV
          echo "WHEEL_CUDA_VERSION=$(echo $CUDA_VERSION | awk -F \. {'print $1'})" >> $GITHUB_ENV
          echo "MATRIX_PYTHON_VERSION=$(python -c "import sys; print('{}.{}'.format(sys.version_info[0], sys.version_info[1]))" | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
          echo "CXX11_ABI=$(python -c 'import torch; print(str(torch._C._GLIBCXX_USE_CXX11_ABI).upper())')" >> $GITHUB_ENV

          cat $GITHUB_ENV
          EOF

          docker run \
            --rm \
            --shm-size=64g \
            --workdir /workspace \
            --volume "$(pwd):/workspace" \
            --volume "${GITHUB_ENV}:${GITHUB_ENV}" \
            -e GITHUB_ENV="${GITHUB_ENV}" \
            ${{ inputs.container-image }} bash /workspace/script.sh

      - name: Build wheel
        id: build_wheel
        env:
          CXX11_ABI: ${{ env.CXX11_ABI }}
          MATRIX_CUDA_VERSION: ${{ env.MATRIX_CUDA_VERSION }}
          MATRIX_TORCH_VERSION: ${{ env.MATRIX_TORCH_VERSION}}
          WHEEL_CUDA_VERSION: ${{ env.WHEEL_CUDA_VERSION }}
          MATRIX_PYTHON_VERSION: ${{ env.MATRIX_PYTHON_VERSION }}
        run: |
          # build.sh prints the build's exit status as its last stdout line.
          # MATRIX_CUDA_VERSION is forwarded because build.sh reads it.
          EXIT_CODE=$(docker run \
            --rm \
            --shm-size=64g \
            --workdir /workspace \
            --volume "$(pwd):/workspace" \
            --volume "${GITHUB_ENV}:${GITHUB_ENV}" \
            -e PIP_CONSTRAINT= \
            -e GITHUB_ENV="${GITHUB_ENV}" \
            -e CXX11_ABI="${CXX11_ABI}" \
            -e MATRIX_CUDA_VERSION="${MATRIX_CUDA_VERSION}" \
            -e MATRIX_TORCH_VERSION="${MATRIX_TORCH_VERSION}" \
            -e WHEEL_CUDA_VERSION="${WHEEL_CUDA_VERSION}" \
            -e MATRIX_PYTHON_VERSION="${MATRIX_PYTHON_VERSION}" \
            ${{ inputs.container-image }} bash /workspace/build-scripts/.github/scripts/build.sh | tail -n 1)

          # Store exit code in GitHub output for later steps
          echo "build_exit_code=${EXIT_CODE}" | tee -a "${GITHUB_OUTPUT}"

          # Fail this step when the build itself failed
          exit "${EXIT_CODE}"

      - name: Log Built Wheels
        run: |
          ls dist

      # The release is only needed for the upload below.
      - name: Get Release with tag
        id: get_current_release
        if: inputs.upload-to-release
        uses: joutvhu/get-release@v1
        with:
          tag_name: ${{ inputs.release-version }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Upload Release Asset
        id: upload_release_asset
        if: inputs.upload-to-release
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
          asset_path: ./dist/${{env.wheel_name}}
          asset_name: ${{env.wheel_name}}
          asset_content_type: application/*
|
||||||
34
.github/workflows/build_in_container.yml
vendored
Normal file
34
.github/workflows/build_in_container.yml
vendored
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Entry-point workflow that calls the reusable container build
# (_build_in_container.yml), either manually with inputs or on push.
name: Build wheels in a container

on:
  workflow_dispatch:
    inputs:
      runs-on:
        description: "The runner to use for the build"
        required: true
        type: string
        default: ubuntu-22.04
      container-image:
        description: "Container image"
        required: true
        type: string
      upload-to-release:
        description: "Whether to upload the built wheel to the release"
        required: false
        type: boolean
        default: false
      release-version:
        description: "Release tag to check out and upload the wheel to"
        required: false
        type: string

  # NOTE(review): runs on every push; the `||` fallbacks below supply the
  # values when no dispatch inputs are present — confirm this trigger is
  # intended to stay.
  push:

jobs:
  build-wheels:
    uses: ./.github/workflows/_build_in_container.yml
    with:
      runs-on: ${{ inputs.runs-on || 'ubuntu-22.04' }}
      container-image: ${{ inputs.container-image || 'nvcr.io/nvidia/pytorch:25.06-py3' }}
      upload-to-release: ${{ inputs.upload-to-release || false }}
      release-version: ${{ inputs.release-version || 'v2.2.5' }}
|
||||||
42
.github/workflows/publish.yml
vendored
42
.github/workflows/publish.yml
vendored
@@ -41,9 +41,9 @@ jobs:
|
|||||||
# Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
|
# Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
|
||||||
# manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
|
# manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
|
||||||
os: [ubuntu-22.04]
|
os: [ubuntu-22.04]
|
||||||
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
|
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
||||||
torch-version: ["2.4.0", "2.5.1", "2.6.0", "2.7.1", "2.8.0"]
|
torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"]
|
||||||
cuda-version: ["12.9.1"]
|
cuda-version: ["12.9.1", "13.0.0"]
|
||||||
# We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
|
# We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
|
||||||
# Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
|
# Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
|
||||||
# Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
|
# Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
|
||||||
@@ -51,9 +51,14 @@ jobs:
|
|||||||
cxx11_abi: ["FALSE", "TRUE"]
|
cxx11_abi: ["FALSE", "TRUE"]
|
||||||
exclude:
|
exclude:
|
||||||
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
|
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
|
||||||
# Pytorch < 2.5 does not support Python 3.13
|
|
||||||
- torch-version: "2.4.0"
|
- torch-version: "2.4.0"
|
||||||
python-version: "3.13"
|
python-version: "3.13"
|
||||||
|
include:
|
||||||
|
- os: "ubuntu-22.04"
|
||||||
|
cxx11_abi: "FALSE"
|
||||||
|
cuda-version: "12.1.0"
|
||||||
|
python-version: "3.8"
|
||||||
|
torch-version: "2.1.0"
|
||||||
uses: ./.github/workflows/_build.yml
|
uses: ./.github/workflows/_build.yml
|
||||||
with:
|
with:
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
@@ -65,6 +70,35 @@ jobs:
|
|||||||
upload-to-release: true
|
upload-to-release: true
|
||||||
use-local-version: false
|
use-local-version: false
|
||||||
|
|
||||||
|
check_for_ngc_images:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
images: ${{ steps.check_for_ngc_images.outputs.IMAGES }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Check for NGC PyTorch images
|
||||||
|
id: check_for_ngc_images
|
||||||
|
run: |
|
||||||
|
bash ./.github/scripts/check_for_ngc_images.sh
|
||||||
|
echo "IMAGES=$(cat ngc_images.json| jq -cr)" | tee -a $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
build_ngc_wheels:
|
||||||
|
name: Build Wheel for NGC PyTorch
|
||||||
|
needs: [setup_release, check_for_ngc_images]
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-22.04]
|
||||||
|
container-image: ${{ fromJson(needs.check_for_ngc_images.outputs.images) }}
|
||||||
|
uses: ./.github/workflows/_build_in_container.yml
|
||||||
|
with:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
container-image: ${{ matrix.container-image }}
|
||||||
|
release-version: ${{ needs.setup_release.outputs.release-version }}
|
||||||
|
upload-to-release: true
|
||||||
|
|
||||||
publish_package:
|
publish_package:
|
||||||
name: Publish package
|
name: Publish package
|
||||||
needs: [build_wheels]
|
needs: [build_wheels]
|
||||||
|
|||||||
6
setup.py
6
setup.py
@@ -84,7 +84,11 @@ def get_platform():
|
|||||||
|
|
||||||
def get_wheel_url():
|
def get_wheel_url():
|
||||||
torch_version = parse(torch.__version__)
|
torch_version = parse(torch.__version__)
|
||||||
torch_version = f'{torch_version.major}.{torch_version.minor}'
|
if os.environ.get("NVIDIA_PRODUCT_NAME", "") == "PyTorch":
|
||||||
|
torch_version = str(os.environ.get("NVIDIA_PYTORCH_VERSION"))
|
||||||
|
else:
|
||||||
|
torch_version = f'{torch_version.major}.{torch_version.minor}'
|
||||||
|
|
||||||
python_version = f'cp{sys.version_info.major}{sys.version_info.minor}'
|
python_version = f'cp{sys.version_info.major}{sys.version_info.minor}'
|
||||||
platform_name = get_platform()
|
platform_name = get_platform()
|
||||||
deep_gemm_version = get_package_version()
|
deep_gemm_version = get_package_version()
|
||||||
|
|||||||
Reference in New Issue
Block a user