ci: Fixes for pre-built wheels (#214)
* build: Allow NGC builds Signed-off-by: oliver könig <okoenig@nvidia.com> * reduce grid Signed-off-by: oliver könig <okoenig@nvidia.com> * update grid Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * upgrade cuda action Signed-off-by: oliver könig <okoenig@nvidia.com> * remove test Signed-off-by: oliver könig <okoenig@nvidia.com> * py3.8 Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * exclude Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * torch-version Signed-off-by: oliver könig <okoenig@nvidia.com> * py3.8/torch2.1/cuda12.3 Signed-off-by: oliver könig <okoenig@nvidia.com> * Update publish.yml * fix grid Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * cuda11.8 Signed-off-by: oliver könig <okoenig@nvidia.com> * no hopper for 118 Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> * fix Signed-off-by: oliver könig <okoenig@nvidia.com> --------- Signed-off-by: oliver könig <okoenig@nvidia.com>
This commit is contained in:
31
.github/scripts/build.sh
vendored
Normal file
31
.github/scripts/build.sh
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
#!/bin/bash
#
# Build a wheel with `python setup.py bdist_wheel` and, on success, rename it
# so the file name carries the CUDA / torch / C++11-ABI combination.
#
# Environment read by this script:
#   MATRIX_CUDA_VERSION   - compared against "129" to pick MAX_JOBS
#   WHEEL_CUDA_VERSION    - embedded in the wheel file name
#   MATRIX_TORCH_VERSION  - embedded in the wheel file name
#   CXX11_ABI             - embedded in the wheel file name
#   GITHUB_ENV            - file that receives `wheel_name=<file>` on success
#   DG_USE_LOCAL_VERSION  - optional, exported with a default of 0
#
# Output: the exit code of the build command is printed as the last line on
# stdout.

set -eoxu pipefail

# We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
# https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
# However this still fails so I am using a newer version of setuptools
pip install setuptools==68.0.0
pip install ninja packaging wheel

export PATH="/usr/local/cuda/bin:/usr/local/nvidia/bin:/usr/local/nvidia/lib64:${PATH}"
# LD_LIBRARY_PATH may be unset in a fresh container. Expanding it bare would
# abort the script under `set -u`, and appending an empty value would leave a
# trailing ':' (an empty entry, i.e. the current directory) on the search path.
export LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Limit MAX_JOBS otherwise the github runner goes OOM
if [[ "${MATRIX_CUDA_VERSION}" == "129" ]]; then
  export MAX_JOBS=1
else
  export MAX_JOBS=2
fi
export NVCC_THREADS=2

export TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
export DG_USE_LOCAL_VERSION="${DG_USE_LOCAL_VERSION:-0}"

# 5h timeout since GH allows max 6h and we want some buffer
EXIT_CODE=0
timeout 5h python setup.py bdist_wheel --dist-dir=dist || EXIT_CODE=$?

if (( EXIT_CODE == 0 )); then
  tmpname="cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${CXX11_ABI}"

  # Collect the built wheel with a glob instead of parsing `ls`, and insist on
  # exactly one match so the rename below is unambiguous.
  shopt -s nullglob
  wheels=(dist/*.whl)
  shopt -u nullglob
  if (( ${#wheels[@]} != 1 )); then
    echo "expected exactly one wheel in dist/, found ${#wheels[@]}" >&2
    exit 1
  fi

  # Insert "+<tmpname>" in front of the second '-' of the file name, i.e.
  # directly after the version field of the wheel name.
  wheel_name=$(sed "s/-/+${tmpname}-/2" <<< "${wheels[0]##*/}")
  mv -- "${wheels[0]}" "dist/${wheel_name}"
  echo "wheel_name=${wheel_name}" >> "${GITHUB_ENV}"
fi

# NOTE(review): the build status is only printed, so this script exits 0 even
# when the build failed or timed out. Presumably the caller reads this last
# line of stdout - confirm before changing it to `exit`.
echo "${EXIT_CODE}"
|
||||
65
.github/scripts/check_for_ngc_images.sh
vendored
Normal file
65
.github/scripts/check_for_ngc_images.sh
vendored
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/bin/bash
#
# Check which of the most recent monthly NGC PyTorch images exist and write
# the tags that were found to ./ngc_images.json as a JSON array of strings.
#
# Uses `docker manifest inspect` for the existence check and the GNU `date`
# relative syntax ("-N months") to compute the tags. `jq` formats the JSON
# when it is installed; otherwise the array is formatted by the shell.

# Configuration
BASE_IMAGE="nvcr.io/nvidia/pytorch"
TAG_SUFFIX="-py3"
MONTHS_TO_CHECK=7 # Check current month and previous 6 months (total 7)

# Results. Kept global so they remain available if this script is sourced.
EXISTING_TAGS=()
JSON_STRING="[]"

#######################################
# Format the arguments as a JSON array of strings.
# Arguments: zero or more tag strings
# Outputs:   the JSON array on stdout ("[]" when called without arguments)
#######################################
tags_to_json() {
  # Without this guard `printf '%s\n'` would still emit one empty line, which
  # would end up in the array as a single "" element.
  if (( $# == 0 )); then
    printf '[]\n'
    return 0
  fi

  if command -v jq > /dev/null 2>&1; then
    # Split on newlines and drop the empty element left by the final newline.
    printf '%s\n' "$@" | jq -R -s 'split("\n") | map(select(length > 0))'
    return
  fi

  # No jq: build the array by hand, escaping backslashes and double quotes.
  local out='[' sep='' tag
  for tag in "$@"; do
    tag=${tag//\\/\\\\}
    tag=${tag//\"/\\\"}
    out+="${sep}"$'\n'"  \"${tag}\""
    sep=','
  done
  out+=$'\n'']'
  printf '%s\n' "${out}"
}

#######################################
# Probe the registry for each candidate tag, report progress on stdout and
# write the resulting JSON array to ./ngc_images.json.
# Globals: BASE_IMAGE, TAG_SUFFIX, MONTHS_TO_CHECK (read);
#          EXISTING_TAGS, JSON_STRING (written)
#######################################
main() {
  local month_start i target_date image_tag full_image

  EXISTING_TAGS=()

  echo "Checking for existence of the last ${MONTHS_TO_CHECK} NGC PyTorch images: ${BASE_IMAGE}:YY.MM${TAG_SUFFIX}"
  echo "---------------------------------------------------------------------"

  # Anchor on the first day of the current month once, with a single `date`
  # call, so every iteration subtracts from the same reference point.
  month_start=$(date +%Y-%m-01)

  # Loop through the last N months
  for (( i = 0; i < MONTHS_TO_CHECK; i++ )); do
    target_date=$(date -d "${month_start} -${i} months" +%y.%m)

    # Construct the tag and the full image reference
    image_tag="${target_date}${TAG_SUFFIX}"
    full_image="${BASE_IMAGE}:${image_tag}"

    echo "Checking: ${full_image}"

    # Use 'docker manifest inspect' to check for image existence without pulling.
    if docker manifest inspect "${full_image}" > /dev/null 2>&1; then
      echo "✅ EXISTS: Found."
      EXISTING_TAGS+=("${full_image}")
    else
      echo "❌ MISSING: Not found."
    fi
  done

  echo "---------------------------------------------------------------------"

  ## JSON Output Generation
  if ! command -v jq > /dev/null 2>&1; then
    echo "WARNING: 'jq' is not installed. Formatting the JSON output without it."
  fi
  JSON_STRING=$(tags_to_json "${EXISTING_TAGS[@]}")

  echo "Generated JSON String of Existing Tags:"
  echo "${JSON_STRING}"

  echo "---"
  echo "Check complete."

  echo "${JSON_STRING}" > ngc_images.json
}

main "$@"
|
||||
6
.github/scripts/test.sh
vendored
Normal file
6
.github/scripts/test.sh
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/bin/bash
#
# Smoke test for the built wheel: install what is in dist/ and import it.

# Stop on errors, unset variables and failed pipeline stages; trace commands.
set -o errexit -o xtrace -o nounset -o pipefail

# Install every wheel file matched under dist/.
pip install dist/*.whl

# Import the installed package and print the version it reports.
python -c "import deep_gemm; print(deep_gemm.__version__)"
|
||||
Reference in New Issue
Block a user