Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into aws
Browse files Browse the repository at this point in the history
  • Loading branch information
ajschmidt8 committed Nov 28, 2023
2 parents eaf0099 + 3761dec commit 382574b
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
matrix: ${{ fromJSON(needs.compute-matrix.outputs.MATRIX) }}
fail-fast: false
secrets: inherit
uses: ./.github/workflows/build-image.yml
uses: ./.github/workflows/build-image.yaml
with:
ARCHES: ${{ toJSON(matrix.ARCHES) }}
CUDA_VER: ${{ matrix.CUDA_VER }}
Expand All @@ -50,7 +50,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Login to DockerHub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
password: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Login to DockerHub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
password: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}
- name: Set up Docker Context for Buildx
run: |
docker context create builders
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3
with:
driver: docker
endpoint: builders
Expand All @@ -58,7 +58,7 @@ jobs:
PYTHON_VER: ${{ inputs.PYTHON_VER }}
ARCH: ${{ matrix.ARCH }}
- name: Build image
uses: docker/build-push-action@v4
uses: docker/build-push-action@v5
with:
context: context
file: ${{ inputs.IMAGE_REPO }}.Dockerfile
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml → .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ concurrency:

jobs:
build-images:
uses: ./.github/workflows/build-and-publish-images.yml
uses: ./.github/workflows/build-and-publish-images.yaml
with:
build_type: pull-request
secrets: inherit
2 changes: 1 addition & 1 deletion .github/workflows/push.yml → .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ concurrency:

jobs:
build-images:
uses: ./.github/workflows/build-and-publish-images.yml
uses: ./.github/workflows/build-and-publish-images.yaml
with:
build_type: branch
secrets: inherit
68 changes: 48 additions & 20 deletions ci-conda.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG CUDA_VER=11.8.0
ARG LINUX_VER=ubuntu22.04
ARG PYTHON_VER=3.10
FROM rapidsai/mambaforge-cuda:cuda${CUDA_VER}-base-${LINUX_VER}-py${PYTHON_VER}
FROM rapidsai/miniforge-cuda:cuda${CUDA_VER}-base-${LINUX_VER}-py${PYTHON_VER}

ARG TARGETPLATFORM
ARG CUDA_VER
Expand All @@ -26,48 +26,76 @@ ENV SCCACHE_S3_NO_CREDENTIALS=false

# Install system packages depending on the LINUX_VER
RUN \
PKG_CUDA_VER="$(echo ${CUDA_VER} | cut -d '.' -f1,2 | tr '.' '-')"; \
case "${LINUX_VER}" in \
"ubuntu"*) \
apt-get update \
echo 'APT::Update::Error-Mode "any";' > /etc/apt/apt.conf.d/warnings-as-errors \
&& apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
cuda-gdb-${PKG_CUDA_VER} \
cuda-cudart-dev-${PKG_CUDA_VER} \
cuda-cupti-dev-${PKG_CUDA_VER} \
file \
unzip \
wget \
# ignore the build-essential package since it installs dependencies like gcc/g++
# we don't need them since we use conda compilers, so this keeps our images smaller
&& apt-get download cuda-nvcc-${PKG_CUDA_VER} \
&& dpkg -i --ignore-depends="build-essential" ./cuda-nvcc-*.deb \
&& rm ./cuda-nvcc-*.deb \
# apt will not work correctly if it thinks it needs the build-essential dependency
# so we patch it out with a sed command
&& sed -i 's/, build-essential//g' /var/lib/dpkg/status \
&& rm -rf "/var/lib/apt/lists/*"; \
;; \
"centos"* | "rockylinux"*) \
yum -y update \
&& yum -y install --setopt=install_weak_deps=False \
cuda-cudart-devel-${PKG_CUDA_VER} \
cuda-driver-devel-${PKG_CUDA_VER} \
cuda-gdb-${PKG_CUDA_VER} \
cuda-cupti-${PKG_CUDA_VER} \
file \
unzip \
wget \
which \
yum-utils \
&& rpm -Uvh --nodeps $(repoquery --location cuda-nvcc-${PKG_CUDA_VER}) \
&& yum clean all; \
;; \
*) \
echo "Unsupported LINUX_VER: ${LINUX_VER}" && exit 1; \
;; \
esac

# Install CUDA packages from the system package manager, only for CUDA 11
# (CUDA 12+ should fetch from conda).
# PKG_CUDA_VER is CUDA_VER reduced to "<major>-<minor>" (e.g. 11.8.0 -> 11-8),
# which is the suffix used by the package names below.
RUN \
  case "${CUDA_VER}" in \
    "11"*) \
      PKG_CUDA_VER="$(echo ${CUDA_VER} | cut -d '.' -f1,2 | tr '.' '-')"; \
      echo "Attempting to install CUDA Toolkit ${PKG_CUDA_VER}"; \
      case "${LINUX_VER}" in \
        "ubuntu"*) \
          apt-get update \
          && apt-get upgrade -y \
          && apt-get install -y --no-install-recommends \
            cuda-gdb-${PKG_CUDA_VER} \
            cuda-cudart-dev-${PKG_CUDA_VER} \
            cuda-cupti-dev-${PKG_CUDA_VER} \
          # ignore the build-essential package since it installs dependencies like gcc/g++
          # we don't need them since we use conda compilers, so this keeps our images smaller
          && apt-get download cuda-nvcc-${PKG_CUDA_VER} \
          && dpkg -i --ignore-depends="build-essential" ./cuda-nvcc-*.deb \
          && rm ./cuda-nvcc-*.deb \
          # apt will not work correctly if it thinks it needs the build-essential dependency
          # so we patch it out with a sed command
          && sed -i 's/, build-essential//g' /var/lib/dpkg/status \
          # the glob must stay unquoted: inside quotes the shell does not expand it,
          # so nothing under /var/lib/apt/lists would be removed
          && rm -rf /var/lib/apt/lists/*; \
          ;; \
        "centos"* | "rockylinux"*) \
          yum -y update \
          && yum -y install --setopt=install_weak_deps=False \
            cuda-cudart-devel-${PKG_CUDA_VER} \
            cuda-driver-devel-${PKG_CUDA_VER} \
            cuda-gdb-${PKG_CUDA_VER} \
            cuda-cupti-${PKG_CUDA_VER} \
          # install cuda-nvcc straight from its RPM location with --nodeps,
          # i.e. without pulling in the package's declared dependencies
          && rpm -Uvh --nodeps $(repoquery --location cuda-nvcc-${PKG_CUDA_VER}) \
          && yum clean all; \
          ;; \
        *) \
          echo "Unsupported LINUX_VER: ${LINUX_VER}" && exit 1; \
          ;; \
      esac \
      ;; \
    *) \
      echo "Skipping CUDA Toolkit installation for CUDA ${CUDA_VER}"; \
      ;; \
  esac

# Install gha-tools
RUN wget https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - \
| tar -xz -C /usr/local/bin
Expand Down Expand Up @@ -118,7 +146,7 @@ RUN /opt/conda/bin/git config --system --add safe.directory '*'
RUN pip install dunamai "rapids-dependency-file-generator==1.*" \
&& pip cache purge

COPY --from=mikefarah/yq:4.35.1 /usr/bin/yq /usr/local/bin/yq
COPY --from=mikefarah/yq:4.35.2 /usr/bin/yq /usr/local/bin/yq
COPY --from=amazon/aws-cli /usr/local/aws-cli/ /usr/local/aws-cli/
COPY --from=amazon/aws-cli /usr/local/bin/ /usr/local/bin/

Expand Down
71 changes: 65 additions & 6 deletions ci-wheel.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ ARG CUDA_VER=11.8.0
ARG LINUX_VER=ubuntu20.04
ARG REAL_ARCH=x86_64

ARG BASE_IMAGE=nvidia/cuda:${CUDA_VER}-devel-${LINUX_VER}
ARG BASE_IMAGE=nvcr.io/nvidia/cuda:${CUDA_VER}-devel-${LINUX_VER}
FROM ${BASE_IMAGE}

ARG CUDA_VER
Expand All @@ -20,20 +20,45 @@ ENV RAPIDS_CUDA_VERSION="${CUDA_VER}"
ENV RAPIDS_PY_VERSION="${PYTHON_VER}"

# RAPIDS pip index
ENV PIP_EXTRA_INDEX_URL="https://pypi.k8s.rapids.ai/simple"
ENV PIP_EXTRA_INDEX_URL="https://pypi.anaconda.org/rapidsai-wheels-nightly/simple"

ENV PYENV_ROOT="/pyenv"
ENV PATH="/pyenv/bin:/pyenv/shims:$PATH"

RUN case "${LINUX_VER}" in \
"ubuntu"*) \
apt update -y && apt install -y jq build-essential software-properties-common wget gcc zlib1g-dev libbz2-dev libssl-dev libreadline-dev libsqlite3-dev libffi-dev curl git libncurses5-dev libnuma-dev openssh-client libcudnn8-dev zip libopenblas-dev liblapack-dev protobuf-compiler autoconf automake libtool cmake && rm -rf /var/lib/apt/lists/* \
&& add-apt-repository ppa:git-core/ppa && add-apt-repository ppa:ubuntu-toolchain-r/test && apt update -y && apt install -y git gcc-9 g++-9 && add-apt-repository -r ppa:git-core/ppa && add-apt-repository -r ppa:ubuntu-toolchain-r/test \
echo 'APT::Update::Error-Mode "any";' > /etc/apt/apt.conf.d/warnings-as-errors \
&& apt update -y \
&& apt install -y \
debianutils build-essential software-properties-common \
jq wget gcc zlib1g-dev libbz2-dev \
libssl-dev libreadline-dev libsqlite3-dev libffi-dev curl git libncurses5-dev \
libnuma-dev openssh-client libcudnn8-dev zip libopenblas-dev liblapack-dev \
protobuf-compiler autoconf automake libtool cmake yasm libopenslide-dev \
&& add-apt-repository ppa:git-core/ppa \
&& add-apt-repository ppa:ubuntu-toolchain-r/test \
&& apt update -y \
&& apt install -y git gcc-9 g++-9 \
&& add-apt-repository -r ppa:git-core/ppa \
&& add-apt-repository -r ppa:ubuntu-toolchain-r/test \
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90 --slave /usr/bin/g++ g++ /usr/bin/g++-9 --slave /usr/bin/gcov gcov /usr/bin/gcov-9 \
&& rm -rf /var/lib/apt/lists/* \
;; \
"centos"*) \
yum update --exclude=libnccl* -y && yum install -y epel-release wget gcc zlib-devel bzip2 bzip2-devel readline-devel sqlite sqlite-devel xz xz-devel libffi-devel curl git ncurses-devel numactl numactl-devel openssh-clients libcudnn8-devel zip blas-devel lapack-devel protobuf-compiler autoconf automake libtool centos-release-scl scl-utils cmake && yum clean all \
&& yum remove -y git && yum install -y https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm && yum install -y git jq devtoolset-11 && yum remove -y endpoint-repo \
yum update --exclude=libnccl* -y \
&& yum install -y epel-release\
&& yum update --exclude=libnccl* -y \
&& yum install -y \
which wget gcc zlib-devel bzip2 bzip2-devel readline-devel sqlite \
sqlite-devel xz xz-devel libffi-devel curl git ncurses-devel numactl \
numactl-devel openssh-clients libcudnn8-devel zip blas-devel lapack-devel \
protobuf-compiler autoconf automake libtool centos-release-scl scl-utils cmake \
yasm openslide-devel \
&& yum remove -y git \
&& yum install -y https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm \
&& yum install -y git jq devtoolset-11 \
&& yum remove -y endpoint-repo \
&& yum clean all \
&& echo -e ' \
#!/bin/bash\n \
source scl_source enable devtoolset-11\n \
Expand All @@ -47,6 +72,33 @@ RUN case "${LINUX_VER}" in \
&& make install \
&& popd \
;; \
"rockylinux"*) \
dnf update -y \
&& dnf install -y epel-release \
&& dnf update -y \
&& dnf install -y \
which wget gcc zlib-devel bzip2 bzip2-devel readline-devel sqlite \
sqlite-devel xz xz-devel libffi-devel curl git ncurses-devel numactl \
numactl-devel openssh-clients libcudnn8-devel zip jq openslide-devel \
protobuf-compiler autoconf automake libtool dnf-plugins-core cmake \
&& dnf config-manager --set-enabled powertools \
&& dnf install -y blas-devel lapack-devel \
&& dnf -y install gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ \
&& dnf -y install yasm \
&& dnf clean all \
&& echo -e ' \
#!/bin/bash\n \
source /opt/rh/gcc-toolset-11/enable \
' > /etc/profile.d/enable_devtools.sh \
&& pushd tmp \
&& wget https://ftp.openssl.org/source/openssl-1.1.1k.tar.gz \
&& tar -xzvf openssl-1.1.1k.tar.gz \
&& cd openssl-1.1.1k \
&& ./config --prefix=/usr --openssldir=/etc/ssl --libdir=lib no-shared zlib-dynamic \
&& make \
&& make install \
&& popd \
;; \
*) \
echo "Unsupported LINUX_VER: ${LINUX_VER}" && exit 1; \
;; \
Expand Down Expand Up @@ -120,6 +172,9 @@ RUN case "${LINUX_VER}" in \
# Need to specify the openssl location because of the install from source
CPPFLAGS="-I/usr/include/openssl" LDFLAGS="-L/usr/lib" pyenv install --verbose "${RAPIDS_PY_VERSION}" \
;; \
"rockylinux"*) \
CPPFLAGS="-I/usr/include/openssl" LDFLAGS="-L/usr/lib" pyenv install --verbose "${RAPIDS_PY_VERSION}" \
;; \
*) \
echo "Unsupported LINUX_VER: ${LINUX_VER}" && exit 1; \
;; \
Expand All @@ -130,6 +185,10 @@ RUN pyenv global ${PYTHON_VER} && python -m pip install auditwheel patchelf twin
# Install latest gha-tools
RUN wget https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - | tar -xz -C /usr/local/bin

# Install anaconda-client directly from its GitHub repository,
# then purge the pip cache in the same layer
RUN pip install git+https://github.com/Anaconda-Platform/anaconda-client \
    && pip cache purge

# Install the AWS CLI
COPY --from=amazon/aws-cli /usr/local/aws-cli/ /usr/local/aws-cli/
COPY --from=amazon/aws-cli /usr/local/bin/ /usr/local/bin/
Expand Down
4 changes: 4 additions & 0 deletions ci/compute-build-args.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ if [[
"${LINUX_VER}" == "ubuntu20.04"
]]; then
MANYLINUX_VER="manylinux_2_31"
elif [[
"${LINUX_VER}" == "rockylinux8"
]]; then
MANYLINUX_VER="manylinux_2_28"
fi

ARGS=(
Expand Down
32 changes: 19 additions & 13 deletions citestwheel.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG CUDA_VER=11.8.0
ARG LINUX_VER=ubuntu18.04

ARG BASE_IMAGE=nvidia/cuda:${CUDA_VER}-devel-${LINUX_VER}
ARG BASE_IMAGE=nvcr.io/nvidia/cuda:${CUDA_VER}-devel-${LINUX_VER}
FROM ${BASE_IMAGE}

ARG CUDA_VER
Expand All @@ -12,21 +12,30 @@ ENV RAPIDS_CUDA_VERSION="${CUDA_VER}"
ENV RAPIDS_PY_VERSION="${PYTHON_VER}"

# RAPIDS pip index
ENV PIP_EXTRA_INDEX_URL="https://pypi.k8s.rapids.ai/simple"
ENV PIP_EXTRA_INDEX_URL="https://pypi.anaconda.org/rapidsai-wheels-nightly/simple"

ARG DEBIAN_FRONTEND=noninteractive

ENV PYENV_ROOT="/pyenv"
ENV PATH="/pyenv/bin:/pyenv/shims:$PATH"

RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
wget curl git jq ssh \
make build-essential libssl-dev zlib1g-dev \
libbz2-dev libreadline-dev libsqlite3-dev wget \
curl llvm libncursesw5-dev xz-utils tk-dev unzip \
libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
# Install system packages in a single heredoc script: configure apt error
# handling, add the git-core PPA, upgrade, install the toolchain and libraries
# listed below, then remove the apt caches.
RUN <<EOF
# stop at the first failing command
set -e
# write an apt config drop-in (named warnings-as-errors) that sets
# APT::Update::Error-Mode to "any" for the apt-get update calls below
echo 'APT::Update::Error-Mode "any";' > /etc/apt/apt.conf.d/warnings-as-errors
apt-get update
# provides add-apt-repository, used on the next command
apt-get install -y software-properties-common
# update git > 2.17 by adding the git-core PPA before the package install
add-apt-repository ppa:git-core/ppa -y
# refresh the package lists so the newly added PPA is visible
apt-get update
apt-get upgrade -y
# NOTE(review): wget and curl each appear twice in the list below
apt-get install -y --no-install-recommends \
wget curl git jq ssh \
make build-essential libssl-dev zlib1g-dev \
libbz2-dev libreadline-dev libsqlite3-dev wget \
curl llvm libncursesw5-dev xz-utils tk-dev unzip \
libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
# drop downloaded package archives and apt package lists
rm -rf /var/cache/apt/archives /var/lib/apt/lists/*
EOF

# Install pyenv
RUN curl https://pyenv.run | bash
Expand All @@ -44,9 +53,6 @@ ENV PATH="/pyenv/versions/${PYTHON_VER}/bin/:$PATH"
COPY --from=amazon/aws-cli /usr/local/aws-cli/ /usr/local/aws-cli/
COPY --from=amazon/aws-cli /usr/local/bin/ /usr/local/bin/

# update git > 2.17
RUN grep '18.04' /etc/issue && bash -c "apt-get install -y software-properties-common && add-apt-repository ppa:git-core/ppa -y && apt-get update && apt-get install --upgrade -y git" || true;

# Install latest gha-tools
RUN wget https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - \
| tar -xz -C /usr/local/bin
Expand Down
4 changes: 1 addition & 3 deletions matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,9 @@ exclude:
- CUDA_VER: "12.1.1"
IMAGE_REPO: "ci-wheel"

# exclude citestwheel and ci-wheel for rockylinux8
# exclude citestwheel for rockylinux8
- LINUX_VER: "rockylinux8"
IMAGE_REPO: "citestwheel"
- LINUX_VER: "rockylinux8"
IMAGE_REPO: "ci-wheel"

# exclude citestwheel and ci-wheel for ubuntu22.04
- LINUX_VER: "ubuntu22.04"
Expand Down

0 comments on commit 382574b

Please sign in to comment.