Skip to content

Commit

Permalink
Merge branch 'rocm-main' into ci_rnn_final
Browse files Browse the repository at this point in the history
  • Loading branch information
Ruturaj4 authored Dec 12, 2024
2 parents 3f8ed59 + 6dc4dee commit dfa9903
Show file tree
Hide file tree
Showing 475 changed files with 30,885 additions and 12,548 deletions.
9 changes: 8 additions & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ build:mkl_open_source_only --define=tensorflow_mkldnn_contraction_kernel=1
build:clang --copt=-Wno-gnu-offsetof-extensions
# Disable clang extention that rejects unknown arguments.
build:clang --copt=-Qunused-arguments
# Error on struct/class mismatches, since this causes link failures on Windows.
build:clang --copt=-Werror=mismatched-tags

# Configs for CUDA
build:cuda --repo_env TF_NEED_CUDA=1
Expand Down Expand Up @@ -183,6 +185,7 @@ build:macos_cache_push --config=macos_cache --remote_upload_local_results=true -
build:ci_linux_x86_64 --config=avx_linux --config=avx_posix
build:ci_linux_x86_64 --config=mkl_open_source_only
build:ci_linux_x86_64 --config=clang --verbose_failures=true
build:ci_linux_x86_64 --color=yes

# TODO(b/356695103): We do not have a CPU only toolchain so we use the CUDA
# toolchain for both CPU and GPU builds.
Expand All @@ -203,6 +206,7 @@ build:ci_linux_x86_64_cuda --config=ci_linux_x86_64
# Linux Aarch64 CI configs
build:ci_linux_aarch64_base --config=clang --verbose_failures=true
build:ci_linux_aarch64_base --action_env=TF_SYSROOT="/dt10"
build:ci_linux_aarch64_base --color=yes

build:ci_linux_aarch64 --config=ci_linux_aarch64_base
build:ci_linux_aarch64 --host_crosstool_top="@ml2014_clang_aarch64_config_aarch64//crosstool:toolchain"
Expand All @@ -221,18 +225,21 @@ build:ci_linux_aarch64_cuda --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm
build:ci_darwin_x86_64 --macos_minimum_os=10.14
build:ci_darwin_x86_64 --config=macos_cache_push
build:ci_darwin_x86_64 --verbose_failures=true
build:ci_darwin_x86_64 --color=yes

# Mac Arm64 CI configs
build:ci_darwin_arm64 --macos_minimum_os=11.0
build:ci_darwin_arm64 --config=macos_cache_push
build:ci_darwin_arm64 --verbose_failures=true
build:ci_darwin_arm64 --color=yes

# Windows x86 CI configs
build:ci_windows_amd64 --config=avx_windows
build:ci_windows_amd64 --compiler=clang-cl --config=clang --verbose_failures=true
build:ci_windows_amd64 --crosstool_top="@xla//tools/toolchains/win/20240424:toolchain"
build:ci_windows_amd64 --extra_toolchains="@xla//tools/toolchains/win/20240424:cc-toolchain-x64_windows-clang-cl"
build:ci_windows_amd64 --host_linkopt=/FORCE:MULTIPLE --linkopt=/FORCE:MULTIPLE
build:ci_windows_amd64 --color=yes

# #############################################################################
# RBE config options below. These inherit the CI configs above and set the
Expand Down Expand Up @@ -379,4 +386,4 @@ build:debug --config debug_symbols -c fastbuild
try-import %workspace%/.jax_configure.bazelrc

# Load rc file with user-specific options.
try-import %workspace%/.bazelrc.user
try-import %workspace%/.bazelrc.user
22 changes: 12 additions & 10 deletions .github/workflows/asan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
branches:
- main
paths:
- '**/workflows/asan.yml'
- '**/workflows/asan.yaml'

jobs:
asan:
Expand All @@ -25,14 +25,8 @@ jobs:
run:
shell: bash -l {0}
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
with:
path: jax
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
with:
repository: python/cpython
path: cpython
ref: v3.13.0
# Install git before actions/checkout as otherwise it will download the code with the GitHub
# REST API and therefore any subsequent git commands will fail.
- name: Install clang 18
env:
DEBIAN_FRONTEND: noninteractive
Expand All @@ -42,6 +36,14 @@ jobs:
zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
libffi-dev liblzma-dev
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
path: jax
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: python/cpython
path: cpython
ref: v3.13.0
- name: Build CPython with ASAN enabled
env:
ASAN_OPTIONS: detect_leaks=0
Expand All @@ -65,7 +67,7 @@ jobs:
run: |
source ${GITHUB_WORKSPACE}/venv/bin/activate
cd jax
python build/build.py \
python build/build.py build --wheels=jaxlib --verbose \
--bazel_options=--color=yes \
--bazel_options=--copt=-fsanitize=address \
--clang_path=/usr/bin/clang-18
Expand Down
41 changes: 41 additions & 0 deletions .github/workflows/bazel_cpu_rbe.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: CI - Bazel CPU tests (RBE)

on:
workflow_dispatch:
inputs:
halt-for-connection:
description: 'Should this workflow run wait for a remote connection?'
type: choice
required: true
default: 'no'
options:
- 'yes'
- 'no'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
cancel-in-progress: true

jobs:
run_tests:
if: github.event.repository.fork == false
strategy:
matrix:
runner: ["linux-x86-n2-16", "linux-arm64-t2a-16"]

runs-on: ${{ matrix.runner }}
# TODO(b/369382309): Replace Linux Arm64 container with the ml-build container once it is available
container: ${{ (contains(matrix.runner, 'linux-x86') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest') ||
(contains(matrix.runner, 'linux-arm64') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/linux-arm64-arc-container:latest') }}

env:
JAXCI_HERMETIC_PYTHON_VERSION: "3.12"

steps:
- uses: actions/checkout@v3
- name: Wait For Connection
uses: google-ml-infra/actions/ci_connection@main
with:
halt-dispatch-input: ${{ inputs.halt-for-connection }}
- name: Run Bazel CPU Tests with RBE
run: ./ci/run_bazel_test_cpu_rbe.sh
39 changes: 39 additions & 0 deletions .github/workflows/bazel_gpu_rbe.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: CI - Bazel GPU tests (RBE)

on:
workflow_dispatch:
inputs:
halt-for-connection:
description: 'Should this workflow run wait for a remote connection?'
type: choice
required: true
default: 'no'
options:
- 'yes'
- 'no'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
cancel-in-progress: true

jobs:
run_tests:
if: github.event.repository.fork == false
strategy:
matrix:
runner: ["linux-x86-n2-16"]

runs-on: ${{ matrix.runner }}
container: 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest'

env:
JAXCI_HERMETIC_PYTHON_VERSION: "3.12"

steps:
- uses: actions/checkout@v3
- name: Wait For Connection
uses: google-ml-infra/actions/ci_connection@main
with:
halt-dispatch-input: ${{ inputs.halt-for-connection }}
- name: Run Bazel GPU Tests with RBE
run: ./ci/run_bazel_test_gpu_rbe.sh
78 changes: 40 additions & 38 deletions .github/workflows/ci-build.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: CI
name: ROCm CPU CI

# We test all supported Python versions as follows:
# - 3.10 : Documentation build
Expand All @@ -11,10 +11,10 @@ on:
# but only for the main branch
push:
branches:
- main
- rocm-main
pull_request:
branches:
- main
- rocm-main

permissions:
contents: read # to fetch code
Expand All @@ -29,18 +29,21 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python 3.11
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: 3.11
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
- run: python -m pip install pre-commit
- uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ~/.cache/pre-commit
key: pre-commit-${{ env.pythonLocation }}-${{ hashFiles('.pre-commit-config.yaml', 'setup.py') }}
- run: pre-commit run --show-diff-on-failure --color=always --all-files

build:
name: "build ${{ matrix.name-prefix }} (py ${{ matrix.python-version }} on ubuntu-20.04, x64=${{ matrix.enable-x64}})"
runs-on: linux-x86-n2-32
container:
image: index.docker.io/library/ubuntu@sha256:6d8d9799fe6ab3221965efac00b4c34a2bcc102c086a58dff9e19a08b913c7ef # ratchet:ubuntu:20.04
runs-on: ROCM-Ubuntu
timeout-minutes: 60
strategy:
matrix:
Expand All @@ -57,13 +60,9 @@ jobs:
prng-upgrade: 0
num_generated_cases: 1
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Image Setup
run: |
apt update
apt install -y libssl-dev
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Get pip cache dir
Expand All @@ -72,7 +71,7 @@ jobs:
python -m pip install --upgrade pip wheel
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- name: pip cache
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-py${{ matrix.python-version }}-pip-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
Expand Down Expand Up @@ -102,15 +101,15 @@ jobs:
documentation:
name: Documentation - test code snippets
runs-on: ubuntu-latest
runs-on: ROCM-Ubuntu
timeout-minutes: 10
strategy:
matrix:
python-version: ['3.10']
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Get pip cache dir
Expand All @@ -119,7 +118,7 @@ jobs:
python -m pip install --upgrade pip wheel
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- name: pip cache
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-docs-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
Expand All @@ -140,14 +139,14 @@ jobs:
documentation_render:
name: Documentation - render documentation
runs-on: ubuntu-latest
timeout-minutes: 10
timeout-minutes: 20
strategy:
matrix:
python-version: ['3.10']
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Get pip cache dir
Expand All @@ -156,7 +155,7 @@ jobs:
python -m pip install --upgrade pip wheel
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- name: pip cache
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-docs-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
Expand All @@ -165,8 +164,7 @@ jobs:
pip install -r docs/requirements.txt
- name: Render documentation
run: |
sphinx-build --color -W --keep-going -b html -D nb_execution_mode=off docs docs/build/html
sphinx-build -j auto --color -W --keep-going -b html -D nb_execution_mode=off docs docs/build/html
jax2tf_test:
name: "jax2tf_test (py ${{ matrix.python-version }} on ${{ matrix.os }}, x64=${{ matrix.enable-x64}})"
Expand All @@ -181,9 +179,9 @@ jobs:
enable-x64: 0
num_generated_cases: 10
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{ matrix.python-version }}
- name: Get pip cache dir
Expand All @@ -192,7 +190,7 @@ jobs:
python -m pip install --upgrade pip wheel
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- name: pip cache
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-py${{ matrix.python-version }}-pip-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt') }}
Expand All @@ -217,21 +215,21 @@ jobs:
ffi:
name: FFI example
runs-on: ubuntu-latest
timeout-minutes: 5
runs-on: ROCM-Ubuntu
timeout-minutes: 30
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python 3.11
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: 3.11
python-version: 3.12
- name: Get pip cache dir
id: pip-cache
run: |
python -m pip install --upgrade pip wheel
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
- name: pip cache
uses: actions/cache@3624ceb22c1c5a301c8db4169662070a689d9ea8 # v4.1.1
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-ffi-examples-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/test-requirements.txt', 'examples/**/pyproject.toml') }}
Expand All @@ -245,6 +243,10 @@ jobs:
# a different toolchain. GCC is the default compiler on the
# 'ubuntu-latest' runner, but we still set this explicitly just to be
# clear.
CMAKE_ARGS: -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++
- name: Run tests
CMAKE_ARGS: -DCMAKE_CXX_COMPILER=g++ #-DJAX_FFI_EXAMPLE_ENABLE_CUDA=ON
- name: Run CPU tests
run: python -m pytest examples/ffi/tests
env:
JAX_PLATFORM_NAME: cpu
- name: Run GPU tests
run: python -m pytest examples/ffi/tests
Loading

0 comments on commit dfa9903

Please sign in to comment.