From d24472123b701e8e5caba5b9a46253d994da81a3 Mon Sep 17 00:00:00 2001
From: Lucy Qiu
Date: Mon, 25 Mar 2024 23:31:42 -0700
Subject: [PATCH] Add xnnpack to llama runner mac & linux CI job (#2677)

Summary: Remake of D55290786, which is ghstack poisoned

Reviewed By: kimishpatel, digantdesai

Differential Revision: D55349949
---
 .ci/scripts/test_llama.sh                    | 23 ++++++-
 .github/workflows/pull.yml                   |  4 +-
 .github/workflows/trunk.yml                  | 64 ++++++++++++++++++++
 CMakeLists.txt                               |  9 ++-
 examples/models/llama2/CMakeLists.txt        | 25 ++------
 examples/models/llama2/runner/CMakeLists.txt |  2 +-
 extension/module/CMakeLists.txt              |  2 +-
 7 files changed, 103 insertions(+), 26 deletions(-)

diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 06444785af5..558e2aaccc4 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -12,7 +12,11 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 MODEL_NAME=$1 # stories110M.pt
 BUILD_TOOL=$2 # buck2 or cmake
 DTYPE=$3 # fp16 or fp32
-
+MODE=${4:-"xnnpack"} # portable or xnnpack
+if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
+  echo "Expecting at least 4 positional arguments"
+  echo "Usage: [...]"
+fi
 if [[ -z "${MODEL_NAME:-}" ]]; then
   echo "Missing model name, exiting..."
   exit 1
@@ -28,6 +32,11 @@ if [[ -z "${DTYPE:-}" ]]; then
   exit 1
 fi
 
+if [[ -z "${MODE:-}" ]]; then
+  echo "Missing mode, choose portable or xnnpack, exiting..."
+  exit 1
+fi
+
 if [[ -z "${BUCK:-}" ]]; then
   BUCK=buck2
 fi
@@ -42,12 +51,18 @@ which "${PYTHON_EXECUTABLE}"
 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
+  if [[ "${MODE}" == "xnnpack" ]]; then
+    XNNPACK=ON
+  else
+    XNNPACK=OFF
+  fi
   retry cmake -DBUCK2="$BUCK" \
       -DCMAKE_INSTALL_PREFIX=cmake-out \
       -DCMAKE_BUILD_TYPE=Release \
       -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
       -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
       -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+      -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
      -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
       -Bcmake-out .
   cmake --build cmake-out -j9 --target install --config Release
@@ -101,7 +116,11 @@ fi
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
-$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama -c stories110M.pt -p "${PARAMS}" -d "${DTYPE}"
+EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
+if [[ "${MODE}" == "xnnpack" ]]; then
+  EXPORT_ARGS="${EXPORT_ARGS} --pt2e_quantize xnnpack_dynamic"
+fi
+$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}
 
 # Create tokenizer.bin.
echo "Creating tokenizer.bin" diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 32a856ea0ad..304f24529fe 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -90,6 +90,7 @@ jobs: matrix: dtype: [fp32] build-tool: [buck2, cmake] + mode: [portable, xnnpack] fail-fast: false with: runner: linux.2xlarge @@ -104,13 +105,14 @@ jobs: DTYPE=${{ matrix.dtype }} BUILD_TOOL=${{ matrix.build-tool }} + MODE=${{ matrix.mode }} # Setup executorch PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2 # Install requirements for export_llama PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" + PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}" test-custom-ops-linux: name: test-custom-ops-linux diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 826b8ab45dc..2d9046d2359 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -215,3 +215,67 @@ jobs: # Build and test coreml delegate PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh popd + + test-pybind-build-macos: + name: test-pybind-build-macos + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + strategy: + matrix: + include: + - build-tool: cmake + fail-fast: false + with: + runner: macos-13-xlarge + python-version: '3.11' + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + WORKSPACE=$(pwd) + pushd "${WORKSPACE}/pytorch/executorch" + bash .ci/scripts/setup-conda.sh + + # build module for executorch.extension.pybindings.portable_lib + BUILD_TOOL=${{ matrix.build-tool }} + + CMAKE_PREFIX_PATH=$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())') \ + GITHUB_RUNNER=1 \ + ${CONDA_RUN} \ + PYTHON_EXECUTABLE=python \ + EXECUTORCH_BUILD_PYBIND=ON \ + bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + + # see if we can import the module successfully + python -c "from executorch.extension.pybindings import portable_lib; print('success!')" + + test-llama-runner-macos: + name: test-llama-runner-mac + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + strategy: + matrix: + dtype: [fp32] + build-tool: [buck2, cmake] + mode: [portable, xnnpack] + fail-fast: false + with: + runner: macos-13-xlarge + python-version: '3.11' + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 900 + script: | + WORKSPACE=$(pwd) + pushd "${WORKSPACE}/pytorch/executorch" + bash .ci/scripts/setup-conda.sh + + DTYPE=${{ matrix.dtype }} + BUILD_TOOL=${{ matrix.build-tool }} + MODE=${{ matrix.mode }} + + # Setup executorch + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}" + + # Install requirements for export_llama + PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh + # Test llama2 + PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}" diff --git a/CMakeLists.txt b/CMakeLists.txt index 69b5e66d251..aca2a019758 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -459,8 +459,13 @@ if(EXECUTORCH_BUILD_PYBIND) # find pytorch lib, to allow pybind to take at::Tensor as input/output find_package(Torch CONFIG REQUIRED) - 
-  find_library(TORCH_PYTHON_LIBRARY torch_python
-    PATHS "${TORCH_INSTALL_PREFIX}/lib")
+
+  if(APPLE)
+    find_library(TORCH_PYTHON_LIBRARY torch_python)
+  else()
+    find_library(TORCH_PYTHON_LIBRARY torch_python
+      PATHS "${TORCH_INSTALL_PREFIX}/lib")
+  endif()
 
   # compile options for pybind
 
diff --git a/examples/models/llama2/CMakeLists.txt b/examples/models/llama2/CMakeLists.txt
index 3ebe142d4f8..70c35f52c1f 100644
--- a/examples/models/llama2/CMakeLists.txt
+++ b/examples/models/llama2/CMakeLists.txt
@@ -54,21 +54,15 @@ find_package(executorch CONFIG REQUIRED)
 # llama_runner library
 add_subdirectory(runner)
 
-set(link_options)
 set(link_libraries)
 
 if(EXECUTORCH_BUILD_OPTIMIZED)
-  list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels portable_kernels)
-  list(APPEND link_options
-       "SHELL:LINKER:--whole-archive \
-       $<TARGET_FILE:optimized_native_cpu_ops_lib> \
-       LINKER:--no-whole-archive")
+  list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels
+       portable_kernels)
+  target_link_options_shared_lib(optimized_native_cpu_ops_lib)
 else()
   list(APPEND link_libraries portable_ops_lib portable_kernels)
-  list(APPEND link_options
-       "SHELL:LINKER:--whole-archive \
-       $<TARGET_FILE:portable_ops_lib> \
-       LINKER:--no-whole-archive")
+  target_link_options_shared_lib(portable_ops_lib)
 endif()
 
 target_link_libraries(llama_main PUBLIC gflags llama_runner)
@@ -77,24 +71,17 @@ target_link_libraries(llama_main PUBLIC gflags llama_runner)
 if(TARGET xnnpack_backend)
   set(xnnpack_backend_libs xnnpack_backend XNNPACK pthreadpool cpuinfo)
   list(APPEND link_libraries ${xnnpack_backend_libs})
-  list(APPEND link_options
-       "SHELL:LINKER:--whole-archive \
-       $<TARGET_FILE:xnnpack_backend> \
-       LINKER:--no-whole-archive")
+  target_link_options_shared_lib(xnnpack_backend)
 endif()
 
 # Vulkan backend
 if(TARGET vulkan_backend)
   list(APPEND link_libraries vulkan_backend)
-  list(APPEND link_options
-       "SHELL:LINKER:--whole-archive \
-       $<TARGET_FILE:vulkan_backend> \
-       LINKER:--no-whole-archive")
+  target_link_options_shared_lib(vulkan_backend)
 endif()
 
 target_compile_options(llama_main PUBLIC ${_common_compile_options})
 target_link_libraries(llama_main PUBLIC ${link_libraries})
-target_link_options(llama_main PUBLIC ${link_options})
 
 # Print all summary
 executorch_print_configuration_summary()
diff --git a/examples/models/llama2/runner/CMakeLists.txt b/examples/models/llama2/runner/CMakeLists.txt
index a21995281df..75802f91f92 100644
--- a/examples/models/llama2/runner/CMakeLists.txt
+++ b/examples/models/llama2/runner/CMakeLists.txt
@@ -39,7 +39,7 @@ list(TRANSFORM _llama_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
 target_include_directories(extension_module
                            INTERFACE ${_common_include_directories})
 
-if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID)
+if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID OR APPLE)
   # Building a share library on iOS requires code signing
   # On Android we see duplicated registration when using shared lib
   add_library(llama_runner STATIC ${_llama_runner__srcs})
diff --git a/extension/module/CMakeLists.txt b/extension/module/CMakeLists.txt
index e0d7ccc2507..e36cfa37605 100644
--- a/extension/module/CMakeLists.txt
+++ b/extension/module/CMakeLists.txt
@@ -17,7 +17,7 @@ if(NOT EXECUTORCH_ROOT)
 endif()
 
 list(TRANSFORM _extension_module__srcs PREPEND "${EXECUTORCH_ROOT}/")
-if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID)
+if(CMAKE_TOOLCHAIN_IOS OR CMAKE_TOOLCHAIN_ANDROID OR APPLE)
  # Building a share library on iOS requires code signing
  # On Android we see duplicated registration when using shared lib
  add_library(extension_module STATIC ${_extension_module__srcs})
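
Usage note: with this change, .ci/scripts/test_llama.sh takes a fourth
positional argument, MODE (portable or xnnpack, defaulting to xnnpack), which
both the pull.yml and trunk.yml job matrices now pass through. A minimal
sketch of a local invocation mirroring the CI jobs, assuming the
stories110M.pt checkpoint and its params.json have already been fetched into
the working directory (the CI setup scripts handle that step):

    # Build with cmake at fp32, then export and run stories110M through the
    # XNNPACK backend with dynamic quantization; pass "portable" instead to
    # exercise the non-delegated path.
    PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M.pt cmake fp32 xnnpack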