From 4205771baac7eeeffa9e990f7c2d3b6791baae18 Mon Sep 17 00:00:00 2001 From: Kyle Herndon Date: Wed, 4 Dec 2024 11:24:17 -0800 Subject: [PATCH 1/4] Implement MMDIT block that is necessary for flux (#592) --- sharktank/sharktank/layers/__init__.py | 1 + sharktank/sharktank/layers/mmdit.py | 146 +++++++++++++++++++++++ sharktank/sharktank/layers/modulation.py | 42 +++++++ sharktank/sharktank/layers/testing.py | 79 ++++++++++++ sharktank/sharktank/ops/default_impls.py | 16 ++- sharktank/sharktank/ops/signatures.py | 6 +- sharktank/tests/layers/mmdit_test.py | 58 +++++++++ 7 files changed, 343 insertions(+), 5 deletions(-) create mode 100644 sharktank/sharktank/layers/mmdit.py create mode 100644 sharktank/sharktank/layers/modulation.py create mode 100644 sharktank/tests/layers/mmdit_test.py diff --git a/sharktank/sharktank/layers/__init__.py b/sharktank/sharktank/layers/__init__.py index fd56ec872..620c15672 100644 --- a/sharktank/sharktank/layers/__init__.py +++ b/sharktank/sharktank/layers/__init__.py @@ -17,5 +17,6 @@ from .ffn_block import FFN from .ffn_moe_block import FFNMOE from .mixture_of_experts_block import MoeBlock +from .mmdit import MMDITDoubleBlock from .configs import * diff --git a/sharktank/sharktank/layers/mmdit.py b/sharktank/sharktank/layers/mmdit.py new file mode 100644 index 000000000..0b0750549 --- /dev/null +++ b/sharktank/sharktank/layers/mmdit.py @@ -0,0 +1,146 @@ +# Copyright 2024 Black Forest Labs. Inc. and Flux Authors +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +"""MMDIT Layers adapted from black-forest-labs' flux implementation +https://github.com/black-forest-labs/flux/blob/main/src/flux/modules/layers.py +""" + +import torch.nn.functional as F +import torch +from torch import Tensor + +from .. 
import ops
+
+from .base import Theta, ThetaLayer
+from .linear import LinearLayer
+from .modulation import ModulationLayer
+from .norm import RMSNormLayer
+from .paged_llama_attention_block import PagedLlamaAttentionBlock
+
+
+def qk_norm(q, k, v, rms_q, rms_k):
+    return rms_q(q).to(v), rms_k(k).to(v)
+
+
+# TODO: Work on unifying with the current RoPE layer
+def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
+    xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
+    xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
+    xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
+    xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
+    return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
+
+
+def attention(q, k, v, pe):
+    q, k = apply_rope(q, k, pe)  # TODO: unify with the shared RoPE layer
+
+    x = ops.scaled_dot_product_attention(
+        q=q, k=k, v=v, a=None, is_causal=False, scale=None
+    )
+    x = ops.permute(x, (0, 2, 1, 3))
+    x = x.view(x.shape[0], x.shape[1], -1)
+
+    return x
+
+
+class MMDITDoubleBlock(ThetaLayer):
+    def __init__(self, theta, num_heads: int):
+        super().__init__(theta)
+
+        self.num_heads = num_heads
+        self.add_module("img_mod", ModulationLayer(theta("img_mod"), double=True))
+        self.add_module("img_attn_qkv", LinearLayer(theta("img_attn.qkv")))
+        self.add_module(
+            "img_attn_norm_q",
+            RMSNormLayer(theta("img_attn.norm.query_norm"), epsilon=1e-6),
+        )
+        self.add_module(
+            "img_attn_norm_k",
+            RMSNormLayer(theta("img_attn.norm.key_norm"), epsilon=1e-6),
+        )
+        self.add_module("img_attn_proj", LinearLayer(theta("img_attn.proj")))
+
+        self.add_module("img_mlp1", LinearLayer(theta("img_mlp.0")))
+        self.add_module("img_mlp2", LinearLayer(theta("img_mlp.2")))
+
+        self.add_module("txt_mod", ModulationLayer(theta("txt_mod"), double=True))
+        self.add_module("txt_attn_qkv", LinearLayer(theta("txt_attn.qkv")))
+        self.add_module(
+            "txt_attn_norm_q",
+            RMSNormLayer(theta("txt_attn.norm.query_norm"), epsilon=1e-6),
+        )
+        self.add_module(
+            "txt_attn_norm_k",
+            RMSNormLayer(theta("txt_attn.norm.key_norm"), epsilon=1e-6),
+        )
+        self.add_module("txt_attn_proj", LinearLayer(theta("txt_attn.proj")))
+
+        self.add_module("txt_mlp1", LinearLayer(theta("txt_mlp.0")))
+        self.add_module("txt_mlp2", LinearLayer(theta("txt_mlp.2")))
+
+    def forward(
+        self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor
+    ) -> tuple[Tensor, Tensor]:
+        img_mod1, img_mod2 = self.img_mod(vec)
+        txt_mod1, txt_mod2 = self.txt_mod(vec)
+
+        # prepare image for attention
+        img_modulated = ops.layer_norm(img, None, None, eps=1e-6)
+        img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
+        img_qkv = self.img_attn_qkv(img_modulated)
+        img_qkv_2 = img_qkv.view(
+            img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1
+        )
+        img_qkv_3 = ops.permute(img_qkv_2, (2, 0, 3, 1, 4))
+        img_q, img_k, img_v = img_qkv_3
+        img_q, img_k = qk_norm(
+            img_q, img_k, img_v, self.img_attn_norm_q, self.img_attn_norm_k
+        )
+
+        # prepare text for attention
+        txt_modulated = ops.layer_norm(txt, None, None, eps=1e-6)
+        txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
+        txt_qkv = self.txt_attn_qkv(txt_modulated)
+        txt_qkv_2 = txt_qkv.view(
+            txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1
+        )
+        txt_qkv_3 = ops.permute(txt_qkv_2, (2, 0, 3, 1, 4))
+        txt_q, txt_k, txt_v = txt_qkv_3
+        txt_q, txt_k = qk_norm(
+            txt_q, txt_k, txt_v, self.txt_attn_norm_q, self.txt_attn_norm_k
+        )
+
+        # run actual attention
+        q = torch.cat((txt_q, img_q), dim=2)
+        k 
= torch.cat((txt_k, img_k), dim=2) + v = torch.cat((txt_v, img_v), dim=2) + + attn = attention(q, k, v, pe) + txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :] + + # calculate the image blocks + # TODO: Refactor this for code reuse with the txt blocks + img = img + img_mod1.gate * self.img_attn_proj(img_attn) + img_mlp_in = (1 + img_mod2.scale) * ops.layer_norm( + img, None, None, eps=1e-6 + ) + img_mod2.shift + img_mlp_out1 = self.img_mlp1(img_mlp_in) + img_mlp_out2 = ops.elementwise(F.gelu, img_mlp_out1) + img_mlp_out3 = self.img_mlp2(img_mlp_out2) + img = img + img_mod2.gate * img_mlp_out3 + + # calculate the text blocks + txt = txt + txt_mod1.gate * self.txt_attn_proj(txt_attn) + txt_mlp_in = (1 + txt_mod2.scale) * ops.layer_norm( + txt, None, None, eps=1e-6 + ) + txt_mod2.shift + txt_mlp_out1 = self.txt_mlp1(txt_mlp_in) + # TODO: Unify with modulation layer by taking act_fn as an arg + txt_mlp_out2 = ops.elementwise(F.gelu, txt_mlp_out1) + txt_mlp_out3 = self.txt_mlp2(txt_mlp_out2) + txt = txt + txt_mod2.gate * txt_mlp_out3 + + return img, txt diff --git a/sharktank/sharktank/layers/modulation.py b/sharktank/sharktank/layers/modulation.py new file mode 100644 index 000000000..7ef7adfa1 --- /dev/null +++ b/sharktank/sharktank/layers/modulation.py @@ -0,0 +1,42 @@ +# Copyright 2024 Black Forest Labs. Inc. and Flux Authors +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +"""Modulation Layer adapted from black-forest-labs' flux implementation +https://github.com/black-forest-labs/flux/blob/main/src/flux/modules/layers.py +""" + +import torch +import torch.nn.functional as F + +from .. 
import ops + +from .base import Theta, ThetaLayer +from .linear import LinearLayer + + +class ModulationOut: + def __init__(self, shift, scale, gate): + self.shift = shift + self.scale = scale + self.gate = gate + + +class ModulationLayer(ThetaLayer): + def __init__(self, theta: Theta, double: bool): + super().__init__(theta) + + self.is_double = double + self.multiplier = 6 if double else 3 + self.add_module("lin", LinearLayer(theta("lin"))) + + def forward(self, vec: torch.Tensor) -> tuple[ModulationOut, ModulationOut | None]: + silu_result = ops.elementwise(F.silu, vec) + out = self.lin(silu_result)[:, None, :].chunk(self.multiplier, dim=-1) + + return ( + ModulationOut(*out[:3]), + ModulationOut(*out[3:]) if self.is_double else None, + ) diff --git a/sharktank/sharktank/layers/testing.py b/sharktank/sharktank/layers/testing.py index e2fc79d78..a21d5bf85 100644 --- a/sharktank/sharktank/layers/testing.py +++ b/sharktank/sharktank/layers/testing.py @@ -49,3 +49,82 @@ def make_llama_attention_block_theta( ), } ) + + +def make_mmdit_double_block_theta(dtype: torch.dtype | None = None) -> Theta: + return Theta( + { + "img_attn.norm.key_norm.weight": DefaultPrimitiveTensor( # + data=make_rand_torch((128,), dtype=dtype) + ), + "img_attn.norm.query_norm.weight": DefaultPrimitiveTensor( # + data=make_rand_torch((128,), dtype=dtype) + ), + "img_attn.proj.bias": DefaultPrimitiveTensor( + data=make_rand_torch((3072,), dtype=dtype) + ), + "img_attn.proj.weight": DefaultPrimitiveTensor( + data=make_rand_torch((3072, 3072), dtype=dtype) + ), + "img_attn.qkv.bias": DefaultPrimitiveTensor( + data=make_rand_torch((9216,), dtype=dtype) + ), + "img_attn.qkv.weight": DefaultPrimitiveTensor( + data=make_rand_torch((9216, 3072), dtype=dtype) + ), + "img_mlp.0.bias": DefaultPrimitiveTensor( + data=make_rand_torch((12288), dtype=dtype) + ), + "img_mlp.0.weight": DefaultPrimitiveTensor( + data=make_rand_torch((12288, 3072), dtype=dtype) + ), + "img_mlp.2.bias": DefaultPrimitiveTensor( + data=make_rand_torch((3072), dtype=dtype) + ), + "img_mlp.2.weight": DefaultPrimitiveTensor( + data=make_rand_torch((3072, 12288), dtype=dtype) + ), + "img_mod.lin.bias": DefaultPrimitiveTensor( + data=make_rand_torch((18432,), dtype=dtype) + ), + "img_mod.lin.weight": DefaultPrimitiveTensor( + data=make_rand_torch((18432, 3072), dtype=dtype) + ), + "txt_attn.norm.key_norm.weight": DefaultPrimitiveTensor( # + data=make_rand_torch((128,), dtype=dtype) + ), + "txt_attn.norm.query_norm.weight": DefaultPrimitiveTensor( # + data=make_rand_torch((128,), dtype=dtype) + ), + "txt_attn.proj.bias": DefaultPrimitiveTensor( + data=make_rand_torch((3072,), dtype=dtype) + ), + "txt_attn.proj.weight": DefaultPrimitiveTensor( + data=make_rand_torch((3072, 3072), dtype=dtype) + ), + "txt_attn.qkv.bias": DefaultPrimitiveTensor( + data=make_rand_torch((9216,), dtype=dtype) + ), + "txt_attn.qkv.weight": DefaultPrimitiveTensor( + data=make_rand_torch((9216, 3072), dtype=dtype) + ), + "txt_mlp.0.bias": DefaultPrimitiveTensor( + data=make_rand_torch((12288), dtype=dtype) + ), + "txt_mlp.0.weight": DefaultPrimitiveTensor( + data=make_rand_torch((12288, 3072), dtype=dtype) + ), + "txt_mlp.2.bias": DefaultPrimitiveTensor( + data=make_rand_torch((3072), dtype=dtype) + ), + "txt_mlp.2.weight": DefaultPrimitiveTensor( + data=make_rand_torch((3072, 12288), dtype=dtype) + ), + "txt_mod.lin.bias": DefaultPrimitiveTensor( + data=make_rand_torch((18432,), dtype=dtype) + ), + "txt_mod.lin.weight": DefaultPrimitiveTensor( + data=make_rand_torch((18432, 3072), 
dtype=dtype)
+        ),
+    }
+    )
diff --git a/sharktank/sharktank/ops/default_impls.py b/sharktank/sharktank/ops/default_impls.py
index d117ada23..47e737fb1 100644
--- a/sharktank/sharktank/ops/default_impls.py
+++ b/sharktank/sharktank/ops/default_impls.py
@@ -304,16 +304,26 @@ def interpolate_default(
     )
 
 
-@layer_norm.override(Tensor, Tensor, Tensor)
 def layer_norm_default(input, weight, bias, *, eps):
     input = unbox_tensor(input)
-    weight = unbox_tensor(weight)
-    bias = unbox_tensor(bias)
+    if weight is not None:
+        weight = unbox_tensor(weight)
+    else:
+        weight = torch.ones(input.shape, dtype=input.dtype)
+    if bias is not None:
+        bias = unbox_tensor(bias)
+    else:
+        bias = torch.zeros(input.shape, dtype=input.dtype)
     return F.layer_norm(
         input, normalized_shape=weight.shape, weight=weight, bias=bias, eps=eps
     )
 
 
+layer_norm.override(Tensor)(layer_norm_default)
+layer_norm.override(Tensor, Tensor)(layer_norm_default)
+layer_norm.override(Tensor, Tensor, Tensor)(layer_norm_default)
+
+
 # Linear
 def linear_default(input, weight, bias, *, accum_dtype) -> Tensor:
     input = unbox_tensor(input)
diff --git a/sharktank/sharktank/ops/signatures.py b/sharktank/sharktank/ops/signatures.py
index 408f00ec7..dc7fb108a 100644
--- a/sharktank/sharktank/ops/signatures.py
+++ b/sharktank/sharktank/ops/signatures.py
@@ -582,12 +582,14 @@ def layer_norm(
 def _layer_norm_trampoline(
     d: SignatureDispatcher,
     input: AnyTensor,
-    weight: AnyTensor,
+    weight: Optional[AnyTensor],
     bias: Optional[AnyTensor],
     *,
     eps: float,
 ):
-    tensors = [input, weight]
+    tensors = [input]
+    if weight is not None:
+        tensors.append(weight)
+    if bias is not None:
+        tensors.append(bias)
     for override in d.find_overrides(tensors):
diff --git a/sharktank/tests/layers/mmdit_test.py b/sharktank/tests/layers/mmdit_test.py
new file mode 100644
index 000000000..5bd5ce39a
--- /dev/null
+++ b/sharktank/tests/layers/mmdit_test.py
@@ -0,0 +1,58 @@
+# Copyright 2024 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import logging + +logging.basicConfig(level=logging.DEBUG) + +import unittest + +import torch + +from iree.turbine import aot +from sharktank.layers import ( + MMDITDoubleBlock, +) +import sharktank.ops as ops +from sharktank.layers.testing import ( + make_mmdit_double_block_theta, +) +from sharktank.types.tensors import DefaultPrimitiveTensor + + +class MMDITTest(unittest.TestCase): + def setUp(self): + torch.manual_seed(12345) + self.hidden_size = 3072 + self.num_heads = 24 + self.batch_size = 3 + + def testDoubleExport(self): + + theta = make_mmdit_double_block_theta() + mmdit = MMDITDoubleBlock( + theta=theta, + num_heads=self.num_heads, + ) + + img = torch.rand([self.batch_size, 1024, self.hidden_size]) + txt = torch.rand([self.batch_size, 512, self.hidden_size]) + vec = torch.rand([self.batch_size, self.hidden_size]) + rot = torch.rand([self.batch_size, 1, 1536, 64, 2, 2]) + mmdit.forward(img, txt, vec, rot) + fxb = aot.FxProgramsBuilder(mmdit) + + @fxb.export_program(name="mmdit", args=(img, txt, vec, rot), strict=False) + def _(model, img, txt, vec, rot) -> torch.Tensor: + return model.forward(img, txt, vec, rot) + + output = aot.export(fxb) + output.verify() + asm = str(output.mlir_module) + + +if __name__ == "__main__": + unittest.main() From 2304a5627cd3e0a1882552f13f8ecd5af4ed4ee8 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Wed, 4 Dec 2024 14:43:16 -0800 Subject: [PATCH 2/4] Switch ci-sdxl github runner and refactor workflow. (#644) The `mi300-sdxl-kernel` runner has been offline for a few weeks, so runs of this workflow have been queued: https://github.com/nod-ai/shark-ai/actions/workflows/ci-sdxl.yaml. This `mi300x-4` runner is probably fit to run this workflow. Also refactored the workflow to not use explicit build steps, which loosens the requirements on installed software and helps make progress on https://github.com/nod-ai/shark-ai/issues/584. --- .github/workflows/ci-sdxl.yaml | 48 ++++++++-------------------------- 1 file changed, 11 insertions(+), 37 deletions(-) diff --git a/.github/workflows/ci-sdxl.yaml b/.github/workflows/ci-sdxl.yaml index 708cb3885..102ef7817 100644 --- a/.github/workflows/ci-sdxl.yaml +++ b/.github/workflows/ci-sdxl.yaml @@ -35,9 +35,9 @@ env: LIBSHORTFIN_DIR: ${{ github.workspace }}/shortfin/ jobs: - build-and-test: - name: Build and test - runs-on: mi300-sdxl-kernel + install-and-test: + name: Install and test + runs-on: mi300x-4 steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -55,53 +55,27 @@ jobs: sudo apt install ninja -y fi - - name: Checkout IREE repo - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - repository: iree-org/iree - path: ${{ env.IREE_REPO_DIR }} - submodules: false - ref: iree-3.1.0rc20241204 - - - name: Initalize IREE submodules - working-directory: ${{ env.IREE_REPO_DIR }} - run : | - git submodule update --init --depth 1 -- third_party/benchmark - git submodule update --init --depth 1 -- third_party/cpuinfo/ - git submodule update --init --depth 1 -- third_party/flatcc - git submodule update --init --depth 1 -- third_party/googletest - git submodule update --init --depth 1 -- third_party/hip-build-deps/ - - name: Setup Python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: "3.12" cache: "pip" - - name: Install Python packages - # TODO: Switch to `pip install -r requirements.txt -e shortfin/`. 
+
+      - name: Install requirements
         working-directory: ${{ env.LIBSHORTFIN_DIR }}
         run: |
           pip install -r requirements-tests.txt
           pip install -r requirements-iree-compiler.txt
           pip freeze
 
-      - name: Build shortfin (full)
+      - name: Install shortfin
         working-directory: ${{ env.LIBSHORTFIN_DIR }}
         run: |
-          mkdir build
-          cmake -GNinja \
-            -S. \
-            -Bbuild \
-            -DCMAKE_C_COMPILER=clang-18 \
-            -DCMAKE_CXX_COMPILER=clang++-18 \
-            -DSHORTFIN_BUNDLE_DEPS=ON \
-            -DSHORTFIN_IREE_SOURCE_DIR="${{ env.IREE_REPO_DIR }}" \
-            -DSHORTFIN_BUILD_PYTHON_BINDINGS=ON
-          cmake --build build --target all
-          pip install -v -e build/
+          pip install --no-compile -e .
 
-      - name: Test shortfin (full)
+      - name: Test apps/sd/e2e_test
         working-directory: ${{ env.LIBSHORTFIN_DIR }}
+        env:
+          HIP_VISIBLE_DEVICES: 0
         run: |
-          ctest --timeout 30 --output-on-failure --test-dir build
-          HIP_VISIBLE_DEVICES=0 pytest tests/apps/sd/e2e_test.py -v -s --system=amdgpu
+          pytest tests/apps/sd/e2e_test.py -v -s --system=amdgpu

From 57a7390b67591a631aefe63e0e487cd5ccc99cbd Mon Sep 17 00:00:00 2001
From: Scott Todd
Date: Thu, 5 Dec 2024 10:00:13 -0800
Subject: [PATCH 3/4] Install python packages into venvs across workflows.
 (#640)

Many of these workflows are using persistent self-hosted runners, so it
looks like they have been reusing the same system-wide Python environment
between workflow runs (plus a layer of caching on top). This switches to
using venvs at `${{ github.workspace }}/.venv` that should be ephemeral,
giving us more explicit control over which packages are installed.

More work is planned as part of
https://github.com/nod-ai/shark-ai/issues/584 to refactor these workflows
further - replacing the package install code like
`pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/`
with a `setup_venv.py` script that uses dev/nightly/stable packages (from
an appropriate source).

This also disables pip caching, since that is not directly compatible
with using venvs. As a result, some workflows are slower now, but they
are more predictable in what they install.
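For reference, such a `setup_venv.py` might look roughly like the sketch
below. This is only an illustration of the planned direction: the script
does not exist yet, and the flag names, channels, and install commands
shown are assumptions, not decisions.

```python
# Hypothetical sketch only -- setup_venv.py is planned, not implemented.
import argparse
import subprocess
import venv
from pathlib import Path

parser = argparse.ArgumentParser(description="Create a fresh venv and install packages.")
parser.add_argument("venv_dir", type=Path)
parser.add_argument("--packages", choices=["dev", "nightly", "stable"], default="dev")
args = parser.parse_args()

# Recreate the venv from scratch so runs never inherit state from a
# previous job on the same self-hosted runner.
venv.create(args.venv_dir, clear=True, with_pip=True)
pip = str(args.venv_dir / "bin" / "pip")

if args.packages == "dev":
    # Editable installs from the source tree, mirroring today's commands.
    subprocess.check_call(
        [pip, "install", "--no-compile",
         "-r", "requirements.txt",
         "-r", "sharktank/requirements-tests.txt",
         "-e", "sharktank/"]
    )
else:
    # Nightly/stable would instead pull released wheels; details TBD.
    extra = ["--pre"] if args.packages == "nightly" else []
    subprocess.check_call([pip, "install", *extra, "shark-ai"])
```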
Good reading for adding caching back: * https://adamj.eu/tech/2023/11/02/github-actions-faster-python-virtual-environments/ * https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md#caching-packages --- .github/workflows/ci-llama-large-tests.yaml | 18 +++++----- .github/workflows/ci-llama-quick-tests.yaml | 18 +++++----- .github/workflows/ci-sglang-benchmark.yml | 31 ++++++----------- .../workflows/ci-sglang-integration-tests.yml | 24 ++++++------- .github/workflows/ci-shark-ai.yml | 18 +++++----- .github/workflows/ci_eval.yaml | 34 +++++++++---------- .github/workflows/ci_eval_short.yaml | 18 +++++----- 7 files changed, 72 insertions(+), 89 deletions(-) diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index 5766d8ca6..376f93938 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -33,7 +33,6 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -47,16 +46,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install pip deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. Installing the PyTorch CPU @@ -68,14 +63,17 @@ jobs: pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - # Test with nightly releases, not what iree-turbine uses. 
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ iree-base-compiler \ iree-base-runtime + pip freeze + - name: Run llama tests - run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --iree-device=hip://7 --html=out/llm/llama/benchmark/index.html + run: | + source ${VENV_DIR}/bin/activate + pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --run-nightly-llama-tests --iree-hip-target=gfx942 --iree-device=hip://7 --html=out/llm/llama/benchmark/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 diff --git a/.github/workflows/ci-llama-quick-tests.yaml b/.github/workflows/ci-llama-quick-tests.yaml index 697c47928..ddbcc204a 100644 --- a/.github/workflows/ci-llama-quick-tests.yaml +++ b/.github/workflows/ci-llama-quick-tests.yaml @@ -33,7 +33,6 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -47,16 +46,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install pip deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. Installing the PyTorch CPU @@ -68,14 +63,17 @@ jobs: pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - # Test with nightly releases, not what iree-turbine uses. 
pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ iree-base-compiler \ iree-base-runtime + pip freeze + - name: Run llama 8b f16 decomposed test - run: pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --iree-hip-target=gfx942 --iree-device=hip://0 --run-quick-llama-test + run: | + source ${VENV_DIR}/bin/activate + pytest sharktank/tests/models/llama/benchmark_amdgpu_test.py -v -s --iree-hip-target=gfx942 --iree-device=hip://0 --run-quick-llama-test - name: Upload llama executable files uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index e6af1e73c..d53189483 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -45,7 +45,7 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -54,16 +54,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ matrix.version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install pip deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. 
Installing the PyTorch CPU @@ -81,11 +77,15 @@ jobs: iree-base-runtime==3.1.0rc20241204 \ "numpy<2.0" - - name: Install SGLang - run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + # Install SGLang + pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + + pip freeze - name: Run Shortfin Benchmark Tests - run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=shortfin_index.html --self-contained-html + run: | + source ${VENV_DIR}/bin/activate + pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=shortfin_index.html --self-contained-html - name: Upload pytest report uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 @@ -103,8 +103,6 @@ jobs: defaults: run: shell: bash - env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -114,13 +112,6 @@ jobs: with: python-version: ${{matrix.version}} - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ matrix.version }} - - name: Install SGLang run: | python -m pip install --no-compile --upgrade pip diff --git a/.github/workflows/ci-sglang-integration-tests.yml b/.github/workflows/ci-sglang-integration-tests.yml index 154657504..36a59779a 100644 --- a/.github/workflows/ci-sglang-integration-tests.yml +++ b/.github/workflows/ci-sglang-integration-tests.yml @@ -34,7 +34,7 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -43,16 +43,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install pip deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. 
Installing the PyTorch CPU @@ -69,11 +65,13 @@ jobs: iree-base-runtime \ "numpy<2.0" - - name: Install SGLang - run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + # Install SGLang and sentence_transformers + pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python" + pip install sentence_transformers - - name: Install sentence_transformers - run: pip install sentence_transformers + pip freeze - name: Run Integration Tests - run: pytest -v app_tests/integration_tests/llm/sglang --log-cli-level=INFO + run: | + source ${VENV_DIR}/bin/activate + pytest -v app_tests/integration_tests/llm/sglang --log-cli-level=INFO diff --git a/.github/workflows/ci-shark-ai.yml b/.github/workflows/ci-shark-ai.yml index 7ec69e13b..e662d125b 100644 --- a/.github/workflows/ci-shark-ai.yml +++ b/.github/workflows/ci-shark-ai.yml @@ -33,7 +33,7 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -42,16 +42,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install pip deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. Installing the PyTorch CPU @@ -70,5 +66,9 @@ jobs: iree-base-compiler \ iree-base-runtime + pip freeze + - name: Run LLM Integration Tests - run: pytest -v app_tests/integration_tests/llm/shortfin --log-cli-level=INFO + run: | + source ${VENV_DIR}/bin/activate + pytest -v app_tests/integration_tests/llm/shortfin --log-cli-level=INFO diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 7433bf167..565bf3352 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -35,7 +35,7 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -44,16 +44,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install sharktank deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. 
Installing the PyTorch CPU @@ -72,8 +68,12 @@ jobs: iree-base-compiler \ iree-base-runtime + pip freeze + - name: Run perplexity test with IREE - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --run-nightly-llama-tests --bs=100 --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/perplexity/iree_perplexity/index.html + run: | + source ${VENV_DIR}/bin/activate + pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --run-nightly-llama-tests --bs=100 --iree-device='hip://7' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/perplexity/iree_perplexity/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 @@ -97,7 +97,7 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -106,16 +106,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install sharktank deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. 
Installing the PyTorch CPU @@ -128,7 +124,9 @@ jobs: -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - name: Run perplexity test with Torch - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/perplexity/torch_perplexity/index.html + run: | + source ${VENV_DIR}/bin/activate + pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_torch_test.py --longrun --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json --html=out/llm/llama/perplexity/torch_perplexity/index.html - name: Deploy to GitHub Pages uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 diff --git a/.github/workflows/ci_eval_short.yaml b/.github/workflows/ci_eval_short.yaml index 64043c2ec..6331e0709 100644 --- a/.github/workflows/ci_eval_short.yaml +++ b/.github/workflows/ci_eval_short.yaml @@ -34,7 +34,7 @@ jobs: run: shell: bash env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" + VENV_DIR: ${{ github.workspace }}/.venv steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 @@ -43,16 +43,12 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: ${{matrix.version}} - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }} + - name: Create Python venv + run: python -m venv ${VENV_DIR} - name: Install sharktank deps run: | + source ${VENV_DIR}/bin/activate python -m pip install --no-compile --upgrade pip # Note: We install in three steps in order to satisfy requirements # from non default locations first. Installing the PyTorch CPU @@ -71,5 +67,9 @@ jobs: iree-base-compiler \ iree-base-runtime + pip freeze + - name: Run perplexity test with vmfb - run: pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --bs=5 --iree-device='hip://6' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json + run: | + source ${VENV_DIR}/bin/activate + pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_iree_test.py --bs=5 --iree-device='hip://6' --iree-hip-target=gfx942 --iree-hal-target-backends=rocm --llama3-8b-f16-model-path=/data/llama3.1/8b/llama8b_f16.irpa --llama3-8b-tokenizer-path=/data/llama3.1/8b/tokenizer_config.json From 8ea135a86806d8f0c13edbdf4a2e678a60ecb839 Mon Sep 17 00:00:00 2001 From: Scott Todd Date: Thu, 5 Dec 2024 10:45:35 -0800 Subject: [PATCH 4/4] [shortfin] Support ccache in package builds and make tracing optional. (#646) Splitting this off from https://github.com/nod-ai/shark-ai/pull/589 to make progress on https://github.com/nod-ai/shark-ai/issues/584. 
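With both changes in place, caching and tracing can be controlled entirely
from the environment when invoking the script. For example (variable names
are the ones the script documents; the values here are illustrative):

```
CACHE_DIR=/tmp/shortfin \
SHORTFIN_ENABLE_TRACING=OFF \
OVERRIDE_PYTHON_VERSIONS="cp312-cp312" \
sudo -E ./shortfin/build_tools/build_linux_package.sh
```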
Tested with ``` CACHE_DIR=/tmp/shortfin/ sudo -E ./shortfin/build_tools/build_linux_package.sh + ccache --show-stats Cacheable calls: 626 / 636 (98.43%) Hits: 2 / 626 ( 0.32%) Direct: 2 / 2 (100.0%) Preprocessed: 0 / 2 ( 0.00%) Misses: 624 / 626 (99.68%) Uncacheable calls: 10 / 636 ( 1.57%) Local storage: Cache size (GB): 0.1 / 2.0 ( 3.10%) Hits: 2 / 626 ( 0.32%) Misses: 624 / 626 (99.68%) + ccache --show-stats ccache stats: Cacheable calls: 1252 / 1272 (98.43%) Hits: 550 / 1252 (43.93%) Direct: 550 / 550 (100.0%) Preprocessed: 0 / 550 ( 0.00%) Misses: 702 / 1252 (56.07%) Uncacheable calls: 20 / 1272 ( 1.57%) Local storage: Cache size (GB): 0.1 / 2.0 ( 4.11%) Hits: 550 / 1252 (43.93%) Misses: 702 / 1252 (56.07%) + ccache --show-stats Cacheable calls: 1878 / 1908 (98.43%) Hits: 1098 / 1878 (58.47%) Direct: 1098 / 1098 (100.0%) Preprocessed: 0 / 1098 ( 0.00%) Misses: 780 / 1878 (41.53%) Uncacheable calls: 30 / 1908 ( 1.57%) Local storage: Cache size (GB): 0.1 / 2.0 ( 5.12%) Hits: 1098 / 1878 (58.47%) Misses: 780 / 1878 (41.53%) CACHE_DIR=/tmp/shortfin/ sudo -E ./shortfin/build_tools/build_linux_package.sh + ccache --show-stats ccache stats: Cacheable calls: 3756 / 3816 (98.43%) Hits: 2820 / 3756 (75.08%) Direct: 2820 / 2820 (100.0%) Preprocessed: 0 / 2820 ( 0.00%) Misses: 936 / 3756 (24.92%) Uncacheable calls: 60 / 3816 ( 1.57%) Local storage: Cache size (GB): 0.1 / 2.0 ( 5.19%) Hits: 2820 / 3756 (75.08%) Misses: 936 / 3756 (24.92%) ``` So we have multiple configurations getting built (Python versions, tracing enable/disabled), but we still get a reasonable number of cache hits. Definitely room to improve there, but better than nothing. --- shortfin/build_tools/build_linux_package.sh | 65 ++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/shortfin/build_tools/build_linux_package.sh b/shortfin/build_tools/build_linux_package.sh index afaa1e9fb..91b944e51 100755 --- a/shortfin/build_tools/build_linux_package.sh +++ b/shortfin/build_tools/build_linux_package.sh @@ -14,9 +14,10 @@ # Build everything (all python versions): # sudo ./build_tools/build_linux_package.sh # -# Build specific Python versions to custom directory: +# Build specific Python versions to custom directory, with tracing enabled: # OVERRIDE_PYTHON_VERSIONS="cp312-cp312 cp313-cp313" \ # OUTPUT_DIR="/tmp/wheelhouse" \ +# SHORTFIN_ENABLE_TRACING="ON" \ # sudo -E ./build_tools/build_linux_package.sh # # Valid Python versions match a subdirectory under /opt/python in the docker @@ -40,6 +41,8 @@ ARCH="$(uname -m)" MANYLINUX_DOCKER_IMAGE="${MANYLINUX_DOCKER_IMAGE:-quay.io/pypa/manylinux_2_28_${ARCH}:latest}" PYTHON_VERSIONS="${OVERRIDE_PYTHON_VERSIONS:-cp311-cp311 cp312-cp312 cp313-cp313}" OUTPUT_DIR="${OUTPUT_DIR:-${THIS_DIR}/wheelhouse}" +CACHE_DIR="${CACHE_DIR:-}" +SHORTFIN_ENABLE_TRACING="${SHORTFIN_ENABLE_TRACING:-ON}" function run_on_host() { echo "Running on host" @@ -50,12 +53,23 @@ function run_on_host() { OUTPUT_DIR="$(cd "${OUTPUT_DIR}" && pwd)" echo "Outputting to ${OUTPUT_DIR}" mkdir -p "${OUTPUT_DIR}" + + # Setup cache as needed. + extra_args="" + if ! 
[ -z "$CACHE_DIR" ]; then + echo "Setting up host cache dir ${CACHE_DIR}" + mkdir -p "${CACHE_DIR}/ccache" + extra_args="${extra_args} -v ${CACHE_DIR}:${CACHE_DIR} -e CACHE_DIR=${CACHE_DIR}" + fi + docker run --rm \ -v "${REPO_ROOT}:${REPO_ROOT}" \ -v "${OUTPUT_DIR}:${OUTPUT_DIR}" \ -e __MANYLINUX_BUILD_WHEELS_IN_DOCKER=1 \ -e "OVERRIDE_PYTHON_VERSIONS=${PYTHON_VERSIONS}" \ -e "OUTPUT_DIR=${OUTPUT_DIR}" \ + -e "SHORTFIN_ENABLE_TRACING=${SHORTFIN_ENABLE_TRACING}" \ + ${extra_args} \ "${MANYLINUX_DOCKER_IMAGE}" \ -- ${THIS_DIR}/${SCRIPT_NAME} @@ -72,6 +86,23 @@ function run_in_docker() { echo "Using python versions: ${PYTHON_VERSIONS}" local orig_path="${PATH}" + # Configure caching. + if [ -z "$CACHE_DIR" ]; then + echo "Cache directory not configured. No caching will take place." + else + # TODO: include this in the dockerfile we use so it gets cached + install_ccache + + # TODO: debug low cache hit rate (~30% hits out of 98% cacheable) on CI + mkdir -p "${CACHE_DIR}" + CACHE_DIR="$(cd ${CACHE_DIR} && pwd)" + echo "Caching build artifacts to ${CACHE_DIR}" + export CCACHE_DIR="${CACHE_DIR}/ccache" + export CCACHE_MAXSIZE="2G" + export CMAKE_C_COMPILER_LAUNCHER=ccache + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + fi + # Build phase. echo "******************** BUILDING PACKAGE ********************" for python_version in ${PYTHON_VERSIONS}; do @@ -82,14 +113,44 @@ function run_in_docker() { fi export PATH="${python_dir}/bin:${orig_path}" echo ":::: Python version $(python --version)" + clean_wheels "shortfin" "${python_version}" build_shortfin run_audit_wheel "shortfin" "${python_version}" + + if ! [ -z "$CACHE_DIR" ]; then + echo "ccache stats:" + ccache --show-stats + fi done } +function install_ccache() { + # This gets an old version. + # yum install -y ccache + + CCACHE_VERSION="4.10.2" + + if [[ "${ARCH}" == "x86_64" ]]; then + curl --silent --fail --show-error --location \ + "https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}-linux-${ARCH}.tar.xz" \ + --output ccache.tar.xz + + tar xf ccache.tar.xz + cp ccache-${CCACHE_VERSION}-linux-${ARCH}/ccache /usr/local/bin + elif [[ "${ARCH}" == "aarch64" ]]; then + # Latest version of ccache is not released for arm64, built it + git clone --depth 1 --branch "v${CCACHE_VERSION}" https://github.com/ccache/ccache.git + mkdir -p ccache/build && cd "$_" + cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release .. + ninja + cp ccache /usr/bin/ + fi +} + function build_shortfin() { - export SHORTFIN_ENABLE_TRACING=ON + # Note: The SHORTFIN_ENABLE_TRACING environment variable should have been + # forwarded from the host environment into Docker above. python -m pip wheel --disable-pip-version-check -v -w "${OUTPUT_DIR}" "${REPO_ROOT}/shortfin" }