diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9a6b938..b967b5b 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -30,12 +30,70 @@ jobs: - name: make build/quantize from llama.cpp sources env: CMAKE_ARGS: "-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON" - run: make build/quantize + run: make quantize - name: file info - run: file build/quantize + run: file build/quantize* - uses: actions/upload-artifact@v4 with: - name: "quantize-macos" - path: build/quantize + name: "quantize-macos-arm64" + path: build/quantize* + + linux-build: + name: "Build quantize on Linux for ${{ matrix.arch }}" + runs-on: "ubuntu-latest" + strategy: + fail-fast: true + matrix: + arch: + - "amd64" + - "arm64" + # - "ppc64le" + # - "s390x" + image: + - registry.access.redhat.com/ubi9/python-312 + steps: + - uses: "actions/checkout@v4" + with: + submodules: true + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Pull ${{ matrix.image }} for linux/${{ matrix.arch }} + run: | + docker pull --platform linux/${{ matrix.arch }} ${{ matrix.image }} + + - name: make build/quantize from llama.cpp sources + run: | + set -e + docker run --platform linux/${{ matrix.arch }} ${{ matrix.image }} uname -a + docker run --platform linux/${{ matrix.arch }} \ + -v .:/opt/app-root/src \ + -e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \ + ${{ matrix.image }} \ + make quantize + + - name: file info + run: file build/quantize* + + - uses: actions/upload-artifact@v4 + with: + name: "quantize-linux-${{ matrix.arch }}" + path: build/quantize* + + merge-artifacts: + name: Merge artifacts + runs-on: ubuntu-latest + needs: + - macos-build + - linux-build + steps: + - name: Merge artifacts + uses: actions/upload-artifact/merge@v4 + with: + name: quantize diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1ae584d..04fa1ca 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,7 +30,7 @@ jobs: steps: - uses: "actions/checkout@v4" with: - submodules: false + submodules: true - uses: "actions/setup-python@v5" with: diff --git a/Makefile b/Makefile index 54c0822..d93152e 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 CMAKE_ARGS ?= -QUANTIZE = build/quantize -LLAMA_BUILDDIR = build/llama.cpp + +UNAME_MACHINE = $(shell uname -m | tr A-Z a-z) +UNAME_OS = $(shell uname -s | tr A-Z a-z) +QUANTIZE = build/quantize-$(UNAME_MACHINE)-$(UNAME_OS) +LLAMA_BUILDDIR = build/llama.cpp-$(UNAME_MACHINE)-$(UNAME_OS) LLAMA_DIR = llama.cpp @@ -27,7 +30,10 @@ $(LLAMA_BUILDDIR)/Makefile: $(LLAMA_DIR)/CMakeLists.txt CMAKE_ARGS="$(CMAKE_ARGS)" cmake -S $(dir $<) -B $(dir $@) $(LLAMA_BUILDDIR)/bin/quantize: $(LLAMA_BUILDDIR)/Makefile - cmake --build $(dir $<) --config Release --target quantize + cmake --build $(dir $<) --parallel 2 --config Release --target quantize + +.PHONY: quantize +quantize: $(QUANTIZE) $(QUANTIZE): $(LLAMA_BUILDDIR)/bin/quantize cp -a $< $@ diff --git a/pyproject.toml b/pyproject.toml index 820dfc4..cea6f47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ "Environment :: Console", "License :: OSI Approved :: Apache Software License", "License :: OSI Approved :: MIT License", + "Operating System :: POSIX :: Linux", "Operating System :: MacOS :: MacOS X", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Programming Language :: Python :: 3", @@ -41,6 +42,9 @@ issues = "https://github.com/tiran/instructlab-quantize/issues" [tool.setuptools] package-dir = {"" = "src"} +[tool.setuptools.package-data] +"instructlab_quantize" = ["quantize-*"] + [tool.check-wheel-contents] [tool.ruff] diff --git a/src/instructlab_quantize/__init__.py b/src/instructlab_quantize/__init__.py index 1ceab83..f6b5c80 100644 --- a/src/instructlab_quantize/__init__.py +++ b/src/instructlab_quantize/__init__.py @@ -2,15 +2,12 @@ """Run quantize binary on macOS""" import os +import platform import subprocess +import sys from importlib import resources -__all__ = ( - "QUANTIZE", - "run_quantize", -) - -QUANTIZE = resources.files("instructlab_quantize").joinpath("quantize") +__all__ = ("run_quantize",) def run_quantize(*quantizeargs, **kwargs): @@ -18,7 +15,14 @@ def run_quantize(*quantizeargs, **kwargs): stdout = quantize("extra", "arguments") """ - with resources.as_file(QUANTIZE) as quantize: + machine = platform.machine().lower() + quantize_bin = f"quantize-{machine}-{sys.platform}" + + files = resources.files("instructlab_quantize") + + with resources.as_file(files.joinpath(quantize_bin)) as quantize: + if not quantize.exists(): + raise FileNotFoundError(quantize) args = [os.fspath(quantize)] args.extend(quantizeargs) return subprocess.check_output(args, **kwargs) diff --git a/src/instructlab_quantize/__main__.py b/src/instructlab_quantize/__main__.py new file mode 100644 index 0000000..8880933 --- /dev/null +++ b/src/instructlab_quantize/__main__.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: Apache-2.0 +import sys + +from . import run_quantize + +print(run_quantize(*sys.argv[1:])) diff --git a/src/instructlab_quantize/quantize-aarch64-linux b/src/instructlab_quantize/quantize-aarch64-linux new file mode 100755 index 0000000..78dc970 Binary files /dev/null and b/src/instructlab_quantize/quantize-aarch64-linux differ diff --git a/src/instructlab_quantize/quantize b/src/instructlab_quantize/quantize-arm64-darwin similarity index 80% rename from src/instructlab_quantize/quantize rename to src/instructlab_quantize/quantize-arm64-darwin index ffc7b37..83a9a9e 100755 Binary files a/src/instructlab_quantize/quantize and b/src/instructlab_quantize/quantize-arm64-darwin differ diff --git a/src/instructlab_quantize/quantize-x86_64-linux b/src/instructlab_quantize/quantize-x86_64-linux new file mode 100755 index 0000000..fb47a92 Binary files /dev/null and b/src/instructlab_quantize/quantize-x86_64-linux differ diff --git a/tests.py b/tests.py index e3edebf..01d120b 100644 --- a/tests.py +++ b/tests.py @@ -20,7 +20,8 @@ def m_check_output(): def test_mock_run_quantize(m_check_output: mock.Mock): - quantize = os.fspath(PKG_DIR.joinpath("quantize")) + machine = platform.machine().lower() + quantize = os.fspath(PKG_DIR.joinpath(f"quantize-{machine}-{sys.platform}")) instructlab_quantize.run_quantize("egg", "spam") m_check_output.assert_called_with([quantize, "egg", "spam"]) m_check_output.reset_mock() @@ -29,10 +30,6 @@ def test_mock_run_quantize(m_check_output: mock.Mock): m_check_output.assert_called_with([quantize, "--help"], stderr=subprocess.STDOUT) -@pytest.mark.skipif( - sys.platform != "darwin" and platform.machine() != "arm64", - reason="binary is Apple M1-only", -) def test_run_quantize(tmp_path: pathlib.Path): with pytest.raises(subprocess.CalledProcessError) as exc_info: instructlab_quantize.run_quantize("--help", stderr=subprocess.STDOUT, text=True)