Build quantize for Linux

Signed-off-by: Christian Heimes <[email protected]>
instructlab · May 15, 2024 · f24f85e · f24f85e
1 parent c27cdd6
commit f24f85e
Show file tree

Hide file tree

Showing 10 changed files with 95 additions and 20 deletions.
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -30,12 +30,70 @@ jobs:
             - name: make build/quantize from llama.cpp sources
               env:
                   CMAKE_ARGS: "-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON"
-              run: make build/quantize
+              run: make quantize
 
             - name: file info
-              run: file build/quantize
+              run: file build/quantize*
 
             - uses: actions/upload-artifact@v4
               with:
-                  name: "quantize-macos"
-                  path: build/quantize
+                  name: "quantize-macos-arm64"
+                  path: build/quantize*
+
+    linux-build:
+        name: "Build quantize on Linux for ${{ matrix.arch }}"
+        runs-on: "ubuntu-latest"
+        strategy:
+            fail-fast: true
+            matrix:
+                arch:
+                    - "amd64"
+                    - "arm64"
+                    # - "ppc64le"
+                    # - "s390x"
+                image:
+                    - registry.access.redhat.com/ubi9/python-312
+        steps:
+            - uses: "actions/checkout@v4"
+              with:
+                  submodules: true  # presumably fetches the llama.cpp sources
+
+            - name: Set up QEMU  # emulation for non-native --platform containers
+              uses: docker/setup-qemu-action@v3
+
+            - name: Set up Docker Buildx
+              uses: docker/setup-buildx-action@v3
+
+            - name: Pull ${{ matrix.image }} for linux/${{ matrix.arch }}
+              run: |
+                  docker pull --platform linux/${{ matrix.arch }} ${{ matrix.image }}
+
+            - name: make build/quantize from llama.cpp sources
+              run: |  # --rm: no leftover containers; ${PWD}: absolute bind-mount source
+                set -e
+                docker run --rm --platform linux/${{ matrix.arch }} ${{ matrix.image }} uname -a
+                docker run --rm --platform linux/${{ matrix.arch }} \
+                      -v "${PWD}:/opt/app-root/src" \
+                      -e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \
+                      ${{ matrix.image }} \
+                      make quantize
+
+            - name: file info
+              run: file build/quantize*
+
+            - uses: actions/upload-artifact@v4
+              with:
+                  name: "quantize-linux-${{ matrix.arch }}"
+                  path: build/quantize*
+
+    merge-artifacts:
+        name: "Merge artifacts"
+        runs-on: "ubuntu-latest"
+        needs:  # runs only after both build jobs have finished
+            - "macos-build"
+            - "linux-build"
+        steps:
+            - name: "Merge artifacts"
+              uses: "actions/upload-artifact/merge@v4"
+              with:
+                  name: "quantize"  # name of the merged artifact
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -30,7 +30,7 @@ jobs:
         steps:
             - uses: "actions/checkout@v4"
               with:
-                submodules: false
+                submodules: true
 
             - uses: "actions/setup-python@v5"
               with:

diff --git a/Makefile b/Makefile
@@ -1,8 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
 
 CMAKE_ARGS ?=
-QUANTIZE = build/quantize
-LLAMA_BUILDDIR = build/llama.cpp
+
+UNAME_MACHINE = $(shell uname -m | tr A-Z a-z)
+UNAME_OS = $(shell uname -s | tr A-Z a-z)
+QUANTIZE = build/quantize-$(UNAME_MACHINE)-$(UNAME_OS)
+LLAMA_BUILDDIR = build/llama.cpp-$(UNAME_MACHINE)-$(UNAME_OS)
 LLAMA_DIR = llama.cpp
 
 
@@ -27,7 +30,10 @@ $(LLAMA_BUILDDIR)/Makefile: $(LLAMA_DIR)/CMakeLists.txt
 	CMAKE_ARGS="$(CMAKE_ARGS)" cmake -S $(dir $<) -B $(dir $@)
 
 $(LLAMA_BUILDDIR)/bin/quantize: $(LLAMA_BUILDDIR)/Makefile
-	cmake --build $(dir $<) --config Release --target quantize
+	cmake --build $(dir $<) --parallel 2 --config Release --target quantize
+
+.PHONY: quantize
+quantize: $(QUANTIZE)
 
 $(QUANTIZE): $(LLAMA_BUILDDIR)/bin/quantize
 	cp -a $< $@
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,6 +18,7 @@ classifiers = [
     "Environment :: Console",
     "License :: OSI Approved :: Apache Software License",
     "License :: OSI Approved :: MIT License",
+    "Operating System :: POSIX :: Linux",
     "Operating System :: MacOS :: MacOS X",
     "Topic :: Scientific/Engineering :: Artificial Intelligence",
     "Programming Language :: Python :: 3",
@@ -41,6 +42,9 @@ issues = "https://github.com/tiran/instructlab-quantize/issues"
 [tool.setuptools]
 package-dir = {"" = "src"}
 
+[tool.setuptools.package-data]
+"instructlab_quantize" = ["quantize-*"]
+
 [tool.check-wheel-contents]
 
 [tool.ruff]

diff --git a/src/instructlab_quantize/__init__.py b/src/instructlab_quantize/__init__.py
@@ -2,23 +2,37 @@
-"""Run quantize binary on macOS"""
+"""Run the bundled quantize binary on macOS and Linux"""
 
 import os
+import platform
 import subprocess
+import sys
 from importlib import resources
 
-__all__ = (
-    "QUANTIZE",
-    "run_quantize",
-)
-
-QUANTIZE = resources.files("instructlab_quantize").joinpath("quantize")
+__all__ = ("run_quantize",)
 
 
 def run_quantize(*quantizeargs, **kwargs):
-    """Run quantize with subprocess.check_output
+    """Run the bundled quantize binary with subprocess.check_output
 
-    stdout = quantize("extra", "arguments")
+    The binary is selected by machine and OS, e.g. "quantize-arm64-darwin".
+
+    :param quantizeargs: command line arguments passed to quantize
+    :param kwargs: keyword arguments passed to subprocess.check_output
+    :return: captured stdout of quantize (bytes unless text mode is enabled)
+    :raises FileNotFoundError: no binary is bundled for this platform
+    :raises subprocess.CalledProcessError: quantize exited with non-zero status
+
+    stdout = run_quantize("extra", "arguments")
     """
-    with resources.as_file(QUANTIZE) as quantize:
+    machine = platform.machine().lower()
+    quantize_bin = f"quantize-{machine}-{sys.platform}"
+
+    files = resources.files("instructlab_quantize")
+
+    with resources.as_file(files.joinpath(quantize_bin)) as quantize:
+        if not quantize.exists():
+            raise FileNotFoundError(
+                f"no bundled quantize binary for this platform: {quantize}"
+            )
         args = [os.fspath(quantize)]
         args.extend(quantizeargs)
         return subprocess.check_output(args, **kwargs)
diff --git a/src/instructlab_quantize/__main__.py b/src/instructlab_quantize/__main__.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: Apache-2.0
+"""Command line entry point: python -m instructlab_quantize [ARGS...]"""
+
+import subprocess
+import sys
+
+from . import run_quantize
+
+try:
+    # text=True makes check_output return str; printing bytes would show b'...'
+    output = run_quantize(*sys.argv[1:], text=True)
+except subprocess.CalledProcessError as exc:
+    # Forward captured stdout and the exit status instead of a traceback.
+    sys.stdout.write(exc.output or "")
+    sys.exit(exc.returncode)
+
+sys.stdout.write(output)
diff --git a/src/instructlab_quantize/quantize-aarch64-linux b/src/instructlab_quantize/quantize-aarch64-linux
diff --git a/src/instructlab_quantize/quantize → ...nstructlab_quantize/quantize-arm64-darwin b/src/instructlab_quantize/quantize → ...nstructlab_quantize/quantize-arm64-darwin
diff --git a/src/instructlab_quantize/quantize-x86_64-linux b/src/instructlab_quantize/quantize-x86_64-linux
diff --git a/tests.py b/tests.py
@@ -20,7 +20,8 @@ def m_check_output():
 
 
 def test_mock_run_quantize(m_check_output: mock.Mock):
-    quantize = os.fspath(PKG_DIR.joinpath("quantize"))
+    machine = platform.machine().lower()
+    quantize = os.fspath(PKG_DIR.joinpath(f"quantize-{machine}-{sys.platform}"))
     instructlab_quantize.run_quantize("egg", "spam")
     m_check_output.assert_called_with([quantize, "egg", "spam"])
     m_check_output.reset_mock()
@@ -29,10 +30,6 @@ def test_mock_run_quantize(m_check_output: mock.Mock):
     m_check_output.assert_called_with([quantize, "--help"], stderr=subprocess.STDOUT)
 
 
-@pytest.mark.skipif(
-    sys.platform != "darwin" and platform.machine() != "arm64",
-    reason="binary is Apple M1-only",
-)
 def test_run_quantize(tmp_path: pathlib.Path):
     with pytest.raises(subprocess.CalledProcessError) as exc_info:
         instructlab_quantize.run_quantize("--help", stderr=subprocess.STDOUT, text=True)