Skip to content

Commit

Permalink
Build quantize for Linux
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Heimes <[email protected]>
  • Loading branch information
tiran committed May 15, 2024
1 parent c27cdd6 commit f24f85e
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 20 deletions.
66 changes: 62 additions & 4 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,70 @@ jobs:
- name: make build/quantize from llama.cpp sources
env:
CMAKE_ARGS: "-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON"
run: make build/quantize
run: make quantize

- name: file info
run: file build/quantize
run: file build/quantize*

- uses: actions/upload-artifact@v4
with:
name: "quantize-macos"
path: build/quantize
name: "quantize-macos-arm64"
path: build/quantize*

# Build the quantize binary for each Linux architecture in the matrix by
# running `make quantize` inside the container image listed in the matrix.
linux-build:
  name: "Build quantize on Linux for ${{ matrix.arch }}"
  runs-on: "ubuntu-latest"
  strategy:
    fail-fast: true
    matrix:
      arch:
        - "amd64"
        - "arm64"
        # - "ppc64le"
        # - "s390x"
      image:
        - "registry.access.redhat.com/ubi9/python-312"
  steps:
    - uses: "actions/checkout@v4"
      with:
        # The build reads the llama.cpp sources; presumably they are tracked
        # as a git submodule, hence the recursive checkout.
        submodules: true

    # QEMU user-mode emulation lets the runner execute containers for
    # architectures other than its own.
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3

    # NOTE(review): no later step in this job invokes buildx; confirm whether
    # this step is still required.
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: "Pull ${{ matrix.image }} for linux/${{ matrix.arch }}"
      run: |
        docker pull --platform linux/${{ matrix.arch }} ${{ matrix.image }}

    - name: make build/quantize from llama.cpp sources
      run: |
        set -e
        # Log the kernel/architecture seen inside the container.
        docker run --rm --platform linux/${{ matrix.arch }} ${{ matrix.image }} uname -a
        # --rm removes the container after it exits. The checkout is
        # bind-mounted by absolute path because older Docker CLIs treat a
        # relative "." source as a named volume. The mount target is assumed
        # to be the image's working directory -- TODO confirm.
        docker run --rm --platform linux/${{ matrix.arch }} \
          -v "${PWD}:/opt/app-root/src" \
          -e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \
          ${{ matrix.image }} \
          make quantize

    - name: file info
      run: file build/quantize*

    - uses: actions/upload-artifact@v4
      with:
        name: "quantize-linux-${{ matrix.arch }}"
        path: "build/quantize*"
        # `file` exits 0 even when the glob matches nothing, so make a
        # missing binary fail the job here instead of only warning.
        if-no-files-found: error

# Runs once both platform build jobs have finished and merges the artifacts
# uploaded during this workflow run into a single artifact named "quantize".
merge-artifacts:
  name: "Merge artifacts"
  runs-on: "ubuntu-latest"
  needs: [macos-build, linux-build]
  steps:
    - name: "Merge artifacts"
      uses: "actions/upload-artifact/merge@v4"
      with:
        name: "quantize"
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
steps:
- uses: "actions/checkout@v4"
with:
submodules: false
submodules: true

- uses: "actions/setup-python@v5"
with:
Expand Down
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# SPDX-License-Identifier: Apache-2.0

CMAKE_ARGS ?=
QUANTIZE = build/quantize
LLAMA_BUILDDIR = build/llama.cpp

UNAME_MACHINE = $(shell uname -m | tr A-Z a-z)
UNAME_OS = $(shell uname -s | tr A-Z a-z)
QUANTIZE = build/quantize-$(UNAME_MACHINE)-$(UNAME_OS)
LLAMA_BUILDDIR = build/llama.cpp-$(UNAME_MACHINE)-$(UNAME_OS)
LLAMA_DIR = llama.cpp


Expand All @@ -27,7 +30,10 @@ $(LLAMA_BUILDDIR)/Makefile: $(LLAMA_DIR)/CMakeLists.txt
CMAKE_ARGS="$(CMAKE_ARGS)" cmake -S $(dir $<) -B $(dir $@)

$(LLAMA_BUILDDIR)/bin/quantize: $(LLAMA_BUILDDIR)/Makefile
cmake --build $(dir $<) --config Release --target quantize
cmake --build $(dir $<) --parallel 2 --config Release --target quantize

.PHONY: quantize
quantize: $(QUANTIZE)

$(QUANTIZE): $(LLAMA_BUILDDIR)/bin/quantize
cp -a $< $@
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ classifiers = [
"Environment :: Console",
"License :: OSI Approved :: Apache Software License",
"License :: OSI Approved :: MIT License",
"Operating System :: POSIX :: Linux",
"Operating System :: MacOS :: MacOS X",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Programming Language :: Python :: 3",
Expand All @@ -41,6 +42,9 @@ issues = "https://github.com/tiran/instructlab-quantize/issues"
[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.package-data]
"instructlab_quantize" = ["quantize-*"]

[tool.check-wheel-contents]

[tool.ruff]
Expand Down
18 changes: 11 additions & 7 deletions src/instructlab_quantize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,27 @@
"""Run the bundled quantize binary for the current platform (macOS or Linux)"""

import os
import platform
import subprocess
import sys
from importlib import resources

__all__ = (
"QUANTIZE",
"run_quantize",
)

QUANTIZE = resources.files("instructlab_quantize").joinpath("quantize")
__all__ = ("run_quantize",)


def run_quantize(*quantizeargs, **kwargs):
"""Run quantize with subprocess.check_output
stdout = run_quantize("extra", "arguments")
"""
with resources.as_file(QUANTIZE) as quantize:
machine = platform.machine().lower()
quantize_bin = f"quantize-{machine}-{sys.platform}"

files = resources.files("instructlab_quantize")

with resources.as_file(files.joinpath(quantize_bin)) as quantize:
if not quantize.exists():
raise FileNotFoundError(quantize)
args = [os.fspath(quantize)]
args.extend(quantizeargs)
return subprocess.check_output(args, **kwargs)
6 changes: 6 additions & 0 deletions src/instructlab_quantize/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
"""Command-line entry point: ``python -m instructlab_quantize [ARGS...]``.

Forwards the command-line arguments to the bundled quantize binary, copies
the binary's captured standard output to this process's standard output, and
exits with the binary's exit status.
"""
import subprocess
import sys

from . import run_quantize


def main(argv=None):
    """Run quantize with ``argv`` and return the exit status to use.

    :param argv: arguments for quantize; defaults to ``sys.argv[1:]``.
    :returns: ``0`` on success, otherwise the exit status of quantize.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    try:
        output = run_quantize(*args)
    except subprocess.CalledProcessError as exc:
        # Surface whatever quantize printed before failing and propagate its
        # exit status instead of dumping a Python traceback.
        output = exc.output
        status = exc.returncode
    else:
        status = 0
    if output:
        # check_output() returns bytes here; write them unmodified rather
        # than print()-ing a b'...' repr.
        sys.stdout.buffer.write(output)
        sys.stdout.buffer.flush()
    return status


if __name__ == "__main__":
    sys.exit(main())
Binary file added src/instructlab_quantize/quantize-aarch64-linux
Binary file not shown.
Binary file not shown.
Binary file added src/instructlab_quantize/quantize-x86_64-linux
Binary file not shown.
7 changes: 2 additions & 5 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def m_check_output():


def test_mock_run_quantize(m_check_output: mock.Mock):
quantize = os.fspath(PKG_DIR.joinpath("quantize"))
machine = platform.machine().lower()
quantize = os.fspath(PKG_DIR.joinpath(f"quantize-{machine}-{sys.platform}"))
instructlab_quantize.run_quantize("egg", "spam")
m_check_output.assert_called_with([quantize, "egg", "spam"])
m_check_output.reset_mock()
Expand All @@ -29,10 +30,6 @@ def test_mock_run_quantize(m_check_output: mock.Mock):
m_check_output.assert_called_with([quantize, "--help"], stderr=subprocess.STDOUT)


@pytest.mark.skipif(
sys.platform != "darwin" and platform.machine() != "arm64",
reason="binary is Apple M1-only",
)
def test_run_quantize(tmp_path: pathlib.Path):
with pytest.raises(subprocess.CalledProcessError) as exc_info:
instructlab_quantize.run_quantize("--help", stderr=subprocess.STDOUT, text=True)
Expand Down

0 comments on commit f24f85e

Please sign in to comment.