diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index b967b5b..c4551dc 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -33,12 +33,19 @@ jobs:
         run: make quantize
 
       - name: file info
-        run: file build/quantize*
+        run: file build/quantize-arm64-darwin
+
+      - name: test quantize
+        run: |
+          build/quantize-arm64-darwin \
+            llama.cpp/models/ggml-vocab-llama.gguf \
+            /tmp/ggml-vocab-Q4_K_M.gguf \
+            Q4_K_M
 
       - uses: actions/upload-artifact@v4
         with:
-          name: "quantize-macos-arm64"
-          path: build/quantize*
+          name: "quantize-arm64-darwin"
+          path: build/quantize-arm64-darwin
 
   linux-build:
     name: "Build quantize on Linux for ${{ matrix.arch }}"
@@ -46,13 +53,13 @@
     strategy:
       fail-fast: true
       matrix:
-        arch:
-          - "amd64"
-          - "arm64"
-          # - "ppc64le"
-          # - "s390x"
-        image:
-          - registry.access.redhat.com/ubi9/python-312
+        include:
+          - arch: "amd64"
+            suffix: "x86_64-linux"
+            image: quay.io/sclorg/python-312-c8s:c8s
+          - arch: "arm64"
+            suffix: "aarch64-linux"
+            image: quay.io/sclorg/python-312-c8s:c8s
     steps:
       - uses: "actions/checkout@v4"
         with:
@@ -70,21 +77,34 @@
 
       - name: make build/quantize from llama.cpp sources
         run: |
-        set -e
-        docker run --platform linux/${{ matrix.arch }} ${{ matrix.image }} uname -a
-        docker run --platform linux/${{ matrix.arch }} \
-          -v .:/opt/app-root/src \
-          -e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \
-          ${{ matrix.image }} \
-          make quantize
+          set -e
+          docker run --platform linux/${{ matrix.arch }} ${{ matrix.image }} uname -a
+          docker run --platform linux/${{ matrix.arch }} \
+            -v .:/opt/app-root/src \
+            -e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \
+            ${{ matrix.image }} \
+            make quantize
 
       - name: file info
-        run: file build/quantize*
+        run: file build/quantize-${{ matrix.suffix }}
+
+      - name: file symbols
+        run: nm -a build/quantize-${{ matrix.suffix }} | grep -o "GLIBC.*" | sort -u
+
+      - name: test quantize
+        run: |
+          docker run --platform linux/${{ matrix.arch }} \
+            -v .:/opt/app-root/src \
+            ${{ matrix.image }} \
+            build/quantize-${{ matrix.suffix }} \
+            llama.cpp/models/ggml-vocab-llama.gguf \
+            /tmp/ggml-vocab-Q4_K_M.gguf \
+            Q4_K_M
 
       - uses: actions/upload-artifact@v4
         with:
-          name: "quantize-linux-${{ matrix.arch }}"
-          path: build/quantize*
+          name: "quantize-${{ matrix.suffix }}"
+          path: build/quantize-${{ matrix.suffix }}
 
   merge-artifacts:
     name: Merge artifacts
diff --git a/README.md b/README.md
index 51026b4..423cfc8 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,10 @@
 # instructlab-quantize
 
 llama.cpp's [`quantize`](https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp)
-binary for [InstructLab](https://github.com/instructlab/instructlab) on macOS ARM64.
+binary for [InstructLab](https://github.com/instructlab/instructlab).
+
+- macOS ARM64
+- Linux x86_64 glibc with RHEL 8 ABI (`GLIBCXX_3.4.22`, `GLIBC_2.27`)
+- Linux aarch64 glibc with RHEL 8 ABI (`GLIBCXX_3.4.22`, `GLIBC_2.27`)
+
 `quantize` program was written by 'the ggml authors' for `llama.cpp`. License:
diff --git a/src/instructlab_quantize/quantize-aarch64-linux b/src/instructlab_quantize/quantize-aarch64-linux
index 78dc970..107319d 100755
Binary files a/src/instructlab_quantize/quantize-aarch64-linux and b/src/instructlab_quantize/quantize-aarch64-linux differ
diff --git a/src/instructlab_quantize/quantize-x86_64-linux b/src/instructlab_quantize/quantize-x86_64-linux
index fb47a92..80461a4 100755
Binary files a/src/instructlab_quantize/quantize-x86_64-linux and b/src/instructlab_quantize/quantize-x86_64-linux differ