# build.yaml
---
# Builds the `quantize` binary (via `make quantize`) for macOS arm64 and for
# Linux amd64/arm64, smoke-tests each binary by quantizing a vocab-only GGUF
# model, and uploads the results as workflow artifacts.
name: Build quantize binary

# The jobs only need to read the repository contents.
permissions:
  contents: read

on:
  push:
    branches:
      - main
    tags:
      - "v*"
  pull_request:
    branches:
      - main
  workflow_dispatch:

jobs:
  macos-build:
    name: "Build quantize on macOS ARM64 (M1)"
    runs-on: "macos-14"
    steps:
      # The test step reads a model from llama.cpp/, so submodules are
      # checked out together with the repository.
      - uses: "actions/checkout@v4"
        with:
          submodules: true

      # Dumps the runner's kernel/hardware parameters into the job log.
      - name: system info
        run: sysctl -a

      # CMAKE_ARGS is exported to `make quantize`; how the Makefile consumes
      # it is not visible from this workflow.
      - name: make build/quantize from llama.cpp sources
        env:
          CMAKE_ARGS: "-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON"
        run: make quantize

      - name: file info
        run: file build/quantize-arm64-darwin

      # Smoke test: quantize the vocab-only model to Q4_K_M.
      - name: test quantize
        run: |
          build/quantize-arm64-darwin \
            llama.cpp/models/ggml-vocab-llama.gguf \
            /tmp/ggml-vocab-Q4_K_M.gguf \
            Q4_K_M

      - uses: "actions/upload-artifact@v4"
        with:
          name: "quantize-arm64-darwin"
          path: build/quantize-arm64-darwin

  linux-build:
    name: "Build quantize on Linux for ${{ matrix.arch }}"
    runs-on: "ubuntu-latest"
    strategy:
      fail-fast: true
      matrix:
        # arch   -> value passed to `docker --platform linux/<arch>`
        # suffix -> suffix of the binary produced under build/
        # image  -> container image the build and test run in
        include:
          - arch: "amd64"
            suffix: "x86_64-linux"
            image: "quay.io/sclorg/python-312-c8s:c8s"
          - arch: "arm64"
            suffix: "aarch64-linux"
            image: "quay.io/sclorg/python-312-c8s:c8s"
    steps:
      - uses: "actions/checkout@v4"
        with:
          submodules: true

      # QEMU lets `docker run --platform ...` execute images built for an
      # architecture other than the runner's own.
      - name: Set up QEMU
        uses: "docker/setup-qemu-action@v3"

      # NOTE(review): no step in this job calls `docker build`/`buildx`;
      # this setup step may be removable - confirm before dropping it.
      - name: Set up Docker Buildx
        uses: "docker/setup-buildx-action@v3"

      - name: "Pull ${{ matrix.image }} for linux/${{ matrix.arch }}"
        run: |
          docker pull --platform linux/${{ matrix.arch }} ${{ matrix.image }}

      # The checkout is bind-mounted at /opt/app-root/src (presumably the
      # image's default working directory - confirm), so build/ output ends
      # up in the runner workspace. The mount uses an absolute host path
      # because relative bind-mount sources are not accepted by every Docker
      # CLI version; --rm removes the container once the command exits.
      - name: make build/quantize from llama.cpp sources
        run: |
          set -e
          docker run --rm --platform linux/${{ matrix.arch }} \
            ${{ matrix.image }} uname -a
          docker run --rm --platform linux/${{ matrix.arch }} \
            -v "${PWD}:/opt/app-root/src" \
            -e CMAKE_ARGS="-DLLAMA_FATAL_WARNINGS=ON" \
            ${{ matrix.image }} \
            make quantize

      - name: file info
        run: file build/quantize-${{ matrix.suffix }}

      # Prints the distinct GLIBC-versioned symbol strings found in the
      # binary's symbol table.
      - name: file symbols
        run: nm -a build/quantize-${{ matrix.suffix }} | grep -o "GLIBC.*" | sort -u

      # Smoke test inside the same image/platform the binary was built for.
      # The output file is written to the container's /tmp and discarded.
      - name: test quantize
        run: |
          docker run --rm --platform linux/${{ matrix.arch }} \
            -v "${PWD}:/opt/app-root/src" \
            ${{ matrix.image }} \
            build/quantize-${{ matrix.suffix }} \
            llama.cpp/models/ggml-vocab-llama.gguf \
            /tmp/ggml-vocab-Q4_K_M.gguf \
            Q4_K_M

      - uses: "actions/upload-artifact@v4"
        with:
          name: "quantize-${{ matrix.suffix }}"
          path: build/quantize-${{ matrix.suffix }}

  # Runs after both build jobs and combines the uploaded artifacts into a
  # single artifact named `quantize`.
  merge-artifacts:
    name: Merge artifacts
    runs-on: ubuntu-latest
    needs:
      - macos-build
      - linux-build
    steps:
      - name: Merge artifacts
        uses: "actions/upload-artifact/merge@v4"
        with:
          name: quantize