server.cfg
{
    "host": "0.0.0.0",
    "port": 8000,
    "models": [
        {
            "model": "/home/test/llm-models/chatglm3-ggml.bin",
            "model_alias": "chatglm-3",
            "chat_format": "chatglm",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/chatglm4-ggml.bin",
            "model_alias": "glm-4",
            "chat_format": "chatglm",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/bge-large-zh-v1.5-q4_k_m.gguf",
            "model_alias": "bge-large-zh-v1.5",
            "chat_format": "bert",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "embedding": true,
            "n_batch": 512,
            "verbose": false
        },
        {
            "model": "/home/test/.cache/huggingface/hub/models--BAAI--bge-m3/snapshots/babcf60cae0a1f438d7ade582983d4ba462303c2/onnx/",
            "model_alias": "bge-m3",
            "chat_format": "bge-onnx",
            "embedding": true,
            "n_gpu_layers": 0,
            "n_ctx": 8192,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/chatglm3-ggml-q8.bin",
            "model_alias": "chatglm-3-q8",
            "chat_format": "chatglm",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/gorilla-openfunctions-v2-q4_K_M.gguf",
            "hf_pretrained_model_name_or_path": "gorilla-llm/gorilla-openfunctions-v2",
            "model_alias": "openfunctions",
            "chat_format": "openfunctions",
            "n_gpu_layers": 0,
            "n_ctx": 4096,
            "embedding": false,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/functionary-small-v2.4.Q4_0.gguf",
            "model_alias": "functionary",
            "chat_format": "functionary-v2",
            "hf_pretrained_model_name_or_path": "meetkai/functionary-small-v2.4",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "embedding": false,
            "n_ctx": 8192,
            "use_mmap": true
        },
        {
            "model": "/home/test/llm-models/functionary-small-v2.5.Q4_0.gguf",
            "model_alias": "functionary-v2.5",
            "chat_format": "functionary-v2",
            "hf_pretrained_model_name_or_path": "meetkai/functionary-small-v2.5",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "embedding": false,
            "n_ctx": 8192,
            "use_mmap": true
        },
        {
            "model": "/home/test/llm-models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
            "hf_pretrained_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
            "model_alias": "llama-3-8b",
            "chat_format": "llama-3",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "n_ctx": 8192,
            "embedding": true
        },
        {
            "model": "/home/test/llm-models/ggml-model-q4_k.gguf",
            "model_alias": "llava",
            "chat_format": "llava-1-5",
            "clip_model_path": "/home/test/llm-models/mmproj-model-f16.gguf",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "embedding": false,
            "n_threads": 12,
            "n_ctx": 4096,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
            "model_alias": "mistral-7b",
            "chat_format": "mistral-instruct",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/mixtral-8x7b-instruct-v0.1.Q3_K_M.gguf",
            "model_alias": "mixtral-8x7b-instruct",
            "chat_format": "mistral-instruct",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "n_batch": 512
        },
        {
            "model": "/home/test/llm-models/sqlcoder-7b-2.Q4_K_M.gguf",
            "model_alias": "sqlcoder",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 16384,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/qwen2-0_5b-instruct-q4_k_m.gguf",
            "model_alias": "qwen",
            "chat_format": "qwen",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 32768,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/Qwen2-1.5B-Instruct.Q4_K_M.gguf",
            "model_alias": "qwen2-1.5b",
            "chat_format": "qwen",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 32768,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/qwen2-7b-instruct-q5_k_m.gguf",
            "model_alias": "qwen2-7b",
            "chat_format": "qwen",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 32768,
            "n_batch": 1024
        },
        {
            "model": "/home/test/llm-models/Baichuan2-13B-Chat-Q4_K_M.gguf",
            "model_alias": "baichuan-2",
            "chat_format": "baichuan-2",
            "n_gpu_layers": 0,
            "offload_kqv": true,
            "n_threads": 12,
            "n_ctx": 8192,
            "n_batch": 1024
        }
    ]
}
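
Usage sketch (assumes this file is loaded by llama-cpp-python's OpenAI-compatible server, e.g. `python -m llama_cpp.server --config_file server.cfg`; the host, port, and model_alias values come from the config above, while the api_key, prompt, and embedding input below are illustrative placeholders):

# Minimal client-side sketch against the server configured above.
# Assumption: the server is reachable at the host/port from this config
# and exposes each "model_alias" as an OpenAI model name under /v1.
from openai import OpenAI

# Dummy api_key: the local server does not require one unless an api_key
# is configured on the server side.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-no-key-needed")

# Chat completion against one of the chat aliases (e.g. "qwen2-7b").
chat = client.chat.completions.create(
    model="qwen2-7b",
    messages=[{"role": "user", "content": "Hello!"}],  # placeholder prompt
)
print(chat.choices[0].message.content)

# Embeddings against an embedding-enabled alias (e.g. "bge-large-zh-v1.5").
emb = client.embeddings.create(
    model="bge-large-zh-v1.5",
    input=["an example sentence to embed"],  # placeholder input
)
print(len(emb.data[0].embedding))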