llama.cpp.patch
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 9c86407c..16160e65 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -651,16 +651,24 @@ struct server_context {
// Clear any sampling context
for (server_slot & slot : slots) {
- common_sampler_free(slot.smpl);
- slot.smpl = nullptr;
+ if (slot.smpl != nullptr) {
+ common_sampler_free(slot.smpl);
+ slot.smpl = nullptr;
+ }
- llama_free(slot.ctx_dft);
- slot.ctx_dft = nullptr;
+ if (slot.ctx_dft != nullptr) {
+ llama_free(slot.ctx_dft);
+ slot.ctx_dft = nullptr;
+ }
- common_speculative_free(slot.spec);
- slot.spec = nullptr;
+ if (slot.spec != nullptr) {
+ common_speculative_free(slot.spec);
+ slot.spec = nullptr;
+ }
- llama_batch_free(slot.batch_spec);
+ if (slot.ctx_dft) {
+ llama_batch_free(slot.batch_spec);
+ }
}
llama_batch_free(batch);
@@ -2403,7 +2411,7 @@ inline void signal_handler(int signal) {
shutdown_handler(signal);
}
-int main(int argc, char ** argv) {
+int main_server(int argc, char ** argv) {
// own arguments required by this example
common_params params;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 1665e9dc..56d24e3b 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -25,20 +25,20 @@
using json = nlohmann::ordered_json;
-#define SLT_INF(slot, fmt, ...) LOG_INF("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-#define SLT_WRN(slot, fmt, ...) LOG_WRN("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-#define SLT_ERR(slot, fmt, ...) LOG_ERR("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-#define SLT_DBG(slot, fmt, ...) LOG_DBG("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-
-#define SRV_INF(fmt, ...) LOG_INF("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define SRV_WRN(fmt, ...) LOG_WRN("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define SRV_ERR(fmt, ...) LOG_ERR("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define SRV_DBG(fmt, ...) LOG_DBG("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-
-#define QUE_INF(fmt, ...) LOG_INF("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define QUE_WRN(fmt, ...) LOG_WRN("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define QUE_ERR(fmt, ...) LOG_ERR("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define QUE_DBG(fmt, ...) LOG_DBG("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SLT_INF(slot, fmt, ...) LOG_INF("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+// #define SLT_WRN(slot, fmt, ...) LOG_WRN("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+// #define SLT_ERR(slot, fmt, ...) LOG_ERR("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+// #define SLT_DBG(slot, fmt, ...) LOG_DBG("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+
+// #define SRV_INF(fmt, ...) LOG_INF("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SRV_WRN(fmt, ...) LOG_WRN("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SRV_ERR(fmt, ...) LOG_ERR("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SRV_DBG(fmt, ...) LOG_DBG("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+
+// #define QUE_INF(fmt, ...) LOG_INF("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define QUE_WRN(fmt, ...) LOG_WRN("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define QUE_ERR(fmt, ...) LOG_ERR("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define QUE_DBG(fmt, ...) LOG_DBG("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
// https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
enum error_type {
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 65cb92c4..fa5bb0a8 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -318,7 +318,7 @@
extern "C" {
#endif
- GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
+ //GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
enum ggml_status {
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 9022aa3a..ee8cb9cc 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -204,7 +204,7 @@ endif()
# ggml
if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
- message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
+ message(WARNING "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
endif()
add_library(ggml-base
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 849c1192..1346b7f1 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -55,7 +55,7 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA
if (err_ != vk::Result::eSuccess) { \
fprintf(stderr, "ggml_vulkan: %s error %s at %s:%d\n", \
#err, to_string(err_).c_str(), __FILE__, __LINE__); \
- exit(1); \
+ std::terminate(); \
} \
} while (0)
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 1a9a7efa..48b6de63 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -174,7 +174,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
fprintf(stderr, "\n");
ggml_print_backtrace();
- abort();
+ raise(SIGSEGV);
}
//