llama.cpp.patch
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 9c86407c..16160e65 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -651,16 +651,24 @@ struct server_context {
// Clear any sampling context
for (server_slot & slot : slots) {
- common_sampler_free(slot.smpl);
- slot.smpl = nullptr;
+ if (slot.smpl != nullptr) {
+ common_sampler_free(slot.smpl);
+ slot.smpl = nullptr;
+ }
- llama_free(slot.ctx_dft);
- slot.ctx_dft = nullptr;
+ if (slot.ctx_dft != nullptr) {
+ llama_free(slot.ctx_dft);
+ slot.ctx_dft = nullptr;
+ }
- common_speculative_free(slot.spec);
- slot.spec = nullptr;
+ if (slot.spec != nullptr) {
+ common_speculative_free(slot.spec);
+ slot.spec = nullptr;
+ }
- llama_batch_free(slot.batch_spec);
+ if (slot.ctx_dft) {
+ llama_batch_free(slot.batch_spec);
+ }
}
llama_batch_free(batch);
@@ -2403,7 +2411,7 @@ inline void signal_handler(int signal) {
shutdown_handler(signal);
}
-int main(int argc, char ** argv) {
+int main_server(int argc, char ** argv) {
// own arguments required by this example
common_params params;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 1665e9dc..56d24e3b 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -25,20 +25,20 @@
using json = nlohmann::ordered_json;
-#define SLT_INF(slot, fmt, ...) LOG_INF("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-#define SLT_WRN(slot, fmt, ...) LOG_WRN("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-#define SLT_ERR(slot, fmt, ...) LOG_ERR("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-#define SLT_DBG(slot, fmt, ...) LOG_DBG("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
-
-#define SRV_INF(fmt, ...) LOG_INF("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define SRV_WRN(fmt, ...) LOG_WRN("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define SRV_ERR(fmt, ...) LOG_ERR("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define SRV_DBG(fmt, ...) LOG_DBG("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-
-#define QUE_INF(fmt, ...) LOG_INF("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define QUE_WRN(fmt, ...) LOG_WRN("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define QUE_ERR(fmt, ...) LOG_ERR("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
-#define QUE_DBG(fmt, ...) LOG_DBG("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SLT_INF(slot, fmt, ...) LOG_INF("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+// #define SLT_WRN(slot, fmt, ...) LOG_WRN("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+// #define SLT_ERR(slot, fmt, ...) LOG_ERR("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+// #define SLT_DBG(slot, fmt, ...) LOG_DBG("slot %12.*s: id %2d | task %d | " fmt, 12, __func__, (slot).id, (slot).id_task, __VA_ARGS__)
+
+// #define SRV_INF(fmt, ...) LOG_INF("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SRV_WRN(fmt, ...) LOG_WRN("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SRV_ERR(fmt, ...) LOG_ERR("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define SRV_DBG(fmt, ...) LOG_DBG("srv %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+
+// #define QUE_INF(fmt, ...) LOG_INF("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define QUE_WRN(fmt, ...) LOG_WRN("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define QUE_ERR(fmt, ...) LOG_ERR("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
+// #define QUE_DBG(fmt, ...) LOG_DBG("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
// https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
enum error_type {
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 65cb92c4..fa5bb0a8 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -318,7 +318,7 @@
extern "C" {
#endif
- GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
+ //GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
enum ggml_status {
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 9022aa3a..ee8cb9cc 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -204,7 +204,7 @@ endif()
# ggml
if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
- message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
+ message(WARNING "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
endif()
add_library(ggml-base
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index 849c1192..1346b7f1 100644
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -55,7 +55,7 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA
if (err_ != vk::Result::eSuccess) { \
fprintf(stderr, "ggml_vulkan: %s error %s at %s:%d\n", \
#err, to_string(err_).c_str(), __FILE__, __LINE__); \
- exit(1); \
+ std::terminate(); \
} \
} while (0)
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 1a9a7efa..48b6de63 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -174,7 +174,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
fprintf(stderr, "\n");
ggml_print_backtrace();
- abort();
+ raise(SIGSEGV);
}
//