ggml: update ggml submodule
Signed-off-by: Daniel Bevenius <[email protected]>
danbev committed Jul 22, 2024
1 parent 49afa58 commit aba1b4b
Showing 2 changed files with 77 additions and 1 deletion.
2 changes: 1 addition & 1 deletion fundamentals/ggml/ggml
Submodule ggml updated 54 files
+8 −2 CMakeLists.txt
+13 −1 README.md
+1 −1 ci/run.sh
+31 −5 docs/gguf.md
+0 −1 examples/common.cpp
+0 −12 examples/gpt-2/CMakeLists.txt
+1 −1 examples/yolo/yolov3-tiny.cpp
+0 −1 include/ggml-metal.h
+4 −4 requirements.txt
+1 −1 scripts/sync-llama.last
+1 −1 scripts/sync-whisper.last
+7 −2 src/CMakeLists.txt
+7 −7 src/ggml-common.h
+48 −24 src/ggml-cuda.cu
+55 −25 src/ggml-cuda/common.cuh
+87 −0 src/ggml-cuda/conv-transpose-1d.cu
+5 −0 src/ggml-cuda/conv-transpose-1d.cuh
+0 −1 src/ggml-cuda/cpy.cu
+14 −54 src/ggml-cuda/fattn-common.cuh
+8 −0 src/ggml-cuda/mmq.cu
+199 −47 src/ggml-cuda/mmq.cuh
+16 −10 src/ggml-cuda/mmvq.cu
+2 −1 src/ggml-cuda/template-instances/generate_cu_files.py
+5 −0 src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu
+5 −0 src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu
+326 −391 src/ggml-cuda/vecdotq.cuh
+0 −1 src/ggml-metal.metal
+0 −1 src/ggml-quants.h
+48 −1,002 src/ggml-sycl.cpp
+3 −0 src/ggml-sycl/backend.hpp
+54 −4 src/ggml-sycl/common.hpp
+5 −2 src/ggml-sycl/convert.cpp
+19 −11 src/ggml-sycl/dequantize.hpp
+23 −22 src/ggml-sycl/dmmv.cpp
+5 −5 src/ggml-sycl/dpct/helper.hpp
+64 −61 src/ggml-sycl/mmvq.cpp
+374 −0 src/ggml-sycl/norm.cpp
+35 −0 src/ggml-sycl/norm.hpp
+2 −1 src/ggml-sycl/presets.hpp
+275 −0 src/ggml-sycl/rope.cpp
+22 −0 src/ggml-sycl/rope.hpp
+250 −0 src/ggml-sycl/softmax.cpp
+24 −0 src/ggml-sycl/softmax.hpp
+0 −21 src/ggml-sycl/vecdotq.hpp
+2 −2 src/ggml-vulkan.cpp
+1 −2 src/ggml.c
+8 −0 tests/CMakeLists.txt
+2 −2 tests/test-arange.cpp
+38 −1 tests/test-backend-ops.cpp
+699 −0 tests/test-conv-transpose-1d.cpp
+2 −4 tests/test-conv1d.cpp
+2 −4 tests/test-conv2d.cpp
+2 −4 tests/test-mul-mat.cpp
+2 −2 tests/test-timestep_embedding.cpp
76 changes: 76 additions & 0 deletions fundamentals/ggml/src/backend-schedular.c
@@ -0,0 +1,76 @@
#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

void print_backend_info(ggml_backend_buffer_t buffer) {
    printf("------- backend info -------\n");
    printf("buffer name: %s\n", ggml_backend_buffer_name(buffer));
    printf("buffer size: %zu\n", ggml_backend_buffer_get_size(buffer));
    printf("buffer alignment: %zu\n", ggml_backend_buffer_get_alignment(buffer));
    printf("buffer max size: %zu\n", ggml_backend_buffer_get_max_size(buffer));
    printf("buffer is host: %d\n", ggml_backend_buffer_is_host(buffer));

    ggml_backend_buffer_type_t buffer_type = ggml_backend_buffer_get_type(buffer);
    printf("buffer type name: %s\n", ggml_backend_buft_name(buffer_type));
    printf("buffer type alignment: %zu\n", ggml_backend_buft_get_alignment(buffer_type));
    printf("buffer type max size: %zu\n", ggml_backend_buft_get_max_size(buffer_type));
    printf("buffer type is host: %d\n", ggml_backend_buft_is_host(buffer_type));
}

int main(int argc, char **argv) {
    printf("GGML backend examples\n");

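    // no_alloc = true means the context only stores tensor metadata;
    // tensor data must be allocated in a backend buffer later.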
    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = true,
    };
    struct ggml_context* ctx = ggml_init(params);
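    // With no_alloc set, the new tensor has no data buffer yet, so
    // x->buffer stays NULL until a backend allocates it.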
    struct ggml_tensor* x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10);
    ggml_set_name(x, "x");
    printf("x backend type (0=CPU, 10=GPU): %d\n", x->backend);
    if (x->buffer == NULL) {
        printf("x backend buffer is NULL\n");
    } else {
        printf("x backend buffer: %s\n", ggml_backend_buffer_name(x->buffer));
    }

    // The following will call ggml_backend_registry_init.
    size_t count = ggml_backend_reg_get_count();
    printf("Number of backends registered: %zu\n", count);
    for (size_t i = 0; i < count; i++) {
        printf("backend %zu name: %s\n", i, ggml_backend_reg_get_name(i));
    }

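    // Look up a registered backend by name and initialize it.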
    ggml_backend_t cpu_backend = ggml_backend_reg_init_backend_from_str("CPU");
    if (cpu_backend != NULL) {
        ggml_backend_buffer_t buffer = ggml_backend_alloc_buffer(cpu_backend, 10*4);
        print_backend_info(buffer);
        ggml_backend_buffer_free(buffer);
        ggml_backend_free(cpu_backend);
    }

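    // This returns NULL when ggml was built without CUDA support.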
    ggml_backend_t cuda_backend = ggml_backend_reg_init_backend_from_str("CUDA0");
    if (cuda_backend != NULL) {
        ggml_backend_buffer_t buffer = ggml_backend_alloc_buffer(cuda_backend, 10*4);
        print_backend_info(buffer);
        ggml_backend_buffer_free(buffer);
        printf("x backend type (%d=CPU, %d=GPU): %d\n", GGML_BACKEND_TYPE_CPU, GGML_BACKEND_TYPE_GPU, x->backend);
        if (x->buffer != NULL) {
            printf("x backend buffer: %s\n", ggml_backend_buffer_name(x->buffer));
        }

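        // Allocate backend buffers for all tensors in the context; this is
        // what assigns x->buffer, since the context was created with no_alloc.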
        ggml_backend_buffer_t t = ggml_backend_alloc_ctx_tensors(ctx, cuda_backend);

        static float data_array[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
        void* data = (void*) data_array;

        // The following copies the data from the host to the device.
        // Note that the size argument is in bytes.
        ggml_backend_tensor_set(x, data, 0, 10*sizeof(float));
        printf("x backend type (%d=CPU, %d=GPU): %d\n", GGML_BACKEND_TYPE_CPU, GGML_BACKEND_TYPE_GPU, x->backend);
        ggml_backend_buffer_free(t);
        ggml_backend_free(cuda_backend);
    }

    ggml_free(ctx);
    return 0;
}
