From d315fb77bb4822c9b364a6751342622a17be6378 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Mon, 10 Feb 2025 17:22:20 -0700 Subject: [PATCH] add sqlite-lembed as a dependency https://github.com/asg017/sqlite-lembed latest HEAD is downloaded tarball is not provided in order to get submodules C11 is now required C++11 is required if building sqlite-lembed/llama.cpp CMake 3.16+ is now required added DEP_AI option; defaults to ON --- .github/workflows/check.yml | 4 +- .github/workflows/test.yml | 2 + CMakeLists.txt | 69 ++++++++++++++--- contrib/CI/macos.sh | 2 +- contrib/CI/ubuntu.sh | 1 + contrib/deps/install_deps.sh | 23 +++--- contrib/deps/sqlite-lembed.sh | 106 +++++++++++++++++++++++++++ docs/latex/sections/dependencies.tex | 2 +- src/CMakeLists.txt | 6 ++ src/dbutils.c | 8 ++ 10 files changed, 198 insertions(+), 25 deletions(-) create mode 100755 contrib/deps/sqlite-lembed.sh diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 15ff28da4..cbc91468a 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -129,8 +129,8 @@ jobs: - name: Uninstall CMake from package manager run: yum -y autoremove cmake3 - - name: Install CMake 3.5.0 - run: contrib/CI/cmake-download.sh 3.5.0 linux-x86_64 | tail -n 1 | sed 's/linux/Linux/g' >> "${GITHUB_PATH}" + - name: Install CMake 3.16.0 + run: contrib/CI/cmake-download.sh 3.16.0 linux-x86_64 | tail -n 1 | sed 's/linux/Linux/g' >> "${GITHUB_PATH}" - name: Check CMake Version run: cmake --version diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 31f961956..62c37aa61 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -115,6 +115,8 @@ jobs: matrix: os: [ "macos-14", "macos-15" ] runs-on: ${{ matrix.os }} + env: + LDFLAGS: "-framework Foundation -framework Metal -framework MetalKit" steps: - uses: actions/checkout@v4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 05fce024d..7a77895c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ 
-60,7 +60,7 @@ -cmake_minimum_required(VERSION 3.5.0) +cmake_minimum_required(VERSION 3.16.0) # Don't allow in-source build get_filename_component(CMAKE_SOURCE_DIR "${CMAKE_SOURCE_DIR}" REALPATH) @@ -133,7 +133,7 @@ endif() message(STATUS "GUFI ${VERSION_STRING}") # Require C99 -set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED ON) # Need _XOPEN_SRC @@ -155,10 +155,16 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug") endif() endif() -# Only enable C++ if support is found +if (CYGWIN) + set(DEP_AI OFF) +else() + set(DEP_AI ON CACHE BOOL "Whether or not to build and link with SQLite AI modules") +endif() + +# Only enable C++ if support is found or if llama.cpp is required include(CheckLanguage) check_language(CXX) -if (CMAKE_CXX_COMPILER) +if (CMAKE_CXX_COMPILER OR DEP_AI) enable_language(CXX) # GCC prior to 4.9.3 doesn't have full standard support @@ -295,17 +301,27 @@ set(CLIENT OFF CACHE BOOL "Whet set(DEP_PATCH_SQLITE3_OPEN OFF CACHE BOOL "Whether or not to patch SQLite3 open") set(DEP_USE_JEMALLOC ON CACHE BOOL "Whether or not to build and link with jemalloc") set(DEP_OPTIONS --threads "${DEP_BUILD_THREADS}") -if (CMAKE_CXX_COMPILER) - set(DEP_OPTIONS ${DEP_OPTIONS} --cxx) -endif() if (DEP_PATCH_SQLITE3_OPEN) set(DEP_OPTIONS ${DEP_OPTIONS} --patch-sqlite3-open) endif() if (DEP_USE_JEMALLOC) set(DEP_OPTIONS ${DEP_OPTIONS} --jemalloc) endif() +if (DEP_AI) + set(DEP_OPTIONS ${DEP_OPTIONS} --AI) +endif() add_custom_target(install_dependencies ALL COMMAND ${CMAKE_SOURCE_DIR}/contrib/deps/install_deps.sh ${DEP_OPTIONS} "${DEP_DOWNLOAD_PREFIX}" "${DEP_BUILD_PREFIX}" "${DEP_INSTALL_PREFIX}" "${CMAKE_VERSION}" "${CMAKE_SYSTEM_NAME}") +# llama.cpp uses OpenMP (not required, but left enabled) +if (DEP_AI) + if (UNIX AND NOT APPLE) + find_package(OpenMP REQUIRED COMPONENTS C) + message(STATUS "OpenMP CFLAGS: ${OpenMP_C_FLAGS}") + else() + set(OpenMP_C_FLAGS) + endif() +endif() + # Find zlib pkg_search_module(ZLIB zlib) if (ZLIB_FOUND) @@ -388,22 +404,52 
@@ set(COMMON_INCLUDES ${XATTR_INCLUDEDIR} ${DEP_INSTALL_PREFIX}/sqlite3/include ${DEP_INSTALL_PREFIX}/sqlite3-pcre - ${DEP_INSTALL_PREFIX}/sqlite-vec/include -) + ) + +if (DEP_AI) + list(APPEND COMMON_INCLUDES + ${DEP_INSTALL_PREFIX}/sqlite-vec/include + ${DEP_INSTALL_PREFIX}/sqlite-lembed/include + ${DEP_INSTALL_PREFIX}/llama.cpp/include + ) +endif() include_directories(${COMMON_INCLUDES}) # set of libraries needed by the final binaries set(COMMON_LIBRARIES GUFI - ${DEP_INSTALL_PREFIX}/sqlite-vec/lib/libsqlite_vec0.a ${DEP_INSTALL_PREFIX}/sqlite3-pcre/libsqlite3-pcre.a ${DEP_INSTALL_PREFIX}/sqlite3/lib/libsqlite3.a ${PCRE2_LDFLAGS} Threads::Threads - m ) +if (DEP_AI) + list(APPEND COMMON_LIBRARIES + ${DEP_INSTALL_PREFIX}/sqlite-vec/lib/libsqlite_vec0.a + ${DEP_INSTALL_PREFIX}/sqlite-lembed/lib/libsqlite-lembed0.a + -L${DEP_INSTALL_PREFIX}/llama.cpp/lib llama -lstdc++ ${OpenMP_C_FLAGS} + -L${DEP_INSTALL_PREFIX}/sqlite3/lib/ sqlite3 # only needed to make sure definitions exist - not actually linked? 
+ ) + + if (APPLE) + find_library(METAL Metal) # REQUIRED was not added until CMake 3.18 + if (NOT METAL) + message(FATAL_ERROR "Could not find Metal Framework") + endif() + find_library(ACCELERATE Accelerate) # REQUIRED was not added until CMake 3.18 + if (NOT ACCELERATE) + message(FATAL_ERROR "Could not find Accelerate Framework") + endif() + + list(APPEND COMMON_LIBRARIES + ${ACCELERATE} + ${METAL} + ) + endif() +endif() + if (ZLIB_FOUND) list(APPEND COMMON_LIBRARIES z @@ -417,6 +463,7 @@ if (DEP_USE_JEMALLOC) endif() list(APPEND COMMON_LIBRARIES + m dl ) diff --git a/contrib/CI/macos.sh b/contrib/CI/macos.sh index b57ce9ae9..ac7cc8e05 100755 --- a/contrib/CI/macos.sh +++ b/contrib/CI/macos.sh @@ -73,7 +73,7 @@ brew --prefix grep echo "$(brew --prefix)/opt/grep/libexec/gnubin" >> "${GITHUB_PATH}" # install required packages -brew install autoconf coreutils cmake gettext pcre2 +brew install autoconf coreutils cmake gettext llvm libomp pcre2 # get osxfuse from homebrew/cask brew tap homebrew/cask diff --git a/contrib/CI/ubuntu.sh b/contrib/CI/ubuntu.sh index 4bca0b6b1..5d99f959c 100755 --- a/contrib/CI/ubuntu.sh +++ b/contrib/CI/ubuntu.sh @@ -73,6 +73,7 @@ apt update apt -y install \ libattr1-dev \ libfuse-dev \ + libomp-dev \ libpcre2-dev \ zlib1g-dev diff --git a/contrib/deps/install_deps.sh b/contrib/deps/install_deps.sh index b93d9b5c2..ba230f7ed 100755 --- a/contrib/deps/install_deps.sh +++ b/contrib/deps/install_deps.sh @@ -80,9 +80,9 @@ else fi THREADS="1" -BUILD_CXX="false" PATCH_SQLITE3_OPEN="false" JEMALLOC="false" +AI="false" # https://stackoverflow.com/a/14203146 # Bruno Bronosky @@ -96,15 +96,15 @@ case $key in THREADS="$2" shift # past count ;; - --cxx) - BUILD_CXX="true" - ;; --patch-sqlite3-open) PATCH_SQLITE3_OPEN="true" ;; --jemalloc) JEMALLOC="true" ;; + --AI) + AI="true" + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later ;; @@ -151,8 +151,13 @@ source "${SCRIPT_PATH}/sqlite3.sh" "${PATCH_SQLITE3_OPEN}" echo "Installing 
SQLite3 PCRE" source "${SCRIPT_PATH}/sqlite3-pcre.sh" -echo "Installing SQLite3 vec" -source "${SCRIPT_PATH}/sqlite-vec.sh" +if [[ "${AI}" == "true" ]]; then + echo "Installing SQLite3 vec" + source "${SCRIPT_PATH}/sqlite-vec.sh" + + echo "Installing SQLite3 lembed" + source "${SCRIPT_PATH}/sqlite-lembed.sh" +fi if [[ "${JEMALLOC}" == "true" ]]; then echo "Installing jemalloc" @@ -164,9 +169,7 @@ HIGHEST_VERSION=$( (echo "${CMAKE_VERSION}"; echo "${ACCEPTABLE_VERSION}") | sor if [[ "${CMAKE_SYSTEM_NAME}" != "CYGWIN" ]]; then if [[ "${CMAKE_VERSION}" == "${HIGHEST_VERSION}" ]]; then - if [[ "${BUILD_CXX}" == "true" ]]; then - echo "Installing GoogleTest" - source "${SCRIPT_PATH}/googletest.sh" - fi + echo "Installing GoogleTest" + source "${SCRIPT_PATH}/googletest.sh" fi fi diff --git a/contrib/deps/sqlite-lembed.sh b/contrib/deps/sqlite-lembed.sh new file mode 100755 index 000000000..a9c63b329 --- /dev/null +++ b/contrib/deps/sqlite-lembed.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +# This file is part of GUFI, which is part of MarFS, which is released +# under the BSD license. +# +# +# Copyright (c) 2017, Los Alamos National Security (LANS), LLC +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation and/or +# other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# From Los Alamos National Security, LLC: +# LA-CC-15-039 +# +# Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved. +# Copyright 2017. Los Alamos National Security, LLC. This software was produced +# under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National +# Laboratory (LANL), which is operated by Los Alamos National Security, LLC for +# the U.S. Department of Energy. The U.S. Government has rights to use, +# reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS +# ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR +# ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is +# modified to produce derivative works, such modified software should be +# clearly marked, so as not to confuse it with the version available from +# LANL. +# +# THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +# OF SUCH DAMAGE. + + + +# build and install sqlite-lembed (and the llama.cpp submodule it is built against) + +set -e + +# install sqlite3 first +"${SCRIPT_PATH}/sqlite3.sh" + +# Assume all paths exist + +lembed_name="sqlite-lembed" +lembed_prefix="${INSTALL_DIR}/${lembed_name}" +if [[ ! -d "${lembed_prefix}" ]]; then + lembed_build="${BUILD_DIR}/sqlite-lembed" + + # not providing tarball in order to get submodules + if [[ ! -d "${lembed_build}" ]]; then + git clone --recurse-submodule https://github.com/asg017/sqlite-lembed.git "${lembed_build}" + fi + + # build llama.cpp submodule + llama_install="${INSTALL_DIR}/llama.cpp" + if [[ ! -d "${llama_install}" ]]; then + cd "${lembed_build}/vendor/llama.cpp" + mkdir -p build + cd build + CC="${CC}" CXX="${CXX}" CXXFLAGS="-I${INSTALL_DIR}/sqlite3" "${CMAKE}" .. 
-DCMAKE_INSTALL_PREFIX="${llama_install}" -DCMAKE_INSTALL_LIBDIR=lib + make -j "${THREADS}" + make -j "${THREADS}" install + fi + + cd "${lembed_build}" + make sqlite-lembed.h + # copied from sqlite-vec Makefile + "${CC}" -c -g3 -O3 -DSQLITE_EXTRA_INIT=core_init -DSQLITE_CORE \ + -DSQLITE_ENABLE_STMT_SCANSTATUS -DSQLITE_ENABLE_BYTECODE_VTAB \ + -DSQLITE_ENABLE_EXPLAIN_COMMENTS \ + -I"${llama_install}/include" \ + -I"${INSTALL_DIR}/sqlite3/include" \ + sqlite-lembed.c -o libsqlite-lembed0.a + mkdir -p "${lembed_prefix}/include" "${lembed_prefix}/lib" + cp sqlite-lembed.h "${lembed_prefix}/include" + cp libsqlite-lembed0.a "${lembed_prefix}/lib" +fi diff --git a/docs/latex/sections/dependencies.tex b/docs/latex/sections/dependencies.tex index 52240b2ae..f815f930c 100644 --- a/docs/latex/sections/dependencies.tex +++ b/docs/latex/sections/dependencies.tex @@ -77,7 +77,7 @@ \subsection{System Tools} C++ Compiler & No & C++11 support - g++ 4.9.3, clang++-3.9, or newer \hfill \\ \hline - CMake & Yes & Version 3.5 or higher \\ + CMake & Yes & Version 3.16 or higher \\ \hline Make & Yes & \\ \hline diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e61a41600..33f5383fe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -93,6 +93,12 @@ if (LOCALTIME_R) set_source_files_properties(addqueryfuncs.c PROPERTIES COMPILE_FLAGS -DLOCALTIME_R) endif() set_source_files_properties(bf.c PROPERTIES COMPILE_FLAGS -DGUFI_VERSION="${VERSION_STRING}") +if (DEP_AI) + set_property(SOURCE dbutils.c APPEND_STRING PROPERTY COMPILE_FLAGS " -DHAVE_AI=1") # APPEND_STRING concatenates with no separator, so every flag carries its own leading space + set_property(SOURCE dbutils.c APPEND_STRING PROPERTY COMPILE_FLAGS " -I${DEP_INSTALL_PREFIX}/sqlite-vec/include") + set_property(SOURCE dbutils.c APPEND_STRING PROPERTY COMPILE_FLAGS " -I${DEP_INSTALL_PREFIX}/sqlite-lembed/include") + set_property(SOURCE dbutils.c APPEND_STRING PROPERTY COMPILE_FLAGS " -I${DEP_INSTALL_PREFIX}/llama.cpp/include") +endif() #If the GPFS library exists, build the gpfs scan tool if (GPFS_LIB AND GPFS_BUILD) 
diff --git a/src/dbutils.c b/src/dbutils.c index 47dadbe86..53aee49b9 100644 --- a/src/dbutils.c +++ b/src/dbutils.c @@ -71,7 +71,10 @@ OF SUCH DAMAGE. #include #include "pcre.h" +#ifdef HAVE_AI #include "sqlite-vec.h" +#include "sqlite-lembed.h" +#endif #include "BottomUp.h" #include "dbutils.h" @@ -281,8 +284,13 @@ sqlite3 *opendb(const char *name, int flags, const int setpragmas, const int loa /* load the sqlite3-pcre extension */ sqlite3_pcre2_init(db, NULL, NULL); + #ifdef HAVE_AI /* load the sqlite-vec extension */ sqlite3_vec_init(db, NULL, NULL); + + /* load the sqlite-lembed extension */ + sqlite3_lembed_init(db, NULL, NULL); + #endif } if (modifydb) {