From cdd1db2382d4a15a1b006d558c9ae1d30f6fd1a9 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Wed, 18 Dec 2024 10:49:44 -0700 Subject: [PATCH] SQLite3 virtual tables/table-valued functions This exposes an entire GUFI tree in a single view instead of as many separate database files through the SQLite virtual table interface. Users can query gufi_vt_* virtual tables as though they have access to that table across the entire GUFI tree in one database i.e.: SELECT name, size FROM gufi_vt_pentries('index root') WHERE size > 1024; This is done by calling gufi_query through popen and reading all of the results back from the returned FILE *, which points to stdout. The first positional argument points to the starting directory, and is required. The remaining arguments are listed in the expected order and are optional: thread count, -T, and -S. To skip an argument to the left of an argument not being skipped, pass in NULL. After the final unskipped argument, the remaining arguments may be skipped by not passing in any value. -T and -S may be used to modify tree traversal behavior. GUFI user defined functions (UDFs) that do not require gufi_query state (excluding histogram functions) may be called. UDFs requiring gufi_query state - path(), epath(), fpath(), and rpath() - can be accessed from the virtual table by using columns with the same names. gufi_query now has the -u flag that causes prints to prepend the row with a 1 int (host size and endianness) column count followed by columns prepended with a 1 octet type and 1 size_t length (host size and endianness). Column separators and newlines are not printed. 
--- .github/workflows/codecov.yml | 2 +- contrib/treediff.c | 14 +- include/addqueryfuncs.h | 124 ++++ include/bf.h | 18 + include/dbutils.h | 26 +- .../{validate_inputs.h => handle_sql.h} | 6 +- include/gufi_query/query.h | 2 +- include/histogram.h | 36 +- include/print.h | 1 + src/CMakeLists.txt | 26 +- src/addqueryfuncs.c | 538 ++++++++++++++ src/bf.c | 10 + src/dbutils.c | 660 +++--------------- src/gufi_query/aggregate.c | 14 +- .../{validate_inputs.c => handle_sql.c} | 71 +- src/gufi_query/main.c | 6 +- src/gufi_query/process_queries.c | 6 +- src/gufi_query/processdir.c | 6 +- src/gufi_query/query.c | 16 +- src/gufi_sqlite3.c | 19 +- src/gufi_vt.c | 628 +++++++++++++++++ src/histogram.c | 37 +- src/print.c | 76 +- test/regression/CMakeLists.txt | 1 + test/regression/gufi_query.expected | 14 + test/regression/gufi_query.sh.in | 6 + test/regression/gufi_sqlite3.expected | 17 + test/regression/gufi_sqlite3.sh.in | 3 + test/regression/gufi_vt.expected | 236 +++++++ test/regression/gufi_vt.sh.in | 156 +++++ test/regression/setup.sh.in | 2 + test/unit/googletest/CMakeLists.txt | 1 + test/unit/googletest/PoolArgs.cpp | 1 + test/unit/googletest/bf.cpp | 8 +- test/unit/googletest/dbutils.cpp.in | 78 +++ test/unit/googletest/handle_sql.cpp | 148 ++++ test/unit/googletest/histogram.cpp | 1 + test/unit/googletest/print.cpp | 99 ++- 38 files changed, 2450 insertions(+), 663 deletions(-) create mode 100644 include/addqueryfuncs.h rename include/gufi_query/{validate_inputs.h => handle_sql.h} (96%) create mode 100644 src/addqueryfuncs.c rename src/gufi_query/{validate_inputs.c => handle_sql.c} (73%) create mode 100644 src/gufi_vt.c create mode 100644 test/regression/gufi_vt.expected create mode 100755 test/regression/gufi_vt.sh.in create mode 100644 test/unit/googletest/handle_sql.cpp diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index d0cb5edbb..79e354321 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -102,7 
+102,7 @@ jobs: run: ctest || true - name: Delete files not reported for coverage - run: find -name "*.gc*" -a \( -name "gendir.*" -o -name "make_testindex.*" -o -name "bfwreaddirplus2db.*" -o -name "bffuse.*" -o -name "bfresultfuse.*" -o -name "dfw.*" -o -name "tsmepoch2time.*" -o -name "tsmtime2epoch.*" -o -path "*/test/*" \) -delete + run: find -name "*.gc*" -a \( -name "gendir.*" -o -name "make_testindex.*" -o -name "bfwreaddirplus2db.*" -o -name "bffuse.*" -o -name "bfresultfuse.*" -o -name "dfw.*" -o -name "tsmepoch2time.*" -o -name "tsmtime2epoch.*" -o -path "*/test/*" -o \( -path "*/gufi_vt.dir/*" -a -not -name "gufi_vt.*" \) \) -delete - name: Generate Python Coverage Report run: | diff --git a/contrib/treediff.c b/contrib/treediff.c index dc7acbb83..d31a82f35 100644 --- a/contrib/treediff.c +++ b/contrib/treediff.c @@ -255,12 +255,14 @@ static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) { /* ********************************************** */ const size_t next_level = cp->level + 1; - struct PrintArgs print; - print.output_buffer = &pa->obufs.buffers[id]; - print.delim = '/'; - print.mutex = pa->obufs.mutex; - print.outfile = stdout; - print.rows = 0; + struct PrintArgs print = { + .output_buffer = &pa->obufs.buffers[id], + .delim = '/', + .mutex = pa->obufs.mutex, + .outfile = stdout, + .rows = 0, + .types = NULL, + }; char *buf[] = {NULL, NULL}; /* passed to print_parallel */ diff --git a/include/addqueryfuncs.h b/include/addqueryfuncs.h new file mode 100644 index 000000000..0d0b10356 --- /dev/null +++ b/include/addqueryfuncs.h @@ -0,0 +1,124 @@ +/* +This file is part of GUFI, which is part of MarFS, which is released +under the BSD license. + + +Copyright (c) 2017, Los Alamos National Security (LANS), LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +From Los Alamos National Security, LLC: +LA-CC-15-039 + +Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved. +Copyright 2017. Los Alamos National Security, LLC. This software was produced +under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National +Laboratory (LANL), which is operated by Los Alamos National Security, LLC for +the U.S. Department of Energy. The U.S. Government has rights to use, +reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS +ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR +ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. 
If software is +modified to produce derivative works, such modified software should be +clearly marked, so as not to confuse it with the version available from +LANL. + +THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. +*/ + + + +#ifndef ADDQUERYFUNCS_H +#define ADDQUERYFUNCS_H + +#include + +#include "bf.h" +#include "histogram.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* list of functions to add to a SQLite3 db handle that do not have user data/context */ + +void uidtouser(sqlite3_context *context, int argc, sqlite3_value **argv); +void gidtogroup(sqlite3_context *context, int argc, sqlite3_value **argv); +void modetotxt(sqlite3_context *context, int argc, sqlite3_value **argv); +void sqlite3_strftime(sqlite3_context *context, int argc, sqlite3_value **argv); +void blocksize(sqlite3_context *context, int argc, sqlite3_value **argv); +void human_readable_size(sqlite3_context *context, int argc, sqlite3_value **argv); +void sqlite_basename(sqlite3_context *context, int argc, sqlite3_value **argv); +void stdev_step(sqlite3_context *context, int argc, sqlite3_value **argv); +void stdevs_final(sqlite3_context *context); +void stdevp_final(sqlite3_context *context); +void median_step(sqlite3_context *context, int argc, sqlite3_value 
**argv); +void median_final(sqlite3_context *context); + +static inline int addqueryfuncs(sqlite3 *db) { + return !( + (sqlite3_create_function(db, "uidtouser", 1, SQLITE_UTF8, + NULL, &uidtouser, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "gidtogroup", 1, SQLITE_UTF8, + NULL, &gidtogroup, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "modetotxt", 1, SQLITE_UTF8, + NULL, &modetotxt, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "strftime", 2, SQLITE_UTF8, + NULL, &sqlite3_strftime, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "blocksize", 2, SQLITE_UTF8, + NULL, &blocksize, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "human_readable_size", 1, SQLITE_UTF8, + NULL, &human_readable_size, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "basename", 1, SQLITE_UTF8, + NULL, &sqlite_basename, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "stdevs", 1, SQLITE_UTF8, + NULL, NULL, stdev_step, stdevs_final) == SQLITE_OK) && + (sqlite3_create_function(db, "stdevp", 1, SQLITE_UTF8, + NULL, NULL, stdev_step, stdevp_final) == SQLITE_OK) && + (sqlite3_create_function(db, "median", 1, SQLITE_UTF8, + NULL, NULL, median_step, median_final) == SQLITE_OK) && + addhistfuncs(db) + ); +} + +int addqueryfuncs_with_context(sqlite3 *db, struct work *work); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/bf.h b/include/bf.h index 8ba0272f9..594d9362e 100644 --- a/include/bf.h +++ b/include/bf.h @@ -178,6 +178,24 @@ struct input { refstr_t fin; } sql; + /* + * if outputting to STDOUT or OUTFILE, get list of + * types of final output to prefix columns with + * + * set up by gufi_query but cleaned up by input_fini + */ + struct { + int prefix; + + /* set if not aggregating */ + int *tsum; + int *sum; + int *ent; + + /* set if aggregating */ + int *agg; + } types; + int printdir; int printing; int printheader; diff --git a/include/dbutils.h b/include/dbutils.h index 
a6d925486..34ca0ce3b 100644 --- a/include/dbutils.h +++ b/include/dbutils.h @@ -71,6 +71,7 @@ OF SUCH DAMAGE. #include #include "SinglyLinkedList.h" +#include "addqueryfuncs.h" #include "template_db.h" #include "utils.h" #include "xattrs.h" @@ -91,18 +92,24 @@ extern const char *SQLITE_MEMORY; #define DROP_TABLE(name) "DROP TABLE IF EXISTS " #name ";" #define DROP_VIEW(name) "DROP VIEW IF EXISTS " #name ";" -#define READDIRPLUS "readdirplus" +#define READDIRPLUS "readdirplus" +#define READDIRPLUS_SCHEMA(name) \ + "CREATE TABLE " name "(path TEXT, type TEXT, inode TEXT PRIMARY KEY, pinode TEXT, suspect INT64);" extern const char READDIRPLUS_CREATE[]; extern const char READDIRPLUS_INSERT[]; /* contains all file and link metadata for the current directory */ /* prefer pentries over entries */ #define ENTRIES "entries" +#define ENTRIES_SCHEMA(name, extra_cols) \ + "CREATE TABLE " name "(" extra_cols "name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT);" extern const char ENTRIES_CREATE[]; extern const char ENTRIES_INSERT[]; /* directory metadata + aggregate data */ #define SUMMARY "summary" +#define SUMMARY_SCHEMA(name, extra_cols) \ + "CREATE TABLE " name "(" extra_cols "name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr 
INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, rectype INT64, pinode TEXT, isroot INT64, rollupscore INT64);" extern const char SUMMARY_CREATE[]; /* view of summary table with rollups */ @@ -111,6 +118,8 @@ extern const char VRSUMMARY_CREATE[]; /* pentries pulled from children */ #define PENTRIES_ROLLUP "pentries_rollup" +#define PENTRIES_ROLLUP_SCHEMA(name) \ + "CREATE TABLE " name "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT, pinode TEXT, ppinode TEXT);" extern const char PENTRIES_ROLLUP_CREATE[]; extern const char PENTRIES_ROLLUP_INSERT[]; @@ -118,15 +127,19 @@ extern const char PENTRIES_ROLLUP_INSERT[]; #define PENTRIES "pentries" extern const char PENTRIES_CREATE[]; +/* vrentries is not created because rolled up entries tables are not correct */ + /* view of pentries view with rollups */ #define VRPENTRIES "vrpentries" extern const char VRPENTRIES_CREATE[]; /* aggregate data of tree starting at current directory */ #define TREESUMMARY "treesummary" -#define TREESUMMARY_CREATE \ - DROP_TABLE(TREESUMMARY) \ - "CREATE TABLE " TREESUMMARY "(inode TEXT, pinode TEXT, totsubdirs INT64, maxsubdirfiles INT64, maxsubdirlinks INT64, maxsubdirsize INT64, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks 
INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, totextdbs INT64, rectype INT64, uid INT64, gid INT64);" +#define TREESUMMARY_SCHEMA(name, extra_cols) \ + "CREATE TABLE " name "(" extra_cols "inode TEXT, pinode TEXT, totsubdirs INT64, maxsubdirfiles INT64, maxsubdirlinks INT64, maxsubdirsize INT64, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, totextdbs INT64, rectype INT64, uid INT64, gid INT64);" +#define TREESUMMARY_CREATE \ + DROP_TABLE(TREESUMMARY) \ + TREESUMMARY_SCHEMA(TREESUMMARY, "") extern const char TREESUMMARY_EXISTS[]; @@ -202,9 +215,6 @@ int insertsumdb(sqlite3 *sdb, const char *path, struct work *pwork, struct entry int inserttreesumdb(const char *name, sqlite3 *sdb, struct sum *su, int rectype, int uid, int gid); -int addqueryfuncs(sqlite3 *db); -int addqueryfuncs_with_context(sqlite3 *db, struct work *work); - /* xattr db list item */ struct xattr_db { long long int pinode; @@ -258,6 +268,8 @@ enum CheckRollupScore { int bottomup_collect_treesummary(sqlite3 *db, const char *dirname, sll_t *subdirs, const enum CheckRollupScore check_rollupscore); +int *get_col_types(sqlite3 *db, const refstr_t *sql, int *cols); + #ifdef __cplusplus } 
#endif diff --git a/include/gufi_query/validate_inputs.h b/include/gufi_query/handle_sql.h similarity index 96% rename from include/gufi_query/validate_inputs.h rename to include/gufi_query/handle_sql.h index e5781aecc..f29e2cc3d 100644 --- a/include/gufi_query/validate_inputs.h +++ b/include/gufi_query/handle_sql.h @@ -62,11 +62,11 @@ OF SUCH DAMAGE. -#ifndef GUFI_QUERY_VALIDATE_INPUTS_H -#define GUFI_QUERY_VALIDATE_INPUTS_H +#ifndef GUFI_QUERY_HANDLE_SQL_H +#define GUFI_QUERY_HANDLE_SQL_H #include "bf.h" -int validate_inputs(struct input *in); +int handle_sql(struct input *in); #endif diff --git a/include/gufi_query/query.h b/include/gufi_query/query.h index cd8afe743..4af2c9df5 100644 --- a/include/gufi_query/query.h +++ b/include/gufi_query/query.h @@ -71,7 +71,7 @@ OF SUCH DAMAGE. void querydb(struct work *work, const char *dbname, const size_t dbname_len, - sqlite3 *db, const char *query, + sqlite3 *db, const char *query, const int *types, PoolArgs_t *pa, int id, int (*callback)(void *, int, char **, char**), int *rc); diff --git a/include/histogram.h b/include/histogram.h index d2cfcc10b..7e6176cc3 100644 --- a/include/histogram.h +++ b/include/histogram.h @@ -68,19 +68,19 @@ OF SUCH DAMAGE. #include #include -#include "dbutils.h" +#include #ifdef __cplusplus extern "C" { #endif -/* use this to add histogram functions to a sqlite database handle */ -int addhistfuncs(sqlite3 *db); - /* * Public API for parsing returned strings. * * These structs are intended for external use. + * + * Ignore the *_step and *_final functions. They are sqlite3 UDFs that + * need to be exposed here to get linking to work for some reason. 
*/ /* ********************************************* */ @@ -108,6 +108,8 @@ typedef struct log2_hist { size_t ge; /* len >= 2^count */ } log2_hist_t; +void log2_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv); +void log2_hist_final(sqlite3_context *context); log2_hist_t *log2_hist_parse(const char *str); void log2_hist_free(log2_hist_t *hist); /* ********************************************* */ @@ -126,6 +128,8 @@ typedef struct mode_hist { size_t buckets[512]; } mode_hist_t; +void mode_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv); +void mode_hist_final(sqlite3_context *context); mode_hist_t *mode_hist_parse(const char *str); void mode_hist_free(mode_hist_t *hist); /* ********************************************* */ @@ -165,6 +169,8 @@ typedef struct time_hist { time_t ref; } time_hist_t; +void time_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv); +void time_hist_final(sqlite3_context *context); time_hist_t *time_hist_parse(const char *str); void time_hist_free(time_hist_t *hist); /* ********************************************* */ @@ -192,6 +198,9 @@ typedef struct category_hist { size_t count; } category_hist_t; +void category_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv); +void category_hist_combine_step(sqlite3_context *context, int argc, sqlite3_value **argv); +void category_hist_final(sqlite3_context *context); category_hist_t *category_hist_parse(const char *str); category_hist_t *category_hist_combine(category_hist_t *lhs, category_hist_t *rhs); void category_hist_free(category_hist_t *hist); @@ -211,10 +220,29 @@ typedef struct mode_count { size_t count; } mode_count_t; +void mode_count_final(sqlite3_context *context); mode_count_t *mode_count_parse(const char *str); void mode_count_free(mode_count_t *mc); /* ********************************************* */ +/* use this to add histogram functions to a sqlite database handle */ +static inline int addhistfuncs(sqlite3 *db) { 
+ return ( + (sqlite3_create_function(db, "log2_hist", 2, SQLITE_UTF8, + NULL, NULL, log2_hist_step, log2_hist_final) == SQLITE_OK) && + (sqlite3_create_function(db, "mode_hist", 1, SQLITE_UTF8, + NULL, NULL, mode_hist_step, mode_hist_final) == SQLITE_OK) && + (sqlite3_create_function(db, "time_hist", 2, SQLITE_UTF8, + NULL, NULL, time_hist_step, time_hist_final) == SQLITE_OK) && + (sqlite3_create_function(db, "category_hist", 2, SQLITE_UTF8, + NULL, NULL, category_hist_step, category_hist_final) == SQLITE_OK) && + (sqlite3_create_function(db, "category_hist_combine", 1, SQLITE_UTF8, + NULL, NULL, category_hist_combine_step, category_hist_final) == SQLITE_OK) && + (sqlite3_create_function(db, "mode_count", 1, SQLITE_UTF8, + NULL, NULL, category_hist_step, mode_count_final) == SQLITE_OK) + ); +} + #ifdef __cplusplus } #endif diff --git a/include/print.h b/include/print.h index 3542cfa83..785b7e582 100644 --- a/include/print.h +++ b/include/print.h @@ -82,6 +82,7 @@ typedef struct PrintArgs { pthread_mutex_t *mutex; /* mutex for printing to stdout */ FILE *outfile; size_t rows; /* number of rows returned by the query */ + const int *types; /* if set, prefix output with 1 char type and 1 length */ /* size_t printed; /\* number of records printed by the callback *\/ */ } PrintArgs_t; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 22e8ec0e7..e3d847b3b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -106,6 +106,7 @@ set(GUFI_SOURCES OutputBuffers.c QueuePerThreadPool.c SinglyLinkedList.c + addqueryfuncs.c bf.c compress.c dbutils.c @@ -199,10 +200,10 @@ add_library(gufi_query_lib OBJECT gufi_query/aggregate.c gufi_query/external.c gufi_query/gqw.c + gufi_query/handle_sql.c gufi_query/process_queries.c gufi_query/processdir.c gufi_query/query.c - gufi_query/validate_inputs.c ) add_dependencies(gufi_query_lib GUFI) @@ -212,6 +213,29 @@ build_and_install_one(${BIN} TRUE gufi_query $ ) +# build custom SQLite virtual tables +add_library(gufi_vt MODULE + 
gufi_vt.c + + # have to recompile with -fPIC + SinglyLinkedList.c + addqueryfuncs.c + histogram.c + trie.c + utils.c +) +set_target_properties(gufi_vt PROPERTIES PREFIX "") +if(APPLE) + set(EXT "dylib") +elseif (CYGWIN) + set(EXT "dll.a") +else () + set(EXT "so") +endif() +target_link_libraries(gufi_vt "${DEP_INSTALL_PREFIX}/sqlite3/lib/libsqlite3.${EXT}") +add_dependencies(gufi_vt install_dependencies) +install(TARGETS gufi_vt DESTINATION ${LIB} COMPONENT Server) + # build binaries that do not need to link with GUFI set(MISC_SOURCES tsmtime2epoch.c tsmepoch2time.c) build_and_install(${BIN} FALSE ${MISC_SOURCES}) diff --git a/src/addqueryfuncs.c b/src/addqueryfuncs.c new file mode 100644 index 000000000..a259f4adc --- /dev/null +++ b/src/addqueryfuncs.c @@ -0,0 +1,538 @@ +/* +This file is part of GUFI, which is part of MarFS, which is released +under the BSD license. + + +Copyright (c) 2017, Los Alamos National Security (LANS), LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +From Los Alamos National Security, LLC: +LA-CC-15-039 + +Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved. +Copyright 2017. Los Alamos National Security, LLC. This software was produced +under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National +Laboratory (LANL), which is operated by Los Alamos National Security, LLC for +the U.S. Department of Energy. The U.S. Government has rights to use, +reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS +ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR +ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is +modified to produce derivative works, such modified software should be +clearly marked, so as not to confuse it with the version available from +LANL. + +THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. +*/ + + + +#include +#include +#include +#include +#include +#include + +#include "addqueryfuncs.h" +#include "utils.h" + +void uidtouser(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; + + const char *text = (char *) sqlite3_value_text(argv[0]); + + const int fuid = atoi(text); + struct passwd *fmypasswd = getpwuid(fuid); + const char *show = fmypasswd?fmypasswd->pw_name:text; + + sqlite3_result_text(context, show, -1, SQLITE_TRANSIENT); +} + +void gidtogroup(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; + + const char *text = (char *) sqlite3_value_text(argv[0]); + + const int fgid = atoi(text); + struct group *fmygroup = getgrgid(fgid); + const char *show = fmygroup?fmygroup->gr_name:text; + + sqlite3_result_text(context, show, -1, SQLITE_TRANSIENT); +} + +void modetotxt(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; + int fmode; + char tmode[64]; + fmode = sqlite3_value_int(argv[0]); + modetostr(tmode, sizeof(tmode), fmode); + sqlite3_result_text(context, tmode, -1, SQLITE_TRANSIENT); +} + +void sqlite3_strftime(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; + + const char *fmt = (char *) sqlite3_value_text(argv[0]); /* format */ + const time_t t = sqlite3_value_int64(argv[1]); /* timestamp */ + + char buf[MAXPATH]; + #ifdef LOCALTIME_R + struct tm tm; + strftime(buf, sizeof(buf), fmt, localtime_r(&t, &tm)); + #else + 
strftime(buf, sizeof(buf), fmt, localtime(&t)); + #endif + sqlite3_result_text(context, buf, -1, SQLITE_TRANSIENT); +} + +/* uint64_t goes up to E */ +static const char SIZE[] = {'K', 'M', 'G', 'T', 'P', 'E'}; + +/* + * Returns the number of blocks required to store a given size + * Unfilled blocks count as one full block (round up) + * + * This function attempts to replicate ls output and is mainly + * intended for gufi_ls, so use with caution. + * + * blocksize(1024, "K") -> 1K + * blocksize(1024, "1K") -> 1 + * blocksize(1024, "KB") -> 2KB + * blocksize(1024, "1KB") -> 2 + * blocksize(1024, "KiB") -> 1K + * blocksize(1024, "1KiB") -> 1 + */ +void blocksize(sqlite3_context *context, int argc, sqlite3_value **argv) { + (void) argc; + + const char *size_s = (const char *) sqlite3_value_text(argv[0]); + const char *unit_s = (const char *) sqlite3_value_text(argv[1]); + const size_t unit_s_len = strlen(unit_s); + + uint64_t size = 0; + if (sscanf(size_s, "%" PRIu64, &size) != 1) { + sqlite3_result_error(context, "Bad blocksize size", -1); + return; + } + + /* whether or not a coefficent was found - affects printing */ + uint64_t unit_size = 0; + const int coefficient_found = sscanf(unit_s, "%" PRIu64, &unit_size); + if (coefficient_found == 1) { + if (unit_size == 0) { + sqlite3_result_error(context, "Bad blocksize unit", -1); + return; + } + } + else { + /* if there were no numbers, default to 1 */ + unit_size = 1; + } + + /* + * get block size suffix i.e. 
1KB -> KB + */ + const char *unit = unit_s; + { + /* + * find first non-numerical character + * decimal points are not accepted, and will break this loop + */ + size_t offset = 0; + while ((offset < unit_s_len) && + (('0' <= unit[offset]) && (unit[offset] <= '9'))) { + offset++; + } + + unit += offset; + } + + const size_t len = strlen(unit); + + /* suffix is too long */ + if (len > 3) { + sqlite3_result_error(context, "Bad blocksize unit", -1); + return; + } + + /* suffix is optional */ + if (len) { + if ((len > 1) && (unit[len - 1] != 'B')) { + sqlite3_result_error(context, "Bad blocksize unit", -1); + return; + } + + uint64_t multiplier = 1024; + if (len == 2) { + multiplier = 1000; + } + else if (len == 3) { + if (unit[1] != 'i') { + sqlite3_result_error(context, "Bad blocksize unit", -1); + return; + } + } + + int found = 0; + for(size_t i = 0; i < sizeof(SIZE); i++) { + unit_size *= multiplier; + if (unit[0] == SIZE[i]) { + found = 1; + break; + } + } + + if (!found) { + sqlite3_result_error(context, "Bad blocksize unit", -1); + return; + } + } + + const uint64_t blocks = (size / unit_size) + (!!(size % unit_size)); + + char buf[MAXPATH]; + size_t buf_len = snprintf(buf, sizeof(buf), "%" PRIu64, blocks); + + /* add unit to block count */ + if (!coefficient_found) { + buf_len += snprintf(buf + buf_len, sizeof(buf) - buf_len, "%s", unit); + } + + sqlite3_result_text(context, buf, buf_len, SQLITE_TRANSIENT); +} + +/* Returns a string containg the size with as large of a unit as reasonable */ +void human_readable_size(sqlite3_context *context, int argc, sqlite3_value **argv) { + (void) argc; + + char buf[MAXPATH]; + + const char *size_s = (const char *) sqlite3_value_text(argv[0]); + double size = 0; + + if (sscanf(size_s, "%lf", &size) != 1) { + sqlite3_result_error(context, "Bad size", -1); + return; + } + + size_t unit_index = 0; + while (size >= 1024) { + size /= 1024; + unit_index++; + } + + if (unit_index == 0) { + snprintf(buf, sizeof(buf), "%.1f", size); 
+ } + else { + snprintf(buf, sizeof(buf), "%.1f%c", size, SIZE[unit_index - 1]); + } + + sqlite3_result_text(context, buf, -1, SQLITE_TRANSIENT); +} + +void sqlite_basename(sqlite3_context *context, int argc, sqlite3_value **argv) { + (void) argc; + + char *path = (char *) sqlite3_value_text(argv[0]); + + if (!path) { + sqlite3_result_text(context, "", 0, SQLITE_TRANSIENT); + return; + } + + const size_t path_len = strlen(path); + + /* remove trailing slashes */ + const size_t trimmed_len = trailing_non_match_index(path, path_len, "/", 1); + if (!trimmed_len) { + sqlite3_result_text(context, "/", 1, SQLITE_STATIC); + return; + } + + /* basename(work->name) will be the same as the first part of the input path, so remove it */ + const size_t offset = trailing_match_index(path, trimmed_len, "/", 1); + + const size_t bn_len = trimmed_len - offset; + char *bn = path + offset; + + sqlite3_result_text(context, bn, bn_len, SQLITE_STATIC); +} + +/* + * One pass standard deviation (sample) + * https://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/murtaza1.html + */ +typedef struct { + double sum; + double sum_sq; + uint64_t count; +} stdev_t; + +void stdev_step(sqlite3_context *context, int argc, sqlite3_value **argv) { + (void) argc; + stdev_t *data = (stdev_t *) sqlite3_aggregate_context(context, sizeof(*data)); + const double value = sqlite3_value_double(argv[0]); + + data->sum += value; + data->sum_sq += value * value; + data->count++; +} + +void stdevs_final(sqlite3_context *context) { + stdev_t *data = (stdev_t *) sqlite3_aggregate_context(context, sizeof(*data)); + + if (data->count < 2) { + sqlite3_result_null(context); + } + else { + const double variance = ((data->count * data->sum_sq) - (data->sum * data->sum)) / (data->count * (data->count - 1)); + sqlite3_result_double(context, sqrt(variance)); + } +} + +void stdevp_final(sqlite3_context *context) { + stdev_t *data = (stdev_t *) sqlite3_aggregate_context(context, sizeof(*data)); + + if (data->count < 2) { + 
sqlite3_result_null(context); + } + else { + const double variance = ((data->count * data->sum_sq) - (data->sum * data->sum)) / (data->count * data->count); + sqlite3_result_double(context, sqrt(variance)); + } +} + +void median_step(sqlite3_context *context, int argc, sqlite3_value **argv) { + (void) argc; + sll_t *data = (sll_t *) sqlite3_aggregate_context(context, sizeof(*data)); + if (sll_get_size(data) == 0) { + sll_init(data); + } + + const double value = sqlite3_value_double(argv[0]); + sll_push(data, (void *) (uintptr_t) value); +} + +static int cmp_double(const void *lhs, const void *rhs) { + return * (double *) lhs - * (double *) rhs; +} + +void median_final(sqlite3_context *context) { + sll_t *data = (sll_t *) sqlite3_aggregate_context(context, sizeof(*data)); + + const uint64_t count = sll_get_size(data); + double median = 0; + + /* skip some mallocs */ + if (count == 0) { + sqlite3_result_null(context); + goto cleanup; + } + else if (count == 1) { + median = (double) (uintptr_t) sll_node_data(sll_head_node(data)); + goto ret_median; + } + else if (count == 2) { + median = ((double) (uintptr_t) sll_node_data(sll_head_node(data)) + + (double) (uintptr_t) sll_node_data(sll_tail_node(data))) / 2.0; + goto ret_median; + } + + const uint64_t half = count / 2; + + double *arr = malloc(count * sizeof(double)); + size_t i = 0; + sll_loop(data, node) { + arr[i++] = (double) (uintptr_t) sll_node_data(node); + } + + qsort(arr, count, sizeof(double), cmp_double); + + median = arr[half]; + if (!(count & 1)) { + median += arr[half - 1]; + median /= 2.0; + } + free(arr); + + /* median = quickselect(data, count, half); */ + /* if (!(count & 1)) { */ + /* median += quickselect(data, count, half - 1); */ + /* median /= 2.0; */ + /* } */ + + ret_median: + sqlite3_result_double(context, median); + + cleanup: + sll_destroy(data, NULL); +} + +/* return the directory you are currently in */ +void path(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; 
(void) argv; + struct work *work = (struct work *) sqlite3_user_data(context); + size_t user_dirname_len = work->orig_root.len + work->name_len - work->root_parent.len - work->root_basename_len; + char *user_dirname = malloc(user_dirname_len + 1); + + SNFORMAT_S(user_dirname, user_dirname_len + 1, 2, + work->orig_root.data, work->orig_root.len, + work->name + work->root_parent.len + work->root_basename_len, work->name_len - work->root_parent.len - work->root_basename_len); + + sqlite3_result_text(context, user_dirname, user_dirname_len, free); +} + +/* return the basename of the directory you are currently in */ +void epath(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; (void) argv; + struct work *work = (struct work *) sqlite3_user_data(context); + + sqlite3_result_text(context, work->name + work->name_len - work->basename_len, + work->basename_len, SQLITE_STATIC); +} + +/* return the fullpath of the directory you are currently in */ +void fpath(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; (void) argv; + struct work *work = (struct work *) sqlite3_user_data(context); + + if (!work->fullpath) { + work->fullpath = realpath(work->name, NULL); + work->fullpath_len = strlen(work->fullpath); + } + + sqlite3_result_text(context, work->fullpath, work->fullpath_len, SQLITE_STATIC); +} + +/* + * Usage: + * SELECT rpath(sname, sroll) + * FROM vrsummary; + * + * SELECT rpath(sname, sroll) || "/" || name + * FROM vrpentries; + */ +void rpath(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void) argc; + + /* work->name contains the current directory being operated on */ + struct work *work = (struct work *) sqlite3_user_data(context); + const int rollupscore = sqlite3_value_int(argv[1]); + + size_t user_dirname_len = 0; + char *user_dirname = NULL; + + const size_t root_len = work->root_parent.len + work->root_basename_len; + + if (rollupscore == 0) { /* use work->name */ + user_dirname_len = 
work->orig_root.len + work->name_len - root_len; + user_dirname = malloc(user_dirname_len + 1); + + SNFORMAT_S(user_dirname, user_dirname_len + 1, 2, + work->orig_root.data, work->orig_root.len, + work->name + root_len, work->name_len - root_len); + } + else { /* reconstruct full path out of argv[0] */ + refstr_t input; + input.data = (char *) sqlite3_value_text(argv[0]); + input.len = strlen(input.data); + + /* + * fullpath = work->name[:-work->basename_len] + input + */ + const size_t fullpath_len = work->name_len - work->basename_len + input.len; + char *fullpath = malloc(fullpath_len + 1); + SNFORMAT_S(fullpath, fullpath_len + 1, 2, + work->name, work->name_len - work->basename_len, + input.data, input.len); + + /* + * replace fullpath prefix with original user input + */ + user_dirname_len = work->orig_root.len + fullpath_len - root_len; + user_dirname = malloc(user_dirname_len + 1); + SNFORMAT_S(user_dirname, user_dirname_len + 1, 2, + work->orig_root.data, work->orig_root.len, + fullpath + root_len, fullpath_len - root_len); + + free(fullpath); + } + + sqlite3_result_text(context, user_dirname, user_dirname_len, free); +} + +void relative_level(sqlite3_context *context, int argc, sqlite3_value **argv) { + (void) argc; (void) argv; + + size_t level = (size_t) (uintptr_t) sqlite3_user_data(context); + sqlite3_result_int64(context, level); +} + +void starting_point(sqlite3_context *context, int argc, sqlite3_value **argv) { + (void) argc; (void) argv; + + refstr_t *root = (refstr_t *) sqlite3_user_data(context); + sqlite3_result_text(context, root->data, root->len, SQLITE_STATIC); +} + +int addqueryfuncs_with_context(sqlite3 *db, struct work *work) { + return !( + (sqlite3_create_function(db, "path", 0, SQLITE_UTF8, + work, &path, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "epath", 0, SQLITE_UTF8, + work, &epath, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "fpath", 0, SQLITE_UTF8, + work, &fpath, NULL, NULL) == SQLITE_OK) && + 
(sqlite3_create_function(db, "rpath", 2, SQLITE_UTF8, + work, &rpath, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "starting_point", 0, SQLITE_UTF8, + (void *) &work->orig_root, &starting_point, NULL, NULL) == SQLITE_OK) && + (sqlite3_create_function(db, "level", 0, SQLITE_UTF8, + (void *) (uintptr_t) work->level, &relative_level, NULL, NULL) == SQLITE_OK) + ); +} diff --git a/src/bf.c b/src/bf.c index bc8844ca4..2e08157a0 100644 --- a/src/bf.c +++ b/src/bf.c @@ -124,6 +124,10 @@ struct input *input_init(struct input *in) { void input_fini(struct input *in) { if (in) { + free(in->types.agg); + free(in->types.ent); + free(in->types.sum); + free(in->types.tsum); sll_destroy(&in->external_attach, free); trie_free(in->skip); } @@ -156,6 +160,7 @@ void print_help(const char* prog_name, case 'd': printf(" -d delimiter (one char) [use 'x' for 0x%02X]", (uint8_t)fielddelim); break; case 'o': printf(" -o output file (one-per-thread, with thread-id suffix)"); break; case 'O': printf(" -O output DB"); break; + case 'u': printf(" -u prefix row with 1 int column count and each column with 1 octet type and 1 size_t length"); break; /* need to use text to avoid \x0a confusion */ case 'I': printf(" -I SQL init"); break; case 'T': printf(" -T SQL for tree-summary table"); break; case 'S': printf(" -S SQL for summary table"); break; @@ -208,6 +213,7 @@ void show_input(struct input* in, int retval) { printf("in.maxthreads = %zu\n", in->maxthreads); printf("in.delim = '%c'\n", in->delim); printf("in.andor = %d\n", (int) in->andor); + printf("in.types.prefix = %d\n", in->types.prefix); printf("in.process_xattrs = %d\n", in->process_xattrs); printf("in.nobody.uid = %" STAT_uid "\n", in->nobody.uid); printf("in.nobody.gid = %" STAT_gid "\n", in->nobody.gid); @@ -353,6 +359,10 @@ int parse_cmd_line(int argc, INSTALL_STR(&in->outname, optarg); break; + case 'u': + in->types.prefix = 1; + break; + case 'I': // SQL initializations INSTALL_STR(&in->sql.init, optarg); break; 
diff --git a/src/dbutils.c b/src/dbutils.c index f519f47a6..434ccde8d 100644 --- a/src/dbutils.c +++ b/src/dbutils.c @@ -76,27 +76,28 @@ OF SUCH DAMAGE. #include "dbutils.h" #include "external.h" #include "histogram.h" +#include "trie.h" static const char SQLITE_MEMORY_ARRAY[] = ":memory:"; const char *SQLITE_MEMORY = SQLITE_MEMORY_ARRAY; const char READDIRPLUS_CREATE[] = DROP_TABLE(READDIRPLUS) - "CREATE TABLE " READDIRPLUS "(path TEXT, type TEXT, inode TEXT PRIMARY KEY, pinode TEXT, suspect INT64);"; + READDIRPLUS_SCHEMA(READDIRPLUS); const char READDIRPLUS_INSERT[] = "INSERT INTO " READDIRPLUS " VALUES (@path, @type, @inode, @pinode, @suspect);"; const char ENTRIES_CREATE[] = DROP_TABLE(ENTRIES) - "CREATE TABLE " ENTRIES "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT);"; + ENTRIES_SCHEMA(ENTRIES, ""); const char ENTRIES_INSERT[] = "INSERT INTO " ENTRIES " VALUES (@name, @type, @inode, @mode, @nlink, @uid, @gid, @size, @blksize, @blocks, @atime, @mtime, @ctime, @linkname, @xattr_names, @crtime, @ossint1, @ossint2, @ossint3, @ossint4, @osstext1, @osstext2);"; const char SUMMARY_CREATE[] = DROP_TABLE(SUMMARY) - "CREATE TABLE " SUMMARY "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, 
mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, rectype INT64, pinode TEXT, isroot INT64, rollupscore INT64);"; + SUMMARY_SCHEMA(SUMMARY, ""); static const char SUMMARY_INSERT[] = "INSERT INTO " SUMMARY " VALUES (@name, @type, @inode, @mode, @nlink, @uid, @gid, @size, @blksize, @blocks, @atime, @mtime, @ctime, @linkname, @xattr_names, @totfiles, @totlinks, @minuid, @maxuid, @mingid, @maxgid, @minsize, @maxsize, @totzero, @totltk, @totmtk, @totltm, @totmtm, @totmtg, @totmtt, @totsize, @minctime, @maxctime, @minmtime, @maxmtime, @minatime, @maxatime, @minblocks, @maxblocks, @totxattr, @depth, @mincrtime, @maxcrtime, @minossint1, @maxossint1, @totossint1, @minossint2, @maxossint2, @totossint2, @minossint3, @maxossint3, @totossint3, @minossint4, @maxossint4, @totossint4, @rectype, @pinode, @isroot, @rollupscore);"; @@ -107,7 +108,7 @@ const char VRSUMMARY_CREATE[] = const char PENTRIES_ROLLUP_CREATE[] = DROP_TABLE(PENTRIES_ROLLUP) - "CREATE TABLE " PENTRIES_ROLLUP "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT, pinode TEXT, ppinode TEXT);"; + PENTRIES_ROLLUP_SCHEMA(PENTRIES_ROLLUP); const char PENTRIES_ROLLUP_INSERT[] = "INSERT INTO " PENTRIES_ROLLUP " VALUES (@name, @type, @inode, @mode, @nlink, @uid, @gid, @size, @blksize, @blocks, @atime, @mtime, @ctime, @linkname, @xattr_names, @crtime, @ossint1, @ossint2, @ossint3, @ossint4, @osstext1, @osstext2, @pinode, @ppinode);"; @@ -753,572 +754,6 @@ int inserttreesumdb(const char *name, sqlite3 *sdb, struct sum *su,int rectype,i return !!err; } -/* return the directory you are 
currently in */ -static void path(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; (void) argv; - struct work *work = (struct work *) sqlite3_user_data(context); - size_t user_dirname_len = work->orig_root.len + work->name_len - work->root_parent.len - work->root_basename_len; - char *user_dirname = malloc(user_dirname_len + 1); - - SNFORMAT_S(user_dirname, user_dirname_len + 1, 2, - work->orig_root.data, work->orig_root.len, - work->name + work->root_parent.len + work->root_basename_len, work->name_len - work->root_parent.len - work->root_basename_len); - - sqlite3_result_text(context, user_dirname, user_dirname_len, free); -} - -/* return the basename of the directory you are currently in */ -static void epath(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; (void) argv; - struct work *work = (struct work *) sqlite3_user_data(context); - - sqlite3_result_text(context, work->name + work->name_len - work->basename_len, - work->basename_len, SQLITE_STATIC); -} - -/* return the fullpath of the directory you are currently in */ -static void fpath(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; (void) argv; - struct work *work = (struct work *) sqlite3_user_data(context); - - if (!work->fullpath) { - work->fullpath = realpath(work->name, NULL); - work->fullpath_len = strlen(work->fullpath); - } - - sqlite3_result_text(context, work->fullpath, work->fullpath_len, SQLITE_STATIC); -} - -/* - * Usage: - * SELECT rpath(sname, sroll) - * FROM vrsummary; - * - * SELECT rpath(sname, sroll) || "/" || name - * FROM vrpentries; - */ -static void rpath(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; - - /* work->name contains the current directory being operated on */ - struct work *work = (struct work *) sqlite3_user_data(context); - const int rollupscore = sqlite3_value_int(argv[1]); - - size_t user_dirname_len = 0; - char *user_dirname = NULL; - - const size_t root_len 
= work->root_parent.len + work->root_basename_len; - - if (rollupscore == 0) { /* use work->name */ - user_dirname_len = work->orig_root.len + work->name_len - root_len; - user_dirname = malloc(user_dirname_len + 1); - - SNFORMAT_S(user_dirname, user_dirname_len + 1, 2, - work->orig_root.data, work->orig_root.len, - work->name + root_len, work->name_len - root_len); - } - else { /* reconstruct full path out of argv[0] */ - refstr_t input; - input.data = (char *) sqlite3_value_text(argv[0]); - input.len = strlen(input.data); - - /* - * fullpath = work->name[:-work->basename_len] + input - */ - const size_t fullpath_len = work->name_len - work->basename_len + input.len; - char *fullpath = malloc(fullpath_len + 1); - SNFORMAT_S(fullpath, fullpath_len + 1, 2, - work->name, work->name_len - work->basename_len, - input.data, input.len); - - /* - * replace fullpath prefix with original user input - */ - user_dirname_len = work->orig_root.len + fullpath_len - root_len; - user_dirname = malloc(user_dirname_len + 1); - SNFORMAT_S(user_dirname, user_dirname_len + 1, 2, - work->orig_root.data, work->orig_root.len, - fullpath + root_len, fullpath_len - root_len); - - free(fullpath); - } - - sqlite3_result_text(context, user_dirname, user_dirname_len, free); -} - -static void uidtouser(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; - - const char *text = (char *) sqlite3_value_text(argv[0]); - - const int fuid = atoi(text); - struct passwd *fmypasswd = getpwuid(fuid); - const char *show = fmypasswd?fmypasswd->pw_name:text; - - sqlite3_result_text(context, show, -1, SQLITE_TRANSIENT); -} - -static void gidtogroup(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; - - const char *text = (char *) sqlite3_value_text(argv[0]); - - const int fgid = atoi(text); - struct group *fmygroup = getgrgid(fgid); - const char *show = fmygroup?fmygroup->gr_name:text; - - sqlite3_result_text(context, show, -1, SQLITE_TRANSIENT); -} - -static 
void modetotxt(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; - int fmode; - char tmode[64]; - fmode = sqlite3_value_int(argv[0]); - modetostr(tmode, sizeof(tmode), fmode); - sqlite3_result_text(context, tmode, -1, SQLITE_TRANSIENT); -} - -static void sqlite3_strftime(sqlite3_context *context, int argc, sqlite3_value **argv) -{ - (void) argc; - - const char *fmt = (char *) sqlite3_value_text(argv[0]); /* format */ - const time_t t = sqlite3_value_int64(argv[1]); /* timestamp */ - - char buf[MAXPATH]; - #ifdef LOCALTIME_R - struct tm tm; - strftime(buf, sizeof(buf), fmt, localtime_r(&t, &tm)); - #else - strftime(buf, sizeof(buf), fmt, localtime(&t)); - #endif - sqlite3_result_text(context, buf, -1, SQLITE_TRANSIENT); -} - -/* uint64_t goes up to E */ -static const char SIZE[] = {'K', 'M', 'G', 'T', 'P', 'E'}; - -/* - * Returns the number of blocks required to store a given size - * Unfilled blocks count as one full block (round up) - * - * This function attempts to replicate ls output and is mainly - * intended for gufi_ls, so use with caution. 
- * - * blocksize(1024, "K") -> 1K - * blocksize(1024, "1K") -> 1 - * blocksize(1024, "KB") -> 2KB - * blocksize(1024, "1KB") -> 2 - * blocksize(1024, "KiB") -> 1K - * blocksize(1024, "1KiB") -> 1 - */ -static void blocksize(sqlite3_context *context, int argc, sqlite3_value **argv) { - (void) argc; - - const char *size_s = (const char *) sqlite3_value_text(argv[0]); - const char *unit_s = (const char *) sqlite3_value_text(argv[1]); - const size_t unit_s_len = strlen(unit_s); - - uint64_t size = 0; - if (sscanf(size_s, "%" PRIu64, &size) != 1) { - sqlite3_result_error(context, "Bad blocksize size", -1); - return; - } - - /* whether or not a coefficent was found - affects printing */ - uint64_t unit_size = 0; - const int coefficient_found = sscanf(unit_s, "%" PRIu64, &unit_size); - if (coefficient_found == 1) { - if (unit_size == 0) { - sqlite3_result_error(context, "Bad blocksize unit", -1); - return; - } - } - else { - /* if there were no numbers, default to 1 */ - unit_size = 1; - } - - /* - * get block size suffix i.e. 
1KB -> KB - */ - const char *unit = unit_s; - { - /* - * find first non-numerical character - * decimal points are not accepted, and will break this loop - */ - size_t offset = 0; - while ((offset < unit_s_len) && - (('0' <= unit[offset]) && (unit[offset] <= '9'))) { - offset++; - } - - unit += offset; - } - - const size_t len = strlen(unit); - - /* suffix is too long */ - if (len > 3) { - sqlite3_result_error(context, "Bad blocksize unit", -1); - return; - } - - /* suffix is optional */ - if (len) { - if ((len > 1) && (unit[len - 1] != 'B')) { - sqlite3_result_error(context, "Bad blocksize unit", -1); - return; - } - - uint64_t multiplier = 1024; - if (len == 2) { - multiplier = 1000; - } - else if (len == 3) { - if (unit[1] != 'i') { - sqlite3_result_error(context, "Bad blocksize unit", -1); - return; - } - } - - int found = 0; - for(size_t i = 0; i < sizeof(SIZE); i++) { - unit_size *= multiplier; - if (unit[0] == SIZE[i]) { - found = 1; - break; - } - } - - if (!found) { - sqlite3_result_error(context, "Bad blocksize unit", -1); - return; - } - } - - const uint64_t blocks = (size / unit_size) + (!!(size % unit_size)); - - char buf[MAXPATH]; - size_t buf_len = snprintf(buf, sizeof(buf), "%" PRIu64, blocks); - - /* add unit to block count */ - if (!coefficient_found) { - buf_len += snprintf(buf + buf_len, sizeof(buf) - buf_len, "%s", unit); - } - - sqlite3_result_text(context, buf, buf_len, SQLITE_TRANSIENT); -} - -/* Returns a string containg the size with as large of a unit as reasonable */ -static void human_readable_size(sqlite3_context *context, int argc, sqlite3_value **argv) { - (void) argc; - - char buf[MAXPATH]; - - const char *size_s = (const char *) sqlite3_value_text(argv[0]); - double size = 0; - - if (sscanf(size_s, "%lf", &size) != 1) { - sqlite3_result_error(context, "Bad size", -1); - return; - } - - size_t unit_index = 0; - while (size >= 1024) { - size /= 1024; - unit_index++; - } - - if (unit_index == 0) { - snprintf(buf, sizeof(buf), "%.1f", 
size); - } - else { - snprintf(buf, sizeof(buf), "%.1f%c", size, SIZE[unit_index - 1]); - } - - sqlite3_result_text(context, buf, -1, SQLITE_TRANSIENT); -} - -static void relative_level(sqlite3_context *context, int argc, sqlite3_value **argv) { - (void) argc; (void) argv; - - size_t level = (size_t) (uintptr_t) sqlite3_user_data(context); - sqlite3_result_int64(context, level); -} - -static void starting_point(sqlite3_context *context, int argc, sqlite3_value **argv) { - (void) argc; (void) argv; - - refstr_t *root = (refstr_t *) sqlite3_user_data(context); - sqlite3_result_text(context, root->data, root->len, SQLITE_STATIC); -} - -static void sqlite_basename(sqlite3_context *context, int argc, sqlite3_value **argv) { - (void) argc; - - char *path = (char *) sqlite3_value_text(argv[0]); - - if (!path) { - sqlite3_result_text(context, "", 0, SQLITE_TRANSIENT); - return; - } - - const size_t path_len = strlen(path); - - /* remove trailing slashes */ - const size_t trimmed_len = trailing_non_match_index(path, path_len, "/", 1); - if (!trimmed_len) { - sqlite3_result_text(context, "/", 1, SQLITE_STATIC); - return; - } - - /* basename(work->name) will be the same as the first part of the input path, so remove it */ - const size_t offset = trailing_match_index(path, trimmed_len, "/", 1); - - const size_t bn_len = trimmed_len - offset; - char *bn = path + offset; - - sqlite3_result_text(context, bn, bn_len, SQLITE_STATIC); -} - -/* - * One pass standard deviation (sample) - * https://mathcentral.uregina.ca/QQ/database/QQ.09.06/h/murtaza1.html - */ -typedef struct { - double sum; - double sum_sq; - uint64_t count; -} stdev_t; - -static void stdev_step(sqlite3_context *context, int argc, sqlite3_value **argv) { - (void) argc; - stdev_t *data = (stdev_t *) sqlite3_aggregate_context(context, sizeof(*data)); - const double value = sqlite3_value_double(argv[0]); - - data->sum += value; - data->sum_sq += value * value; - data->count++; -} - -static void 
stdevs_final(sqlite3_context *context) { - stdev_t *data = (stdev_t *) sqlite3_aggregate_context(context, sizeof(*data)); - - if (data->count < 2) { - sqlite3_result_null(context); - } - else { - const double variance = ((data->count * data->sum_sq) - (data->sum * data->sum)) / (data->count * (data->count - 1)); - sqlite3_result_double(context, sqrt(variance)); - } -} - -static void stdevp_final(sqlite3_context *context) { - stdev_t *data = (stdev_t *) sqlite3_aggregate_context(context, sizeof(*data)); - - if (data->count < 2) { - sqlite3_result_null(context); - } - else { - const double variance = ((data->count * data->sum_sq) - (data->sum * data->sum)) / (data->count * data->count); - sqlite3_result_double(context, sqrt(variance)); - } -} - -static void median_step(sqlite3_context *context, int argc, sqlite3_value **argv) { - (void) argc; - sll_t *data = (sll_t *) sqlite3_aggregate_context(context, sizeof(*data)); - if (sll_get_size(data) == 0) { - sll_init(data); - } - - const double value = sqlite3_value_double(argv[0]); - sll_push(data, (void *) (uintptr_t) value); -} - -/* /\* */ -/* * find kth largest element */ -/* * */ -/* * Adapted from code by Russell Cohen */ -/* * https://rcoh.me/posts/linear-time-median-finding/ */ -/* *\/ */ -/* static double quickselect(sll_t *sll, uint64_t count, uint64_t k) { */ -/* /\* cache unused values here since partitioning destroys the original list *\/ */ -/* sll_t cache; */ -/* sll_init(&cache); */ - -/* sll_t lt, eq, gt; */ -/* sll_init(<); */ -/* sll_init(&eq); */ -/* sll_init(>); */ - -/* while (count > 1) { */ -/* /\* TODO: Better pivot selection *\/ */ -/* const uint64_t pivot_idx = (rand() * rand()) % count; */ -/* double pivot = 0; */ -/* size_t i = 0; */ -/* sll_loop(sll, node) { */ -/* if (i == pivot_idx) { */ -/* pivot = (double) (uintptr_t) sll_node_data(node); */ -/* break; */ -/* } */ -/* i++; */ -/* } */ - -/* sll_node_t *node = NULL; */ -/* while ((node = sll_head_node(sll))) { */ -/* const double value = 
(double) (uint64_t) sll_node_data(node); */ -/* if (value < pivot) { */ -/* sll_move_append_first(<, sll, 1); */ -/* } */ -/* else if (value > pivot) { */ -/* sll_move_append_first(>, sll, 1); */ -/* } */ -/* else { */ -/* sll_move_append_first(&eq, sll, 1); */ -/* } */ -/* } */ - -/* /\* sll is empty at this point *\/ */ - -/* const uint64_t lt_size = sll_get_size(<); */ -/* const uint64_t eq_size = sll_get_size(&eq); */ - -/* if (k < lt_size) { */ -/* sll_move_append(sll, <); */ -/* sll_move_append(&cache, &eq); */ -/* sll_move_append(&cache, >); */ -/* } */ -/* else if (k < (lt_size + eq_size)) { */ -/* sll_move_append(&cache, <); */ -/* sll_move_append(sll, &eq); */ -/* sll_move_append(&cache, >); */ -/* break; */ -/* } */ -/* else { */ -/* k -= lt_size + eq_size; */ -/* sll_move_append(&cache, <); */ -/* sll_move_append(&cache, &eq); */ -/* sll_move_append(sll, >); */ -/* } */ - -/* count = sll_get_size(sll); */ -/* } */ - -/* /\* restore original list's contents (different order) *\/ */ -/* sll_move_append(sll, &cache); */ - -/* return (double) (uintptr_t) sll_node_data(sll_head_node(sll)); */ -/* } */ - -static int cmp_double(const void *lhs, const void *rhs) { - return * (double *) lhs - * (double *) rhs; -} - -static void median_final(sqlite3_context *context) { - sll_t *data = (sll_t *) sqlite3_aggregate_context(context, sizeof(*data)); - - const uint64_t count = sll_get_size(data); - double median = 0; - - /* skip some mallocs */ - if (count == 0) { - sqlite3_result_null(context); - goto cleanup; - } - else if (count == 1) { - median = (double) (uintptr_t) sll_node_data(sll_head_node(data)); - goto ret_median; - } - else if (count == 2) { - median = ((double) (uintptr_t) sll_node_data(sll_head_node(data)) + - (double) (uintptr_t) sll_node_data(sll_tail_node(data))) / 2.0; - goto ret_median; - } - - const uint64_t half = count / 2; - - double *arr = malloc(count * sizeof(double)); - size_t i = 0; - sll_loop(data, node) { - arr[i++] = (double) (uintptr_t) 
sll_node_data(node); - } - - qsort(arr, count, sizeof(double), cmp_double); - - median = arr[half]; - if (!(count & 1)) { - median += arr[half - 1]; - median /= 2.0; - } - free(arr); - - /* median = quickselect(data, count, half); */ - /* if (!(count & 1)) { */ - /* median += quickselect(data, count, half - 1); */ - /* median /= 2.0; */ - /* } */ - - ret_median: - sqlite3_result_double(context, median); - - cleanup: - sll_destroy(data, NULL); -} - -int addqueryfuncs(sqlite3 *db) { - return !( - (sqlite3_create_function(db, "uidtouser", 1, SQLITE_UTF8, - NULL, &uidtouser, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "gidtogroup", 1, SQLITE_UTF8, - NULL, &gidtogroup, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "modetotxt", 1, SQLITE_UTF8, - NULL, &modetotxt, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "strftime", 2, SQLITE_UTF8, - NULL, &sqlite3_strftime, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "blocksize", 2, SQLITE_UTF8, - NULL, &blocksize, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "human_readable_size", 1, SQLITE_UTF8, - NULL, &human_readable_size, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "basename", 1, SQLITE_UTF8, - NULL, &sqlite_basename, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "stdevs", 1, SQLITE_UTF8, - NULL, NULL, stdev_step, stdevs_final) == SQLITE_OK) && - (sqlite3_create_function(db, "stdevp", 1, SQLITE_UTF8, - NULL, NULL, stdev_step, stdevp_final) == SQLITE_OK) && - (sqlite3_create_function(db, "median", 1, SQLITE_UTF8, - NULL, NULL, median_step, median_final) == SQLITE_OK) && - addhistfuncs(db) - ); -} - -int addqueryfuncs_with_context(sqlite3 *db, struct work *work) { - return !( - (sqlite3_create_function(db, "path", 0, SQLITE_UTF8, - work, &path, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "epath", 0, SQLITE_UTF8, - work, &epath, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "fpath", 0, SQLITE_UTF8, 
- work, &fpath, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "rpath", 2, SQLITE_UTF8, - work, &rpath, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "starting_point", 0, SQLITE_UTF8, - (void *) &work->orig_root, &starting_point, NULL, NULL) == SQLITE_OK) && - (sqlite3_create_function(db, "level", 0, SQLITE_UTF8, - (void *) (uintptr_t) work->level, &relative_level, NULL, NULL) == SQLITE_OK) - ); -} - struct xattr_db *create_xattr_db(struct template_db *tdb, const char *path, const size_t path_len, struct input *in, @@ -1756,3 +1191,88 @@ int bottomup_collect_treesummary(sqlite3 *db, const char *dirname, sll_t *subdir return inserttreesumdb(dirname, db, &tsum, 0, 0, 0); } + +/* + * subset of known string to SQLite type conversions + * + * https://www.sqlite.org/datatype3.html + * https://www.sqlite.org/c3ref/c_blob.html + */ +static trie_t *sqlite3_types(void) { + trie_t *types = trie_alloc(); + + trie_insert(types, "INT", 3, (void *) (uintptr_t) SQLITE_INTEGER, NULL); + trie_insert(types, "INTEGER", 7, (void *) (uintptr_t) SQLITE_INTEGER, NULL); + trie_insert(types, "INT64", 5, (void *) (uintptr_t) SQLITE_INTEGER, NULL); + + trie_insert(types, "FLOAT", 5, (void *) (uintptr_t) SQLITE_FLOAT, NULL); + trie_insert(types, "DOUBLE", 6, (void *) (uintptr_t) SQLITE_FLOAT, NULL); + trie_insert(types, "REAL", 4, (void *) (uintptr_t) SQLITE_FLOAT, NULL); + + trie_insert(types, "TEXT", 4, (void *) (uintptr_t) SQLITE_TEXT, NULL); + + trie_insert(types, "BLOB", 4, (void *) (uintptr_t) SQLITE_BLOB, NULL); + + trie_insert(types, "NULL", 4, (void *) (uintptr_t) SQLITE_NULL, NULL); + + return types; +} + +int *get_col_types(sqlite3 *db, const refstr_t *sql, int *cols) { + int rc = SQLITE_OK; + + /* parse sql */ + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db, sql->data, sql->len, &stmt, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr, "Error: Could not prepare '%s' for getting column types: %s (%d)\n", + sql->data, sqlite3_errstr(rc), rc); + 
return NULL; + } + + /* /\* */ + /* * need to step if calling sqlite3_column_type, but requires */ + /* * that the table has at least 1 row of actual values */ + /* *\/ */ + /* rc = sqlite3_step(stmt); */ + /* if (rc != SQLITE_ROW) { */ + /* fprintf(stderr, "Error: Failed to evaluate SQL statement '%s': %s (%d)\n", */ + /* sql->data, sqlite3_errstr(rc), rc); */ + /* return NULL; */ + /* } */ + + /* get column count */ + *cols = sqlite3_column_count(stmt); + if (*cols == 0) { + fprintf(stderr, "Error: '%s' was detected to have 0 columns\n", sql->data); + sqlite3_finalize(stmt); + return NULL; + } + + trie_t *str2type = sqlite3_types(); + + /* get each column's type */ + int *types = malloc(*cols * sizeof(int)); + for(int i = 0; i < *cols; i++) { + const char *type = sqlite3_column_decltype(stmt, i); + if (!type) { + types[i] = SQLITE_NULL; + continue; + } + + const size_t type_len = strlen(type); + + void *sql_type = NULL; + if (trie_search(str2type, type, type_len, &sql_type) == 1) { + types[i] = (uintptr_t) sql_type; + } + else { + types[i] = 0; /* unknown type */ + } + } + + trie_free(str2type); + + sqlite3_finalize(stmt); + return types; +} diff --git a/src/gufi_query/aggregate.c b/src/gufi_query/aggregate.c index c5531d69c..fe6593b1d 100644 --- a/src/gufi_query/aggregate.c +++ b/src/gufi_query/aggregate.c @@ -157,12 +157,14 @@ int aggregate_process(Aggregate_t *aggregate, struct input *in) { /* normally expect STDOUT/OUTFILE to have SQL to run, but OUTDB can have SQL to run as well */ if ((in->output != OUTDB) || in->sql.agg.len) { - PrintArgs_t pa; - pa.output_buffer = &aggregate->ob; - pa.delim = in->delim; - pa.mutex = NULL; - pa.outfile = aggregate->outfile; - pa.rows = 0; + PrintArgs_t pa = { + .output_buffer = &aggregate->ob, + .delim = in->delim, + .mutex = NULL, + .outfile = aggregate->outfile, + .rows = 0, + .types = in->types.agg, + }; char *err = NULL; if (sqlite3_exec(aggregate->db, in->sql.agg.data, print_parallel, &pa, &err) != SQLITE_OK) { diff 
--git a/src/gufi_query/validate_inputs.c b/src/gufi_query/handle_sql.c similarity index 73% rename from src/gufi_query/validate_inputs.c rename to src/gufi_query/handle_sql.c index 7d64831dd..5cdd3c272 100644 --- a/src/gufi_query/validate_inputs.c +++ b/src/gufi_query/handle_sql.c @@ -63,10 +63,13 @@ OF SUCH DAMAGE. #include +#include -#include "gufi_query/validate_inputs.h" +#include "dbutils.h" +#include "gufi_query/handle_sql.h" +#include "template_db.h" -int validate_inputs(struct input *in) { +int handle_sql(struct input *in) { /* * - Leaves are final outputs * - OUTFILE/OUTDB + aggregation will create per thread and final aggregation files @@ -140,5 +143,69 @@ int validate_inputs(struct input *in) { } } + sqlite3 *db = NULL; + + /* now that the SQL has been validated, generate types if necessary */ + if ((in->types.prefix == 1) && ((in->output == STDOUT) || (in->output == OUTFILE))) { + /* have to create temporary db since there is no guarantee of a db yet */ + db = opendb(SQLITE_MEMORY, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, + 0, 0, create_dbdb_tables, NULL); + if (!db) { + return -1; + } + + int cols = 0; /* discarded */ + + struct work work; + if (addqueryfuncs_with_context(db, &work) != 0) { + goto error; + } + + if (in->sql.tsum.len) { + if (create_table_wrapper(SQLITE_MEMORY, db, TREESUMMARY, TREESUMMARY_CREATE) != SQLITE_OK) { + goto error; + } + } + + /* if not aggregating, get types for T, S, and E */ + if (!in->sql.init_agg.len) { + if (in->sql.tsum.len) { + if (!(in->types.tsum = get_col_types(db, &in->sql.tsum, &cols))) { + goto error; + } + } + if (in->sql.sum.len) { + if (!(in->types.sum = get_col_types(db, &in->sql.sum, &cols))) { + goto error; + } + } + if (in->sql.ent.len) { + if (!(in->types.ent = get_col_types(db, &in->sql.ent, &cols))) { + goto error; + } + } + } + /* types for G */ + else { + /* run -K so -G can pull the final columns */ + char *err = NULL; + if (sqlite3_exec(db, in->sql.init_agg.data, NULL, NULL, &err) != SQLITE_OK) 
{ + fprintf(stderr, "Error: -K SQL failed while getting columns types: %s\n", err); + sqlite3_free(err); + goto error; + } + + if (!(in->types.agg = get_col_types(db, &in->sql.agg, &cols))) { + goto error; + } + } + + closedb(db); + } + return 0; + + error: + closedb(db); + return -1; } diff --git a/src/gufi_query/main.c b/src/gufi_query/main.c index 4f79eb6dc..398e8cb85 100644 --- a/src/gufi_query/main.c +++ b/src/gufi_query/main.c @@ -78,8 +78,8 @@ OF SUCH DAMAGE. #include "gufi_query/aggregate.h" #include "gufi_query/gqw.h" +#include "gufi_query/handle_sql.h" #include "gufi_query/processdir.h" -#include "gufi_query/validate_inputs.h" static void sub_help(void) { printf("GUFI_index find GUFI index here\n"); @@ -93,9 +93,9 @@ int main(int argc, char *argv[]) /* Callers provide the options-string for get_opt(), which will */ /* control which options are parsed for each program. */ struct input in; - process_args_and_maybe_exit("hHvT:S:E:an:jo:d:O:I:F:y:z:J:K:G:mB:wxk:M:s:" COMPRESS_OPT "Q:", 1, "GUFI_index ...", &in); + process_args_and_maybe_exit("hHvT:S:E:an:jo:d:O:uI:F:y:z:J:K:G:mB:wxk:M:s:" COMPRESS_OPT "Q:", 1, "GUFI_index ...", &in); - if (validate_inputs(&in) != 0) { + if (handle_sql(&in) != 0) { input_fini(&in); return EXIT_FAILURE; } diff --git a/src/gufi_query/process_queries.c b/src/gufi_query/process_queries.c index 4a8442e95..6c05e9dc2 100644 --- a/src/gufi_query/process_queries.c +++ b/src/gufi_query/process_queries.c @@ -228,7 +228,8 @@ int process_queries(PoolArgs_t *pa, if (in->sql.sum.len) { recs=1; /* set this to one record - if the sql succeeds it will set to 0 or 1 */ /* put in the path relative to the user's input */ - querydb(&gqw->work, dbname, dbname_len, db, in->sql.sum.data, pa, id, print_parallel, &recs); + querydb(&gqw->work, dbname, dbname_len, db, in->sql.sum.data, + in->types.sum, pa, id, print_parallel, &recs); } else { recs = 1; } @@ -238,7 +239,8 @@ int process_queries(PoolArgs_t *pa, /* if we have recs (or are running an OR) 
query the entries table */ if (recs > 0) { if (in->sql.ent.len) { - querydb(&gqw->work, dbname, dbname_len, db, in->sql.ent.data, pa, id, print_parallel, &recs); /* recs is not used */ + querydb(&gqw->work, dbname, dbname_len, db, in->sql.ent.data, + in->types.ent, pa, id, print_parallel, &recs); /* recs is not used */ } } } diff --git a/src/gufi_query/processdir.c b/src/gufi_query/processdir.c index 36a88cea8..39154fd2c 100644 --- a/src/gufi_query/processdir.c +++ b/src/gufi_query/processdir.c @@ -213,15 +213,17 @@ int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) { /* if this is OR, as well as no-sql-to-run, skip this query */ if (in->andor == AND) { /* make sure the treesummary table exists */ + static const int TSUM_CHECK_TYPES[] = { SQLITE_TEXT }; querydb(&gqw->work, dbname, dbname_len, db, "SELECT name FROM " ATTACH_NAME ".sqlite_master " "WHERE (type == 'table') AND (name == '" TREESUMMARY "');", - pa, id, count_rows, &recs); + in->types.prefix?TSUM_CHECK_TYPES:NULL, pa, id, count_rows, &recs); if (recs < 1) { recs = -1; } else { /* run in->sql.tsum */ - querydb(&gqw->work, dbname, dbname_len, db, in->sql.tsum.data, pa, id, print_parallel, &recs); + querydb(&gqw->work, dbname, dbname_len, db, in->sql.tsum.data, + in->types.tsum, pa, id, print_parallel, &recs); } } /* this is an OR or we got a record back. go on to summary/entries */ diff --git a/src/gufi_query/query.c b/src/gufi_query/query.c index c7970c97c..6f3e769b6 100644 --- a/src/gufi_query/query.c +++ b/src/gufi_query/query.c @@ -69,16 +69,18 @@ OF SUCH DAMAGE. 
/* wrapper wround sqlite3_exec to pass arguments and check for errors */ void querydb(struct work *work, const char *dbname, const size_t dbname_len, - sqlite3 *db, const char *query, + sqlite3 *db, const char *query, const int *types, PoolArgs_t *pa, int id, int (*callback)(void *, int, char **, char**), int *rc) { ThreadArgs_t *ta = &pa->ta[id]; - PrintArgs_t args; - args.output_buffer = &ta->output_buffer; - args.delim = pa->in->delim; - args.mutex = pa->stdout_mutex; - args.outfile = ta->outfile; - args.rows = 0; + PrintArgs_t args = { + .output_buffer = &ta->output_buffer, + .delim = pa->in->delim, + .mutex = pa->stdout_mutex, + .outfile = ta->outfile, + .rows = 0, + .types = types, + }; char *err = NULL; #ifdef SQL_EXEC diff --git a/src/gufi_sqlite3.c b/src/gufi_sqlite3.c index 4578ab8aa..d64380e9d 100644 --- a/src/gufi_sqlite3.c +++ b/src/gufi_sqlite3.c @@ -67,9 +67,12 @@ OF SUCH DAMAGE. #include "OutputBuffers.h" #include "dbutils.h" -#include "histogram.h" #include "print.h" +/* don't ask */ +#define SQLITE_CORE +#include "../src/gufi_vt.c" + static void sub_help(void) { printf("db db file path\n"); printf("SQL SQL statements to run\n"); @@ -94,8 +97,15 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - addqueryfuncs(db); - addhistfuncs(db); + char *err = NULL; + + /* this calls addqueryfuncs */ + if (sqlite3_gufivt_init(db, &err, NULL) != SQLITE_OK) { + fprintf(stderr, "Error: Could not initialize virtual tables \"%s\"\n", err); + sqlite3_free(err); + input_fini(&in); + return EXIT_FAILURE; + } /* no buffering */ struct OutputBuffer ob; @@ -107,10 +117,9 @@ int main(int argc, char *argv[]) { .mutex = NULL, .outfile = stdout, .rows = 0, + .types = NULL, }; - char *err = NULL; - /* if using in-memory db or no SQL statements following db path, read from stdin */ if (args_left < 2) { char *line = NULL; diff --git a/src/gufi_vt.c b/src/gufi_vt.c new file mode 100644 index 000000000..f590cccec --- /dev/null +++ b/src/gufi_vt.c @@ -0,0 +1,628 @@ +/* 
+This file is part of GUFI, which is part of MarFS, which is released +under the BSD license. + + +Copyright (c) 2017, Los Alamos National Security (LANS), LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +From Los Alamos National Security, LLC: +LA-CC-15-039 + +Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved. +Copyright 2017. Los Alamos National Security, LLC. This software was produced +under U.S. 
Government contract DE-AC52-06NA25396 for Los Alamos National +Laboratory (LANL), which is operated by Los Alamos National Security, LLC for +the U.S. Department of Energy. The U.S. Government has rights to use, +reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS +ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR +ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is +modified to produce derivative works, such modified software should be +clearly marked, so as not to confuse it with the version available from +LANL. + +THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. +*/ + + + +#include +SQLITE_EXTENSION_INIT1 + +#include +#include +#include +#include + +#include "addqueryfuncs.h" +#include "bf.h" +#include "dbutils.h" + +/* + * GUFI Virtual Tables + * + * This SQLite3 Module contains the code that allows for users to + * query GUFI trees as though they were tables via the SQLite3 Virtual + * Table Mechanism. These virtual tables act as table-valued + * functions, so CREATE VIRTUAL TABLE is not necessary. + * + * Firstly, the UDFs added to SQLite3 by GUFI that do not require + * internal state are also added to SQLite3 when this module is + * loaded. 
+ * + * Then, 6 virtual tables are added: + * gufi_vt_treesummary + * gufi_vt_summary + * gufi_vt_entries + * gufi_vt_pentries + * gufi_vt_vrsummary + * gufi_vt_vrpentries + * + * These may be used like so: + * SELECT ... FROM gufi_vt_*('', ...); + * + * The first argument, the index root, is required. The following + * arguments are all optional, but are positional, and appear in the + * following order: + * - # of threads + * - T + * - S + * + * The schemas of all 6 of the corresponding tables and views are + * recreated here, and thus all columns are accessible. + */ + +typedef struct gufi_query_sql { + const char *T; + const char *S; + const char *E; +} gq_sql_t; + +typedef struct gufi_vtab { + sqlite3_vtab base; + gq_sql_t sql; /* not const to allow for T and S to be modified */ +} gufi_vtab; + +typedef struct gufi_vtab_cursor { + sqlite3_vtab_cursor base; + + FILE *output; /* result of popen */ + char *row; /* current row */ + size_t len; /* length of current row */ + size_t *col_starts; + int col_count; + + sqlite_int64 rowid; /* current row id */ +} gufi_vtab_cursor; + +/* + * run gufi_query, aggregating results into a single db file + * + * have to fork+exec - cannot link gufi_query in without changing + * everything to link dynamically + */ +static int gufi_query_aggregate_db(const char *indexroot, const char *threads, const gq_sql_t *sql, + FILE **output, char **errmsg) { + const char *argv[12] = { + "gufi_query", + "-u", + }; + + #define set_argv(argc, argv, flag, value) if (value) { argv[argc++] = flag; argv[argc++] = value; } + + int argc = 2; + set_argv(argc, argv, "-n", threads); + set_argv(argc, argv, "-T", sql->T); + set_argv(argc, argv, "-S", sql->S); + set_argv(argc, argv, "-E", sql->E); + + argv[argc++] = indexroot; + argv[argc] = NULL; + + size_t len = 0; + for(int i = 0; i < argc; i++) { + len += strlen(argv[i]) + 3; /* + 2 for quotes around each argument + 1 for space between args */ + } + + /* convert array of args to single string */ + 
char *cmd = malloc(len + 1); + char *curr = cmd; + for(int i = 0; i < argc; i++) { + /* FIXME: this should use single quotes to avoid potentially processing variables, but needs to be double quotes to handle strings in SQLite properly */ + curr += snprintf(curr, len + 1 - (curr - cmd), "\"%s\" ", argv[i]); + } + + /* pass command to popen */ + FILE *out = popen(cmd, "re"); + + free(cmd); + + if (!out) { + const int err = errno; + *errmsg = sqlite3_mprintf("popen failed: %s (%d)", strerror(err), err); + return SQLITE_ERROR; + } + + *output = out; + + return SQLITE_OK; +} + +/* space taken up by type and length */ +static const size_t TL = sizeof(char) + sizeof(size_t); + +/* read TLV rows terminated by newline - this only works because type is in the range [1, 5] */ +static int gufi_query_read_row(gufi_vtab_cursor *pCur) { + size_t size = sizeof(int); + char *buf = malloc(size); + char *curr = buf; + ptrdiff_t curr_offset = 0; + + size_t *starts = NULL; /* index of where each column starts in buf */ + int count = 0; /* number of columns */ + + // each row is prefixed with a count + if (fread(curr, sizeof(char), sizeof(int), pCur->output) != sizeof(int)) { + goto error; + } + + count = * (int *) curr; + starts = malloc(count * sizeof(size_t)); + + curr += sizeof(int); + + char *new_buf = NULL; + for(int i = 0; i < count; i++) { + starts[i] = curr - buf; + + /* add space for type and length */ + size += TL; + + curr_offset = curr - buf; + + /* reallocate buffer for type and length */ + new_buf = realloc(buf, size); + if (!new_buf) { + const int err = errno; + fprintf(stderr, "Error: Could not resize buffer for reading column type and length. 
New size: %zu: %s (%d)\n", + size, strerror(err), err); + goto error; + } + + buf = new_buf; + curr = buf + curr_offset; + + /* read type and length */ + const size_t tl = fread(curr, sizeof(char), TL, pCur->output); + if (tl != TL) { + fprintf(stderr, "Error: Could not read type and length from column %d\n", i); + goto error; + } + + const size_t value_len = * (size_t *) (curr + sizeof(char)); + + size += value_len; /* update buffer size with value length */ + curr += TL; /* to go to end of buffer/start of value */ + + curr_offset = curr - buf; + + /* allocate space for value */ + new_buf = realloc(buf, size); + if (!new_buf) { + const int err = errno; + fprintf(stderr, "Error: Could not resize buffer for reading column value. New size: %zu: %s (%d)\n", + size, strerror(err), err); + goto error; + } + + buf = new_buf; + curr = buf + curr_offset; + + const size_t v = fread(curr, sizeof(char), value_len, pCur->output); + if (v != value_len) { + fprintf(stderr, "Error: Could not read %zu octets. 
Got %zu\n", value_len, v); + goto error; + } + + curr += value_len; + } + + pCur->row = buf; + pCur->len = size; + pCur->col_starts = starts; + pCur->col_count = count; + + return 0; + + error: + free(buf); + pCur->row = NULL; + pCur->len = 0; + free(starts); + pCur->col_starts = NULL; + pCur->col_count = 0; + return 1; +} + +/* generic connect function */ +static int gufi_vtConnect(sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr, + const char *schema, + const gq_sql_t *sql) { + (void) pAux; (void) pzErr; + (void) argc; (void) argv; + + gufi_vtab *pNew = NULL; + const int rc = sqlite3_declare_vtab(db, schema); + if(rc == SQLITE_OK){ + pNew = (gufi_vtab *)sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(*pNew)); + pNew->sql = *sql; + /* sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY); */ + } + + *ppVtab = &pNew->base; + return rc; +} + +/* positional arguments to virtual table/table-valued function */ +#define GUFI_VT_ARGS_INDEXROOT 0 +#define GUFI_VT_ARGS_THREADS 1 +#define GUFI_VT_ARGS_T 2 +#define GUFI_VT_ARGS_S 3 +#define GUFI_VT_ARGS_COUNT 4 + +#define GUFI_VT_ARG_COLUMNS "indexroot TEXT HIDDEN, threads INT64 HIDDEN, " \ + "T TEXT HIDDEN, S TEXT HIDDEN, " + +#define GUFI_VT_EXTRA_COLUMNS "path TEXT, epath TEXT, fpath TEXT, rpath TEXT, " +#define GUFI_VT_EXTRA_COLUMNS_SQL "path(), epath(), fpath(), path(), " +#define GUFI_VT_EXTRA_COLUMNS_SQL_VR "path(), epath(), fpath(), rpath(sname, sroll), " + +#define GUFI_VT_ALL_COLUMNS GUFI_VT_ARG_COLUMNS \ + GUFI_VT_EXTRA_COLUMNS + +#define PENTRIES_SCHEMA(name, extra_cols) \ + "CREATE TABLE " name "(" extra_cols "name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT, pinode TEXT, 
ppinode TEXT);" + +#define VRSUMMARY_SCHEMA(name, extra_cols) \ + "CREATE TABLE " name "(" extra_cols "dname TEXT, sname TEXT, sroll INT64, srollsubdirs INT64, name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, rectype INT64, pinode TEXT, isroot INT64, rollupscore INT64);" + +#define VRPENTRIES_SCHEMA(name, extra_cols) \ + "CREATE TABLE " name "(" extra_cols "dname TEXT, sname TEXT, dmode INT64, dnlink INT64, duid INT64, dgid INT64, dsize INT64, dblksize INT64, dblocks INT64, datime INT64, dmtime INT64, dctime INT64, dlinkname TEXT, dtotfile INT64, dtotlinks INT64, dminuid INT64, dmaxuid INT64, dmingid INT64, dmaxgid INT64, dminsize INT64, dmaxsize INT64, dtotzero INT64, dtotltk INT64, dtotmtk INT64, totltm INT64, dtotmtm INT64, dtotmtg INT64, dtotmtt INT64, dtotsize INT64, dminctime INT64, dmaxctime INT64, dminmtime INT64, dmaxmtime INT64, dminatime INT64, dmaxatime INT64, dminblocks INT64, dmaxblocks INT64, dtotxattr INT64, ddepth INT64, dmincrtime INT64, dmaxcrtime INT64, sroll INT64, atroot INT64, srollsubdirs INT64, name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, 
xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT, pinode TEXT, ppinode TEXT);" + +#define SELECT_FROM(name) \ + "SELECT " GUFI_VT_EXTRA_COLUMNS_SQL "* " \ + "FROM " name ";" \ + +#define SELECT_FROM_VR(name) \ + "SELECT " GUFI_VT_EXTRA_COLUMNS_SQL_VR "* " \ + "FROM " name ";" \ + +/* generate xConnect function for each virtual table */ +#define gufi_vt_xConnect(name, abbrev, t, s, e, vr) \ + static int gufi_vt_ ##abbrev ##Connect(sqlite3 *db, \ + void *pAux, \ + int argc, const char * const *argv, \ + sqlite3_vtab **ppVtab, \ + char **pzErr) { \ + /* this is what the virtual table looks like */ \ + static const char schema[] = \ + name ##_SCHEMA(name, GUFI_VT_ALL_COLUMNS); \ + \ + static const char select_from[] = SELECT_FROM(name); \ + static const char select_from_vr[] = SELECT_FROM_VR(name); \ + \ + /* this is the actual query */ \ + static const gq_sql_t sql = { \ + .T = t?select_from:NULL, \ + .S = s?(vr?select_from_vr:select_from):NULL, \ + .E = e?(vr?select_from_vr:select_from):NULL, \ + }; \ + \ + return gufi_vtConnect(db, pAux, argc, argv, ppVtab, \ + pzErr, schema, &sql); \ + } + +/* generate xConnect for each table/view */ +gufi_vt_xConnect(TREESUMMARY, T, 1, 0, 0, 0) +gufi_vt_xConnect(SUMMARY, S, 0, 1, 0, 0) +gufi_vt_xConnect(ENTRIES, E, 0, 0, 1, 0) +gufi_vt_xConnect(PENTRIES, P, 0, 0, 1, 0) +gufi_vt_xConnect(VRSUMMARY, VRS, 0, 1, 0, 1) +gufi_vt_xConnect(VRPENTRIES, VRP, 0, 0, 1, 1) + +/* FIXME: This is probably not correct */ +static int gufi_vtBestIndex(sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo) { + (void) tab; + + int argc = 0; /* number of input arguments */ + + const struct sqlite3_index_constraint *constraint = pIdxInfo->aConstraint; + for(int i = 0; i < pIdxInfo->nConstraint; i++, constraint++) { + if (constraint->op != SQLITE_INDEX_CONSTRAINT_EQ) { + continue; + } + + if (constraint->iColumn < GUFI_VT_ARGS_COUNT) { + pIdxInfo->aConstraintUsage[i].argvIndex = 
constraint->iColumn + 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + argc++; + } + } + + /* index root not found */ + if (argc == 0) { + return SQLITE_CONSTRAINT; + } + + return SQLITE_OK; +} + +static int gufi_vtDisconnect(sqlite3_vtab *pVtab) { + sqlite3_free(pVtab); + return SQLITE_OK; +} + +static int gufi_vtOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { + (void) p; + gufi_vtab_cursor *pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +static int gufi_vtClose(sqlite3_vtab_cursor *cur) { + gufi_vtab_cursor *pCur = (gufi_vtab_cursor *) cur; + free(pCur->col_starts); + pCur->col_starts = NULL; + free(pCur->row); + pCur->row = NULL; + sqlite3_free(cur); + return SQLITE_OK; +} + +static int gufi_vtFilter(sqlite3_vtab_cursor *cur, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv) { + (void) idxNum; (void) idxStr; + + gufi_vtab_cursor *pCur = (gufi_vtab_cursor *) cur; + gufi_vtab *vtab = (gufi_vtab *) cur->pVtab; + + /* indexroot must be present */ + const char *indexroot = (const char *) sqlite3_value_text(argv[GUFI_VT_ARGS_INDEXROOT]); + const char *threads = NULL; + + if (argc > GUFI_VT_ARGS_THREADS) { + /* passing NULL in the SQL will result in a NULL pointer */ + if ((threads = (const char *) sqlite3_value_text(argv[GUFI_VT_ARGS_THREADS]))) { + + size_t nthreads = 0; + if ((sscanf(threads, "%zu", &nthreads) != 1) || (nthreads == 0)) { + vtab->base.zErrMsg = sqlite3_mprintf("Bad thread count: '%s'", threads); + return SQLITE_CONSTRAINT; + } + } + } + + #define set_str(argc, argv, idx, dst) \ + if (argc > idx) { \ + if (sqlite3_value_bytes(argv[idx]) != 0) { \ + dst = (const char *) sqlite3_value_text(argv[idx]); \ + } \ + } + + /* -T and -S can be changed */ + set_str(argc, argv, GUFI_VT_ARGS_T, vtab->sql.T); + set_str(argc, argv, GUFI_VT_ARGS_S, vtab->sql.S); + + /* kick off gufi_query */ + const int rc = 
gufi_query_aggregate_db(indexroot, threads, &vtab->sql, + &pCur->output, &vtab->base.zErrMsg); + if (rc != SQLITE_OK) { + return SQLITE_ERROR; + } + + pCur->rowid = 0; + pCur->row = NULL; + pCur->len = 0; + + /* wait for first row */ + gufi_query_read_row(pCur); + + return SQLITE_OK; +} + +static int gufi_vtNext(sqlite3_vtab_cursor *cur) { + gufi_vtab_cursor *pCur = (gufi_vtab_cursor *) cur; + + free(pCur->row); + pCur->row = NULL; + pCur->len = 0; + free(pCur->col_starts); + pCur->col_starts = NULL; + pCur->col_count = 0; + + /* no more to read or error */ + if (gufi_query_read_row(pCur) != 0) { + return SQLITE_OK; + } + + pCur->rowid++; + + return SQLITE_OK; +} + +static int gufi_vtEof(sqlite3_vtab_cursor *cur) { + gufi_vtab_cursor *pCur = (gufi_vtab_cursor *) cur; + + const int eof = (pCur->len <= sizeof(int)); + if (eof) { + pclose(pCur->output); + pCur->output = NULL; + } + + return eof; +} + +static int gufi_vtColumn(sqlite3_vtab_cursor *cur, + sqlite3_context *ctx, + int N) { + gufi_vtab_cursor *pCur = (gufi_vtab_cursor *) cur; + + const char *buf = pCur->row + pCur->col_starts[N - GUFI_VT_ARGS_COUNT]; + const int type = *buf; + const size_t len = * (size_t *) (buf + 1); + const char *col = buf + TL; + + switch(type) { + case SQLITE_INTEGER: + { + int value = 0; + if (sscanf(col, "%d", &value) == 1) { + sqlite3_result_int(ctx, value); + } + else { + sqlite3_result_text(ctx, col, len, SQLITE_TRANSIENT); + } + break; + } + /* GUFI does not have floating point columns */ + /* case SQLITE_FLOAT: */ + /* { */ + /* double value = 0; */ + /* if (sscanf(col, "%lf", &value) == 1) { */ + /* sqlite3_result_double(ctx, value); */ + /* } */ + /* else { */ + /* sqlite3_result_text(ctx, col, len, SQLITE_TRANSIENT); */ + /* } */ + /* break; */ + /* } */ + case SQLITE_TEXT: + case SQLITE_BLOB: + sqlite3_result_text(ctx, col, len, SQLITE_TRANSIENT); + break; + case SQLITE_NULL: + default: + sqlite3_result_text(ctx, col, len, SQLITE_TRANSIENT); + /* sqlite3_result_null(ctx); 
*/ + break; + } + + return SQLITE_OK; +} + +static int gufi_vtRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { + gufi_vtab_cursor *pCur = (gufi_vtab_cursor *) cur; + *pRowid = pCur->rowid; + return SQLITE_OK; +} + +static const sqlite3_module gufi_vtModule = { + 0, /* iVersion */ + 0, /* xCreate */ + 0, /* xConnect */ + gufi_vtBestIndex, /* xBestIndex */ + 0, /* xDisconnect */ + 0, /* xDestroy */ + gufi_vtOpen, /* xOpen - open a cursor */ + gufi_vtClose, /* xClose - close a cursor */ + gufi_vtFilter, /* xFilter - configure scan constraints */ + gufi_vtNext, /* xNext - advance a cursor */ + gufi_vtEof, /* xEof - check for end of scan */ + gufi_vtColumn, /* xColumn - read data */ + gufi_vtRowid, /* xRowid - read data */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0, /* xRollbackTo */ + 0, /* xShadowName */ + /* 0 /\* xIntegrity *\/ */ +}; + +#define create_module(module_name, constructor) \ + { \ + static sqlite3_module module; \ + memcpy(&module, &gufi_vtModule, sizeof(gufi_vtModule)); \ + module.xCreate = NULL; \ + module.xConnect = constructor; \ + module.xDisconnect = gufi_vtDisconnect; \ + module.xDestroy = NULL; \ + const int rc = sqlite3_create_module(db, module_name, &module, 0); \ + if (rc != SQLITE_OK) { \ + return rc; \ + } \ + } + +/* no underscore between gufi and vt for entry point */ +int sqlite3_gufivt_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi) { + (void) pzErrMsg; + + SQLITE_EXTENSION_INIT2(pApi); + + if (addqueryfuncs(db) != 0) { + return SQLITE_ERROR; + } + + create_module("gufi_vt_treesummary", gufi_vt_TConnect); + create_module("gufi_vt_summary", gufi_vt_SConnect); + create_module("gufi_vt_entries", gufi_vt_EConnect); + create_module("gufi_vt_pentries", gufi_vt_PConnect); + create_module("gufi_vt_vrsummary", gufi_vt_VRSConnect); + create_module("gufi_vt_vrpentries", 
gufi_vt_VRPConnect); + + return SQLITE_OK; +} diff --git a/src/histogram.c b/src/histogram.c index 6e3512834..6628c6587 100644 --- a/src/histogram.c +++ b/src/histogram.c @@ -119,7 +119,7 @@ int serialize_bucket(sqlite3_context *context, } /* log2_hist(string/value, bucket_count) */ -static void log2_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv) { +void log2_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv) { (void) argc; log2_hist_t *hist = (log2_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (hist->buckets == NULL) { @@ -169,7 +169,7 @@ static ssize_t serialize_log2_bucket(char *curr, const size_t avail, void *key, return snprintf(curr, avail, "%zu:%zu;", exp, hist->buckets[exp]); } -static void log2_hist_final(sqlite3_context *context) { +void log2_hist_final(sqlite3_context *context) { log2_hist_t *hist = (log2_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (hist->buckets == NULL) { sqlite3_result_text(context, "0;0;0;", -1, SQLITE_TRANSIENT); @@ -233,7 +233,7 @@ void log2_hist_free(log2_hist_t *hist) { } /* mode_hist(mode) */ -static void mode_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv) { +void mode_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv) { (void) argc; mode_hist_t *hist = (mode_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); const mode_t mode = (mode_t) sqlite3_value_int(argv[0]) & 0777; @@ -246,7 +246,7 @@ static ssize_t serialize_mode_bucket(char *curr, const size_t avail, void *key, return snprintf(curr, avail, "%03o:%zu;", (unsigned int) mode, hist->buckets[mode]); } -static void mode_hist_final(sqlite3_context *context) { +void mode_hist_final(sqlite3_context *context) { mode_hist_t *hist = (mode_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); size_t size = DEFAULT_HIST_ALLOC; @@ -299,7 +299,7 @@ typedef struct sqlite_time_hist { int init; } sqlite_time_hist_t; -static void time_hist_step(sqlite3_context 
*context, int argc, sqlite3_value **argv) { +void time_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv) { (void) argc; sqlite_time_hist_t *hist = (sqlite_time_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (hist->init == 0) { @@ -329,7 +329,7 @@ static ssize_t serialize_time_bucket(char *curr, const size_t avail, void *key, return snprintf(curr, avail, "%zu:%zu;", (size_t) TIME_BUCKETS[bucket].seconds, hist->buckets[bucket]); } -static void time_hist_final(sqlite3_context *context) { +void time_hist_final(sqlite3_context *context) { sqlite_time_hist_t *hist = (sqlite_time_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (hist->init == 0) { @@ -414,7 +414,7 @@ typedef struct sqlite_category_hist { int keep_1; } sqlite_category_hist_t; -static void category_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv) { +void category_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv) { (void) argc; sqlite_category_hist_t *hist = (sqlite_category_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (!hist->trie) { @@ -468,7 +468,7 @@ static void free_str(void *ptr) { free(str); } -static void category_hist_final(sqlite3_context *context) { +void category_hist_final(sqlite3_context *context) { sqlite_category_hist_t *hist = (sqlite_category_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (!hist->trie) { hist->trie = trie_alloc(); @@ -658,7 +658,7 @@ void category_hist_free(category_hist_t *hist) { } /* add a histogram into an existing histogram */ -static void category_hist_combine_step(sqlite3_context *context, int argc, sqlite3_value **argv) { +void category_hist_combine_step(sqlite3_context *context, int argc, sqlite3_value **argv) { (void) argv; (void) argc; sqlite_category_hist_t *hist = (sqlite_category_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (!hist->trie) { @@ -683,7 +683,7 @@ static ssize_t serialize_mode(char *curr, const size_t avail, 
void *key, void *d return snprintf(curr, avail, "%zu:%s:%zu;", mode->len, mode->data, *count); } -static void mode_count_final(sqlite3_context *context) { +void mode_count_final(sqlite3_context *context) { sqlite_category_hist_t *hist = (sqlite_category_hist_t *) sqlite3_aggregate_context(context, sizeof(*hist)); if (!hist->trie) { @@ -780,20 +780,3 @@ void mode_count_free(mode_count_t *mc) { free(mc->mode); free(mc); } - -int addhistfuncs(sqlite3 *db) { - return ( - (sqlite3_create_function(db, "log2_hist", 2, SQLITE_UTF8, - NULL, NULL, log2_hist_step, log2_hist_final) == SQLITE_OK) && - (sqlite3_create_function(db, "mode_hist", 1, SQLITE_UTF8, - NULL, NULL, mode_hist_step, mode_hist_final) == SQLITE_OK) && - (sqlite3_create_function(db, "time_hist", 2, SQLITE_UTF8, - NULL, NULL, time_hist_step, time_hist_final) == SQLITE_OK) && - (sqlite3_create_function(db, "category_hist", 2, SQLITE_UTF8, - NULL, NULL, category_hist_step, category_hist_final) == SQLITE_OK) && - (sqlite3_create_function(db, "category_hist_combine", 1, SQLITE_UTF8, - NULL, NULL, category_hist_combine_step, category_hist_final) == SQLITE_OK) && - (sqlite3_create_function(db, "mode_count", 1, SQLITE_UTF8, - NULL, NULL, category_hist_step, mode_count_final) == SQLITE_OK) - ); -} diff --git a/src/print.c b/src/print.c index 68e2fde8a..5af32b810 100644 --- a/src/print.c +++ b/src/print.c @@ -62,6 +62,7 @@ OF SUCH DAMAGE. 
+#include #include #include @@ -72,9 +73,10 @@ int print_parallel(void *args, int count, char **data, char **columns) { PrintArgs_t *print = (PrintArgs_t *) args; struct OutputBuffer *ob = print->output_buffer; + const int *types = print->types; size_t *lens = malloc(count * sizeof(size_t)); - size_t row_len = count - 1 + 1; /* one delimiter per column except last column + newline */ + size_t row_len = 0; for(int i = 0; i < count; i++) { lens[i] = 0; if (data[i]) { @@ -83,6 +85,14 @@ int print_parallel(void *args, int count, char **data, char **columns) { } } + if (types) { + row_len += sizeof(count); /* start row with column count */ + row_len += count * (sizeof(char) + sizeof(size_t)); /* type and length per column */ + } + else { + row_len += count - 1 + 1; /* one delimiter per column except last column + newline */ + } + /* if a row cannot fit the buffer for whatever reason, flush the existing buffer */ if ((ob->capacity - ob->filled) < row_len) { if (print->mutex) { @@ -100,16 +110,47 @@ int print_parallel(void *args, int count, char **data, char **columns) { if (print->mutex) { pthread_mutex_lock(print->mutex); } + + /* write column count */ + if (types) { + fwrite(&count, sizeof(char), sizeof(count), print->outfile); + } + const int last = count - 1; for(int i = 0; i < last; i++) { + if (types) { + const char col_type = types[i]; + fwrite(&col_type, sizeof(char), sizeof(col_type), print->outfile); + + fwrite(&lens[i], sizeof(char), sizeof(lens[i]), print->outfile); + } + if (data[i]) { fwrite(data[i], sizeof(char), lens[i], print->outfile); } - fwrite(&print->delim, sizeof(char), 1, print->outfile); + + if (!types) { + fwrite(&print->delim, sizeof(char), 1, print->outfile); + } } + /* print last column with no follow up delimiter */ - fwrite(data[last], sizeof(char), lens[last], print->outfile); - fwrite("\n", sizeof(char), 1, print->outfile); + + if (types) { + const char col_type = types[last]; + fwrite(&col_type, sizeof(char), sizeof(col_type), 
print->outfile); + + fwrite(&lens[last], sizeof(char), sizeof(lens[last]), print->outfile); + } + + if (data[last]) { + fwrite(data[last], sizeof(char), lens[last], print->outfile); + } + + if (!types) { + fwrite("\n", sizeof(char), 1, print->outfile); + } + ob->count++; if (print->mutex) { pthread_mutex_unlock(print->mutex); @@ -121,18 +162,37 @@ int print_parallel(void *args, int count, char **data, char **columns) { else { char *buf = ob->buf; size_t filled = ob->filled; + + /* write column count */ + if (types) { + memcpy(&buf[filled], &count, sizeof(count)); + filled += sizeof(count); + } + for(int i = 0; i < count; i++) { + if (types) { + buf[filled] = types[i]; + filled++; + + memcpy(&buf[filled], &lens[i], sizeof(lens[i])); + filled += sizeof(lens[i]); + } + if (data[i]) { memcpy(&buf[filled], data[i], lens[i]); filled += lens[i]; } - buf[filled] = print->delim; - filled++; + if (!types) { + buf[filled] = print->delim; + filled++; + } } - /* replace final delimiter with newline */ - buf[filled - 1] = '\n'; + if (!types) { + /* replace final delimiter with newline */ + buf[filled - 1] = '\n'; + } ob->filled = filled; ob->count++; diff --git a/test/regression/CMakeLists.txt b/test/regression/CMakeLists.txt index d48936d11..ea40a9f79 100644 --- a/test/regression/CMakeLists.txt +++ b/test/regression/CMakeLists.txt @@ -85,6 +85,7 @@ set(BINARIES gufi_query gufi_stat_bin querydbs + gufi_vt ) set(PYTHON diff --git a/test/regression/gufi_query.expected b/test/regression/gufi_query.expected index b638e2e2c..7cee5d1f2 100644 --- a/test/regression/gufi_query.expected +++ b/test/regression/gufi_query.expected @@ -14,6 +14,7 @@ options: -o output file (one-per-thread, with thread-id suffix) -d delimiter (one char) [use 'x' for 0x1E] -O output DB + -u prefix row with 1 int column count and each column with 1 octet type and 1 size_t length -I SQL init -F SQL cleanup -y minimum level to go down @@ -219,6 +220,19 @@ prefix/repeat_name prefix/unusual#? 
directory , prefix/unusual#? directory ,/unusual, name?# +# Output TLV columns (no aggregation) +$ gufi_query -u -n 2 -E "SELECT name, size FROM vrpentries WHERE name == '.hidden';" "prefix" | od -x --endian=big +0000000 0200 0000 0307 0000 0000 0000 002e 6869 +0000020 6464 656e 0102 0000 0000 0000 0031 3000 +0000037 + +# Output TLV columns (with aggregation) +$ gufi_query -u -d " " -n 2 -a -I "CREATE TABLE out(path TEXT, size INT64);" -K "CREATE TABLE aggregate(path TEXT, size INT64);" -S "INSERT INTO out SELECT rpath(sname, sroll), size FROM vrsummary;" -E "INSERT INTO out SELECT rpath(sname, sroll) || '/' || name, size FROM vrpentries;" -J "INSERT INTO aggregate SELECT path, size FROM out;" -G "SELECT path, size FROM aggregate ORDER BY path ASC LIMIT 1;" "prefix" | od -x --endian=big +0000000 0200 0000 030d 0000 0000 0000 0073 6561 +0000020 7263 682f 7072 6566 6978 0102 0000 0000 +0000040 0000 0031 3700 +0000045 + ##################################### # Invalid Inputs # ##################################### diff --git a/test/regression/gufi_query.sh.in b/test/regression/gufi_query.sh.in index 52b2cd448..24543de82 100755 --- a/test/regression/gufi_query.sh.in +++ b/test/regression/gufi_query.sh.in @@ -105,6 +105,12 @@ run_sort "${GUFI_QUERY} -d \" \" -n ${THREADS} -S \"SELECT rpath(sname, sroll) F echo "# Get all directory and non-directory names and their xattrs" run_sort "${GUFI_QUERY} -d \" \" -n ${THREADS} -S \"SELECT rpath(sname, sroll), xattr_name, xattr_value FROM vrxsummary;\" -E \"SELECT rpath(sname, sroll) || '/' || name, xattr_name, xattr_value FROM vrxpentries;\" -x \"${INDEXROOT}\"" +echo "# Output TLV columns (no aggregation)" +run_no_sort "${GUFI_QUERY} -u -n ${THREADS} -E \"SELECT name, size FROM vrpentries WHERE name == '.hidden';\" \"${INDEXROOT}\" | od -x --endian=big" + +echo "# Output TLV columns (with aggregation)" +run_no_sort "${GUFI_QUERY} -u -d \" \" -n ${THREADS} -a -I \"CREATE TABLE out(path TEXT, size INT64);\" -K \"CREATE TABLE 
aggregate(path TEXT, size INT64);\" -S \"INSERT INTO out SELECT rpath(sname, sroll), size FROM vrsummary;\" -E \"INSERT INTO out SELECT rpath(sname, sroll) || '/' || name, size FROM vrpentries;\" -J \"INSERT INTO aggregate SELECT path, size FROM out;\" -G \"SELECT path, size FROM aggregate ORDER BY path ASC LIMIT 1;\" \"${INDEXROOT}\" | od -x --endian=big" + echo "#####################################" echo "# Invalid Inputs #" echo "#####################################" diff --git a/test/regression/gufi_sqlite3.expected b/test/regression/gufi_sqlite3.expected index bc2202536..230f02b2d 100644 --- a/test/regression/gufi_sqlite3.expected +++ b/test/regression/gufi_sqlite3.expected @@ -43,6 +43,23 @@ $ (echo "CREATE TABLE new_table(i INT);"; echo "INSERT INTO new_table VALUES (6) $ gufi_sqlite3 "search" Error: Could not open database file "search" +# virtual table +$ echo "SELECT size, path || '/' || name FROM gufi_vt_pentries('prefix') ORDER BY size ASC;" | gufi_sqlite3 -d "|" +0|prefix/old_file +1|prefix/directory/executable +2|prefix/directory/readonly +3|prefix/directory/writable +4|prefix/directory/subdirectory/directory_symlink +5|prefix/directory/subdirectory/repeat_name +9|prefix/file_symlink +10|prefix/.hidden +11|prefix/leaf_directory/leaf_file1 +12|prefix/leaf_directory/leaf_file2 +14|prefix/repeat_name +15|prefix/unusual#? 
directory ,/unusual, name?# +1024|prefix/1KB +1048576|prefix/1MB + # bad SQL $ (echo "CREATE TABLE;") | gufi_sqlite3 Error: SQL error: near ";": syntax error diff --git a/test/regression/gufi_sqlite3.sh.in b/test/regression/gufi_sqlite3.sh.in index 097e56b9c..b86d71d2b 100755 --- a/test/regression/gufi_sqlite3.sh.in +++ b/test/regression/gufi_sqlite3.sh.in @@ -114,6 +114,9 @@ run_no_sort "(echo \"CREATE TABLE new_table(i INT);\";" \ echo "# directory as db" run_no_sort "${GUFI_SQLITE3} \"${SEARCH}\"" +echo "# virtual table" +PATH="@CMAKE_BINARY_DIR@/src:${PATH}" run_no_sort "echo \"SELECT size, path || '/' || name FROM gufi_vt_pentries('${INDEXROOT}') ORDER BY size ASC;\" | ${GUFI_SQLITE3} -d \"|\"" + echo "# bad SQL" run_no_sort "(echo \"CREATE TABLE;\") |" \ "${GUFI_SQLITE3}" diff --git a/test/regression/gufi_vt.expected b/test/regression/gufi_vt.expected new file mode 100644 index 000000000..0cedb8b4e --- /dev/null +++ b/test/regression/gufi_vt.expected @@ -0,0 +1,236 @@ +# Generate treesummary tables in all directories +$ gufi_treesummary_all "prefix" + +# Query treesummary +$ ( + echo ".load gufi_vt" + echo "SELECT minsize, maxsize, minmtime, maxmtime FROM gufi_vt_treesummary('prefix', 2) ORDER BY minsize ASC, maxsize ASC;" +) | sqlite3 +-1|0|-1|0 +0|1048576|0|1048576 +1|5|1|5 +5|5|4|5 +11|12|11|12 +15|15|15|15 + +# Query summary +$ ( + echo ".load gufi_vt" + echo "SELECT name, size, modetotxt(mode), strftime('%a %b %d %H:%M:%S UTC %Y', mtime) FROM gufi_vt_summary('prefix', 2) ORDER BY name ASC, size ASC;" +) | sqlite3 +directory|7|drwxrwxr-x|Thu Jan 01 00:00:07 UTC 1970 +empty_directory|8|drwxrwxr-x|Thu Jan 01 00:00:08 UTC 1970 +leaf_directory|13|drwxrwxr-x|Thu Jan 01 00:00:13 UTC 1970 +prefix|17|drwxrwxr-x|Thu Jan 01 00:00:17 UTC 1970 +subdirectory|6|drwxrwxr-x|Thu Jan 01 00:00:06 UTC 1970 +unusual#? 
directory ,|16|drwxrwxr-x|Thu Jan 01 00:00:16 UTC 1970 + +# Query entries +$ ( + echo ".load gufi_vt" + echo "SELECT name, size, modetotxt(mode), strftime('%a %b %d %H:%M:%S UTC %Y', mtime) FROM gufi_vt_entries('prefix', 2) ORDER BY name ASC, size ASC;" +) | sqlite3 +.hidden|10|-rw-rw-r--|Thu Jan 01 00:00:10 UTC 1970 +1KB|1024|-rw-rw-r--|Thu Jan 01 00:17:04 UTC 1970 +1MB|1048576|-rw-rw-r--|Tue Jan 13 03:16:16 UTC 1970 +directory_symlink|4|lrwxrwxrwx|Thu Jan 01 00:00:04 UTC 1970 +executable|1|-rwxrwxrwx|Thu Jan 01 00:00:01 UTC 1970 +file_symlink|9|lrwxrwxrwx|Thu Jan 01 00:00:09 UTC 1970 +leaf_file1|11|-rw-rw-r--|Thu Jan 01 00:00:11 UTC 1970 +leaf_file2|12|-rw-rw-r--|Thu Jan 01 00:00:12 UTC 1970 +old_file|0|-rw-rw-r--|Thu Jan 01 00:00:00 UTC 1970 +readonly|2|-r--r--r--|Thu Jan 01 00:00:02 UTC 1970 +repeat_name|5|-rw-rw-r--|Thu Jan 01 00:00:05 UTC 1970 +repeat_name|14|-rw-rw-r--|Thu Jan 01 00:00:14 UTC 1970 +unusual, name?#|15|-rw-rw-r--|Thu Jan 01 00:00:15 UTC 1970 +writable|3|-rw-rw-rw-|Thu Jan 01 00:00:03 UTC 1970 + +# Query pentries +$ ( + echo ".load gufi_vt" + echo "SELECT name, size, modetotxt(mode), strftime('%a %b %d %H:%M:%S UTC %Y', mtime) FROM gufi_vt_pentries('prefix', 2) ORDER BY name ASC, size ASC;" +) | sqlite3 +.hidden|10|-rw-rw-r--|Thu Jan 01 00:00:10 UTC 1970 +1KB|1024|-rw-rw-r--|Thu Jan 01 00:17:04 UTC 1970 +1MB|1048576|-rw-rw-r--|Tue Jan 13 03:16:16 UTC 1970 +directory_symlink|4|lrwxrwxrwx|Thu Jan 01 00:00:04 UTC 1970 +executable|1|-rwxrwxrwx|Thu Jan 01 00:00:01 UTC 1970 +file_symlink|9|lrwxrwxrwx|Thu Jan 01 00:00:09 UTC 1970 +leaf_file1|11|-rw-rw-r--|Thu Jan 01 00:00:11 UTC 1970 +leaf_file2|12|-rw-rw-r--|Thu Jan 01 00:00:12 UTC 1970 +old_file|0|-rw-rw-r--|Thu Jan 01 00:00:00 UTC 1970 +readonly|2|-r--r--r--|Thu Jan 01 00:00:02 UTC 1970 +repeat_name|5|-rw-rw-r--|Thu Jan 01 00:00:05 UTC 1970 +repeat_name|14|-rw-rw-r--|Thu Jan 01 00:00:14 UTC 1970 +unusual, name?#|15|-rw-rw-r--|Thu Jan 01 00:00:15 UTC 1970 +writable|3|-rw-rw-rw-|Thu Jan 01 00:00:03 
UTC 1970 + +# Query vrsummary +$ ( + echo ".load gufi_vt" + echo "SELECT name, size, modetotxt(mode), strftime('%a %b %d %H:%M:%S UTC %Y', mtime) FROM gufi_vt_vrsummary('prefix', 2) ORDER BY name ASC, size ASC;" +) | sqlite3 +directory|7|drwxrwxr-x|Thu Jan 01 00:00:07 UTC 1970 +empty_directory|8|drwxrwxr-x|Thu Jan 01 00:00:08 UTC 1970 +leaf_directory|13|drwxrwxr-x|Thu Jan 01 00:00:13 UTC 1970 +prefix|17|drwxrwxr-x|Thu Jan 01 00:00:17 UTC 1970 +subdirectory|6|drwxrwxr-x|Thu Jan 01 00:00:06 UTC 1970 +unusual#? directory ,|16|drwxrwxr-x|Thu Jan 01 00:00:16 UTC 1970 + +# Query vrpentries +$ ( + echo ".load gufi_vt" + echo "SELECT name, size, modetotxt(mode), strftime('%a %b %d %H:%M:%S UTC %Y', mtime) FROM gufi_vt_vrpentries('prefix', 2) ORDER BY name ASC, size ASC;" +) | sqlite3 +.hidden|10|-rw-rw-r--|Thu Jan 01 00:00:10 UTC 1970 +1KB|1024|-rw-rw-r--|Thu Jan 01 00:17:04 UTC 1970 +1MB|1048576|-rw-rw-r--|Tue Jan 13 03:16:16 UTC 1970 +directory_symlink|4|lrwxrwxrwx|Thu Jan 01 00:00:04 UTC 1970 +executable|1|-rwxrwxrwx|Thu Jan 01 00:00:01 UTC 1970 +file_symlink|9|lrwxrwxrwx|Thu Jan 01 00:00:09 UTC 1970 +leaf_file1|11|-rw-rw-r--|Thu Jan 01 00:00:11 UTC 1970 +leaf_file2|12|-rw-rw-r--|Thu Jan 01 00:00:12 UTC 1970 +old_file|0|-rw-rw-r--|Thu Jan 01 00:00:00 UTC 1970 +readonly|2|-r--r--r--|Thu Jan 01 00:00:02 UTC 1970 +repeat_name|5|-rw-rw-r--|Thu Jan 01 00:00:05 UTC 1970 +repeat_name|14|-rw-rw-r--|Thu Jan 01 00:00:14 UTC 1970 +unusual, name?#|15|-rw-rw-r--|Thu Jan 01 00:00:15 UTC 1970 +writable|3|-rw-rw-rw-|Thu Jan 01 00:00:03 UTC 1970 + +# Query with WHERE size < 10 +$ ( + echo ".load gufi_vt" + echo "SELECT name, size FROM gufi_vt_pentries('prefix', 2) WHERE size < 10 ORDER BY name ASC, size ASC;" +) | sqlite3 +directory_symlink|4 +executable|1 +file_symlink|9 +old_file|0 +readonly|2 +repeat_name|5 +writable|3 + +# Query with WHERE size > 10 +$ ( + echo ".load gufi_vt" + echo "SELECT name, size FROM gufi_vt_pentries('prefix', 2) WHERE size > 10 ORDER BY name ASC, size ASC;" 
+) | sqlite3 +1KB|1024 +1MB|1048576 +leaf_file1|11 +leaf_file2|12 +repeat_name|14 +unusual, name?#|15 + +# Query entries in directory where name == 'directory' +$ ( + echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries('prefix', 2, NULL, 'SELECT NULL, NULL, NULL, NULL, * FROM summary WHERE name == ''directory'';') ORDER BY name ASC, size ASC;" +) | sqlite3 +directory +executable +readonly +writable + +# Query directories that contain entries larger than 1024 (only 1: prefix) +# -S needs to print out at least max_col_id(pentries), so using * +$ ( + echo ".load gufi_vt" + echo "SELECT rowid, name, size, mtime FROM gufi_vt_pentries('prefix', 2, NULL, 'SELECT NULL, NULL, NULL, NULL, * FROM summary WHERE maxsize > 1024;') WHERE type != 'd' ORDER BY rowid ASC;" +) | sqlite3 +1|.hidden|10|10 +2|1KB|1024|1024 +3|1MB|1048576|1048576 +4|file_symlink|9|9 +5|old_file|0|0 +6|repeat_name|14|14 + +# Paths +$ ( + echo ".load gufi_vt" + echo "SELECT path, epath, fpath, rpath FROM gufi_vt_pentries('prefix', 2) WHERE name == '.hidden';" +) | sqlite3 +prefix|prefix|prefix|prefix + +# Make sure all types work +$ ( + echo ".load gufi_vt" + echo "SELECT name, size, size * 1.0, CAST(name AS BLOB), NULL FROM gufi_vt_pentries('prefix', 2) WHERE name == '.hidden';" +) | sqlite3 +.hidden|10|10.0|.hidden| + +# Missing thread count (not an error) +$ ( + echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries('prefix') ORDER BY name ASC, size ASC;" +) | sqlite3 +.hidden +1KB +1MB +directory_symlink +executable +file_symlink +leaf_file1 +leaf_file2 +old_file +readonly +repeat_name +repeat_name +unusual, name?# +writable + +# NULL thread count (not an error) +$ ( + echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries('prefix', NULL) ORDER BY name ASC, size ASC;" +) | sqlite3 +.hidden +1KB +1MB +directory_symlink +executable +file_symlink +leaf_file1 +leaf_file2 +old_file +readonly +repeat_name +repeat_name +unusual, name?# +writable + +# Missing indexroot (error) +$ ( + 
echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries();" +) | sqlite3 +Parse error near line 2: no query solution + +# Bad indexroot +$ ( + echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries('baddir') ORDER BY name ASC, size ASC;" +) | sqlite3 +Could not get realpath of "baddir": No such file or directory (2) + +# Empty string thread count (error) +$ ( + echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries('prefix', '') ORDER BY name ASC, size ASC;" +) | sqlite3 +Runtime error near line 2: Bad thread count: '' (19) + +# Zero thread count (error) +$ ( + echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries('prefix', 0) ORDER BY name ASC, size ASC;" +) | sqlite3 +Runtime error near line 2: Bad thread count: '0' (19) + +# Bad SQL +$ ( + echo ".load gufi_vt" + echo "SELECT name FROM gufi_vt_pentries('prefix', NULL, 'bad SQL') ORDER BY name ASC, size ASC;" +) | sqlite3 +Error: Could not prepare 'bad SQL' for getting column types: SQL logic error (1) + diff --git a/test/regression/gufi_vt.sh.in b/test/regression/gufi_vt.sh.in new file mode 100755 index 000000000..b6e4c28d5 --- /dev/null +++ b/test/regression/gufi_vt.sh.in @@ -0,0 +1,156 @@ +#!/usr/bin/env bash +# This file is part of GUFI, which is part of MarFS, which is released +# under the BSD license. +# +# +# Copyright (c) 2017, Los Alamos National Security (LANS), LLC +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation and/or +# other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# From Los Alamos National Security, LLC: +# LA-CC-15-039 +# +# Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved. +# Copyright 2017. Los Alamos National Security, LLC. This software was produced +# under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National +# Laboratory (LANL), which is operated by Los Alamos National Security, LLC for +# the U.S. Department of Energy. The U.S. Government has rights to use, +# reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS +# ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR +# ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is +# modified to produce derivative works, such modified software should be +# clearly marked, so as not to confuse it with the version available from +# LANL. 
+# +# THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +# OF SUCH DAMAGE. + + + +set -e +source @CMAKE_CURRENT_BINARY_DIR@/setup.sh 1 + +OUTPUT="gufi_vt.out" + +LOAD=".load @CMAKE_BINARY_DIR@/src/gufi_vt" +BADDIR=$(mktemp -d "${INDEXROOT}.XXXXXX") + +cleanup() { + rm -rf "${BADDIR}" +} + +cleanup_exit() { + cleanup + setup_cleanup +} + +trap cleanup_exit EXIT + +cleanup + +query_vt() { + sql="$1" + echo "$ (" + echo " echo \"${LOAD}\"" + echo " echo \"${sql}\"" + echo ") | ${SQLITE3}" + ( + echo "${LOAD}" + echo "${sql}" + ) | PATH="@CMAKE_BINARY_DIR@/src:${PATH}" "${SQLITE3}" 2>&1 + echo +} + +( +echo "# Generate treesummary tables in all directories" +run_no_sort "${GUFI_TREESUMMARY_ALL} \"${INDEXROOT}\"" | sed '/^Started .*$/d' + +echo "# Query treesummary" +query_vt "SELECT minsize, maxsize, minmtime, maxmtime FROM gufi_vt_treesummary('${INDEXROOT}', ${THREADS}) ORDER BY minsize ASC, maxsize ASC;" + +for name in summary entries pentries vrsummary vrpentries +do + echo "# Query ${name}" + query_vt "SELECT name, size, modetotxt(mode), strftime('%a %b %d %H:%M:%S UTC %Y', mtime) FROM gufi_vt_${name}('${INDEXROOT}', ${THREADS}) ORDER BY name ASC, size ASC;" +done + +echo "# Query with WHERE size < 10" +query_vt "SELECT name, size FROM 
gufi_vt_pentries('${INDEXROOT}', ${THREADS}) WHERE size < 10 ORDER BY name ASC, size ASC;" + +echo "# Query with WHERE size > 10" +query_vt "SELECT name, size FROM gufi_vt_pentries('${INDEXROOT}', ${THREADS}) WHERE size > 10 ORDER BY name ASC, size ASC;" + +echo "# Query entries in directory where name == 'directory'" +query_vt "SELECT name FROM gufi_vt_pentries('${INDEXROOT}', ${THREADS}, NULL, 'SELECT NULL, NULL, NULL, NULL, * FROM summary WHERE name == ''directory'';') ORDER BY name ASC, size ASC;" + +echo "# Query directories that contain entries larger than 1024 (only 1: ${INDEXROOT})" +echo "# -S needs to print out at least max_col_id(pentries), so using *" # FIXME +query_vt "SELECT rowid, name, size, mtime FROM gufi_vt_pentries('${INDEXROOT}', ${THREADS}, NULL, 'SELECT NULL, NULL, NULL, NULL, * FROM summary WHERE maxsize > 1024;') WHERE type != 'd' ORDER BY rowid ASC;" + +echo "# Paths" +query_vt "SELECT path, epath, fpath, rpath FROM gufi_vt_pentries('${INDEXROOT}', ${THREADS}) WHERE name == '.hidden';" + +echo "# Make sure all types work" +query_vt "SELECT name, size, size * 1.0, CAST(name AS BLOB), NULL FROM gufi_vt_pentries('${INDEXROOT}', ${THREADS}) WHERE name == '.hidden';" + +echo "# Missing thread count (not an error)" +query_vt "SELECT name FROM gufi_vt_pentries('${INDEXROOT}') ORDER BY name ASC, size ASC;" + +echo "# NULL thread count (not an error)" +query_vt "SELECT name FROM gufi_vt_pentries('${INDEXROOT}', NULL) ORDER BY name ASC, size ASC;" + +set +e +echo "# Missing indexroot (error)" +query_vt "SELECT name FROM gufi_vt_pentries();" + +echo "# Bad indexroot" +rm -rf "${BADDIR}" +query_vt "SELECT name FROM gufi_vt_pentries('${BADDIR}') ORDER BY name ASC, size ASC;" | sed "s/${BADDIR//\//\\/}/baddir/g" + +echo "# Empty string thread count (error)" +query_vt "SELECT name FROM gufi_vt_pentries('${INDEXROOT}', '') ORDER BY name ASC, size ASC;" + +echo "# Zero thread count (error)" +query_vt "SELECT name FROM gufi_vt_pentries('${INDEXROOT}', 0) 
ORDER BY name ASC, size ASC;" + +echo "# Bad SQL" +query_vt "SELECT name FROM gufi_vt_pentries('${INDEXROOT}', NULL, 'bad SQL') ORDER BY name ASC, size ASC;" +set -e +) | replace | tee "${OUTPUT}" + +@DIFF@ @CMAKE_CURRENT_BINARY_DIR@/gufi_vt.expected "${OUTPUT}" +rm "${OUTPUT}" diff --git a/test/regression/setup.sh.in b/test/regression/setup.sh.in index 5f7237214..7932e0ece 100755 --- a/test/regression/setup.sh.in +++ b/test/regression/setup.sh.in @@ -168,6 +168,7 @@ GUFI_TRACE2INDEX="@CMAKE_BINARY_DIR@/src/gufi_trace2index" GUFI_TREESUMMARY="@CMAKE_BINARY_DIR@/src/gufi_treesummary" GUFI_TREESUMMARY_ALL="@CMAKE_BINARY_DIR@/src/gufi_treesummary_all" GUFI_UNROLLUP="@CMAKE_BINARY_DIR@/src/gufi_unrollup" +GUFI_VT="@CMAKE_BINARY_DIR@/src/gufi_vt" LONGITUDINAL_SNAPSHOT="@CMAKE_BINARY_DIR@/contrib/longitudinal_snapshot.py" OLDBIGFILES="@CMAKE_BINARY_DIR@/examples/oldbigfiles" PARALLEL_CPR="@CMAKE_BINARY_DIR@/src/parallel_cpr" @@ -215,6 +216,7 @@ replace() { s/${GUFI_TREESUMMARY//\//\\/}/gufi_treesummary/g; s/${GUFI_TREESUMMARY_ALL//\//\\/}/gufi_treesummary_all/g; s/${GUFI_UNROLLUP//\//\\/}/gufi_unrollup/g; + s/${GUFI_VT//\//\\/}/gufi_vt/g s/${LONGITUDINAL_SNAPSHOT//\//\\/}/longitudinal_snapshot.py/g; s/${OLDBIGFILES//\//\\/}/oldbigfiles/g; s/${PARALLEL_CPR//\//\\/}/parallel_cpr/g; diff --git a/test/unit/googletest/CMakeLists.txt b/test/unit/googletest/CMakeLists.txt index b99568b57..f0ece9af2 100644 --- a/test/unit/googletest/CMakeLists.txt +++ b/test/unit/googletest/CMakeLists.txt @@ -77,6 +77,7 @@ if (CMAKE_CXX_COMPILER) bf.cpp compress.cpp debug.cpp + handle_sql.cpp histogram.cpp print.cpp swap.cpp diff --git a/test/unit/googletest/PoolArgs.cpp b/test/unit/googletest/PoolArgs.cpp index 2213a17ac..8c8347560 100644 --- a/test/unit/googletest/PoolArgs.cpp +++ b/test/unit/googletest/PoolArgs.cpp @@ -124,6 +124,7 @@ void test_common(PoolArgs *pa) { print.mutex = nullptr; print.outfile = file; print.rows = 0; + print.types = nullptr; // read from the database being processed 
// no need for WHERE - there should only be 1 table diff --git a/test/unit/googletest/bf.cpp b/test/unit/googletest/bf.cpp index 67785fd70..531387b88 100644 --- a/test/unit/googletest/bf.cpp +++ b/test/unit/googletest/bf.cpp @@ -85,6 +85,7 @@ static const std::string n = "-n"; static const std::string n_arg = "1"; static const std::string d = "-d"; static const std::string d_arg = "|"; static const std::string o = "-o"; static const std::string o_arg = "o arg"; static const std::string O = "-O"; static const std::string O_arg = "O arg"; +static const std::string u = "-u"; static const std::string I = "-I"; static const std::string I_arg = "I arg"; static const std::string T = "-T"; static const std::string T_arg = "T arg"; static const std::string S = "-S"; static const std::string S_arg = "S arg"; @@ -151,6 +152,7 @@ static void check_input(struct input *in, const bool helped, EXPECT_EQ(in->printrows, flags); EXPECT_EQ(in->buildindex, flags); EXPECT_EQ(in->andor, flags); + EXPECT_EQ(in->types.prefix, flags); EXPECT_EQ(in->insertfl, flags); EXPECT_EQ(in->insertdir, flags); EXPECT_EQ(in->suspectd, flags); @@ -263,7 +265,7 @@ TEST(parse_cmd_line, help) { } TEST(parse_cmd_line, debug) { - const char opts[] = "HxpPNVban:d:i:t:o:O:I:T:S:E:F:rRYZW:A:g:c:y:z:J:K:G:mB:wf:jXL:k:M:C:" COMPRESS_OPT "qQ:s:"; + const char opts[] = "HxpPNVban:d:i:t:o:O:uI:T:S:E:F:rRYZW:A:g:c:y:z:J:K:G:mB:wf:jXL:k:M:C:" COMPRESS_OPT "qQ:s:"; const char *argv[] = { exec.c_str(), @@ -277,6 +279,7 @@ TEST(parse_cmd_line, debug) { a.c_str(), n.c_str(), n_arg.c_str(), d.c_str(), d_arg.c_str(), + u.c_str(), I.c_str(), I_arg.c_str(), T.c_str(), T_arg.c_str(), S.c_str(), S_arg.c_str(), @@ -331,7 +334,7 @@ TEST(parse_cmd_line, debug) { } TEST(parse_cmd_line, flags) { - const char opts[] = "xpPNVbarRYZmwjX" COMPRESS_OPT "q"; + const char opts[] = "xpPNVbaurRYZmwjX" COMPRESS_OPT "q"; const char *argv[] = { exec.c_str(), @@ -342,6 +345,7 @@ TEST(parse_cmd_line, flags) { V.c_str(), b.c_str(), a.c_str(), + 
u.c_str(), r.c_str(), R.c_str(), Y.c_str(), diff --git a/test/unit/googletest/dbutils.cpp.in b/test/unit/googletest/dbutils.cpp.in index 7d80ebb46..7d7860ac4 100644 --- a/test/unit/googletest/dbutils.cpp.in +++ b/test/unit/googletest/dbutils.cpp.in @@ -815,3 +815,81 @@ TEST(bottomup_collect_treesummary, nullptr) { sll_destroy(&sll, nullptr); EXPECT_EQ(rmdir(dirname), 0); } + +static const char CREATE_TABLE_TEST[] = "CREATE TABLE test(i INT, f FLOAT, t TEXT, b BLOB, n NULL, d DATE);"; + +TEST(get_col_types, have_cols) { + sqlite3 *db = nullptr; + refstr_t str = {}; + int cols = 0; + + ASSERT_EQ(sqlite3_open_v2(SQLITE_MEMORY, &db, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_URI, + nullptr), SQLITE_OK); + + ASSERT_EQ(sqlite3_exec(db, CREATE_TABLE_TEST, + nullptr, nullptr, nullptr), SQLITE_OK); + + str.data = "SELECT * FROM test;"; + str.len = strlen(str.data); + + int *types = get_col_types(db, &str, &cols); + ASSERT_NE(types, nullptr); + + const int expected[] = { SQLITE_INTEGER, SQLITE_FLOAT, SQLITE_TEXT, SQLITE_BLOB, SQLITE_NULL, 0}; + EXPECT_EQ((std::size_t) cols, sizeof(expected) / sizeof(expected[0])); + + for(std::size_t i = 0; i < (sizeof(expected) / sizeof(expected[0])); i++) { + EXPECT_EQ(types[i], expected[i]); + } + + free(types); + + sqlite3_close(db); +} + +TEST(get_col_types, no_cols) { + sqlite3 *db = nullptr; + refstr_t str = {}; + int cols = 0; + + ASSERT_EQ(sqlite3_open_v2(SQLITE_MEMORY, &db, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_URI, + nullptr), SQLITE_OK); + + ASSERT_EQ(sqlite3_exec(db, CREATE_TABLE_TEST, + nullptr, nullptr, nullptr), SQLITE_OK); + + str.data = "INSERT INTO test (i) VALUES (0);"; + str.len = strlen(str.data); + + int *types = get_col_types(db, &str, &cols); + EXPECT_EQ(types, nullptr); + + sqlite3_close(db); +} + +TEST(get_col_types, bad) { + sqlite3 *db = nullptr; + refstr_t str = {}; + int cols = 0; + + ASSERT_EQ(sqlite3_open_v2(SQLITE_MEMORY, &db, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE 
| SQLITE_OPEN_URI, + nullptr), + SQLITE_OK); + ASSERT_EQ(sqlite3_exec(db, CREATE_TABLE_TEST, + nullptr, nullptr, nullptr), SQLITE_OK); + + EXPECT_EQ(get_col_types(nullptr, &str, &cols), nullptr); + EXPECT_EQ(get_col_types(db, &str, &cols), nullptr); + // &cols == nullptr will break + + str.data = "bad SQL"; + str.len = strlen(str.data); + + int *types = get_col_types(db, &str, &cols); + EXPECT_EQ(types, nullptr); + + sqlite3_close(db); +} diff --git a/test/unit/googletest/handle_sql.cpp b/test/unit/googletest/handle_sql.cpp new file mode 100644 index 000000000..b119ac99e --- /dev/null +++ b/test/unit/googletest/handle_sql.cpp @@ -0,0 +1,148 @@ +/* +This file is part of GUFI, which is part of MarFS, which is released +under the BSD license. + + +Copyright (c) 2017, Los Alamos National Security (LANS), LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +From Los Alamos National Security, LLC: +LA-CC-15-039 + +Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved. +Copyright 2017. Los Alamos National Security, LLC. This software was produced +under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National +Laboratory (LANL), which is operated by Los Alamos National Security, LLC for +the U.S. Department of Energy. The U.S. Government has rights to use, +reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS +ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR +ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is +modified to produce derivative works, such modified software should be +clearly marked, so as not to confuse it with the version available from +LANL. + +THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. +*/ + + + +#include +#include +#include +#include +#include +#include + +#include + +extern "C" { + +#include "bf.h" +#include "dbutils.h" +#include "gufi_query/handle_sql.h" +#include "print.h" + +} + +TEST(handle_sql, no_aggregation) { + // not testing treesummary for now + const char S_GOOD[] = "SELECT name FROM " SUMMARY ";"; + const char S_BAD[] = "INSERT INTO " SUMMARY "(name) VALUES ('');"; + const char E_GOOD[] = "SELECT name FROM " ENTRIES ";"; + const char E_BAD[] = "INSERT INTO " ENTRIES "(name) VALUES ('');"; + + struct input in; + ASSERT_EQ(input_init(&in), &in); + in.types.prefix = 1; + + for(const char *S : {(const char *) nullptr, S_GOOD, S_BAD}) { + in.sql.sum.data = S?S:nullptr; + in.sql.sum.len = S?strlen(S):0; + + for(const char *E : {(const char *) nullptr, E_GOOD, E_BAD}) { + in.sql.ent.data = E?E:nullptr; + in.sql.ent.len = E?strlen(E):0; + + /* Bad SQL -> return -1 */ + const int expected = -((S == S_BAD) || (E == E_BAD)); + + EXPECT_EQ(handle_sql(&in), expected); + + free(in.types.ent); + in.types.ent = nullptr; + free(in.types.sum); + in.types.sum = nullptr; + free(in.types.tsum); + in.types.tsum = nullptr; + } + } + + input_fini(&in); +} + +TEST(handle_sql, aggregation) { + struct input in; + ASSERT_EQ(input_init(&in), &in); + in.types.prefix = 1; + + const char I[] = "CREATE TABLE;"; + const char J[] = "INSERT INTO"; + const char K_GOOD[] = "CREATE TABLE agg(i INT)"; + const char K_BAD[] = "CREATE TABLE"; + const 
char G_GOOD[] = "SELECT i FROM agg"; + const char G_BAD[] = "INSERT INT agg (i) VALUES (0);"; + + in.sql.init.data = I; in.sql.init.len = strlen(I); + in.sql.intermediate.data = J; in.sql.intermediate.len = strlen(J); + + in.sql.init_agg.data = K_GOOD; in.sql.init_agg.len = strlen(K_GOOD); + in.sql.agg.data = G_GOOD; in.sql.agg.len = strlen(G_GOOD); + EXPECT_EQ(handle_sql(&in), 0); /* good -K/-G succeed even though I and J are malformed */ + + free(in.types.agg); + in.types.agg = nullptr; + + in.sql.agg.data = G_BAD; in.sql.agg.len = strlen(G_BAD); + EXPECT_EQ(handle_sql(&in), -1); /* malformed -G is rejected */ + + in.sql.init_agg.data = K_BAD; in.sql.init_agg.len = strlen(K_BAD); + EXPECT_EQ(handle_sql(&in), -1); /* malformed -K is rejected */ + + input_fini(&in); +} diff --git a/test/unit/googletest/histogram.cpp b/test/unit/googletest/histogram.cpp index 656d08b86..f2450d54f 100644 --- a/test/unit/googletest/histogram.cpp +++ b/test/unit/googletest/histogram.cpp @@ -69,6 +69,7 @@ OF SUCH DAMAGE. #include #include "bf.h" +#include "dbutils.h" #include "histogram.h" static void setup_db(sqlite3 **db) { diff --git a/test/unit/googletest/print.cpp b/test/unit/googletest/print.cpp index 6ac59c179..03f062784 100644 --- a/test/unit/googletest/print.cpp +++ b/test/unit/googletest/print.cpp @@ -64,13 +64,14 @@ OF SUCH DAMAGE. 
#include #include +#include #include #include #include "print.h" -static void print_parallel_mutex(pthread_mutex_t *mutex) { +static void print_parallel_mutex_actual(pthread_mutex_t *mutex) { const std::string A = "A"; const std::string BC = "BC"; const std::string D = "D"; @@ -103,6 +104,7 @@ static void print_parallel_mutex(pthread_mutex_t *mutex) { pa.mutex = mutex; pa.outfile = file; pa.rows = 0; + pa.types = nullptr; /* no per-column types: plain delimited printing (TLV mode tested separately) */ // A\n is buffered in OutputBuffer and takes up all available space { @@ -171,15 +173,100 @@ static void print_parallel_mutex(pthread_mutex_t *mutex) { } fclose(file); - OutputBuffer_destroy(&ob); delete [] buf; + OutputBuffer_destroy(&ob); } -TEST(print, parallel_w_mutex) { +TEST(print_parallel, mutex) { pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - print_parallel_mutex(&mutex); + print_parallel_mutex_actual(&mutex); + print_parallel_mutex_actual(nullptr); /* also exercise the no-mutex path */ +} + +static void print_parallel_tlv_actual(const bool use_len) { /* use_len: true -> OutputBuffer fits the whole TLV row; false -> 1-byte buffer */ + const std::string INTEGER = "1"; + const std::string FLOAT = "1.0"; + const std::string TEXT = "text"; + const std::string BLOB = "blob"; + const std::string NULL_ = "NULL"; + const std::string DATE = "date"; + + const char *DATA[] = { + INTEGER.c_str(), + FLOAT.c_str(), + TEXT.c_str(), + BLOB.c_str(), + NULL_.c_str(), + DATE.c_str(), + }; + + const std::size_t COL_COUNT = sizeof(DATA) / sizeof(DATA[0]); + + const int TYPES[] = { + SQLITE_INTEGER, + SQLITE_FLOAT, + SQLITE_TEXT, + SQLITE_BLOB, + SQLITE_NULL, + 0, /* deliberately not a SQLite fundamental type code */ + }; + + const std::size_t total_len = + sizeof(int) + // number of columns + INTEGER.size() + FLOAT.size() + TEXT.size() + + BLOB.size() + NULL_.size() + DATE.size() + + COL_COUNT + // 1 octet types + COL_COUNT * sizeof(size_t) // lengths + ; + + struct OutputBuffer ob; + EXPECT_EQ(OutputBuffer_init(&ob, use_len?total_len + 1:1), &ob); + + char *buf = new char[total_len + 1](); + FILE *file = fmemopen(buf, total_len + 1, "w+b"); + ASSERT_NE(file, nullptr); + + PrintArgs pa; + pa.output_buffer = &ob; + pa.delim = 
'|'; // ignored + pa.mutex = nullptr; + pa.outfile = file; + pa.rows = 0; + pa.types = TYPES; /* non-null types selects TLV output */ + + EXPECT_EQ(print_parallel(&pa, COL_COUNT, (char **) DATA, nullptr), 0); + EXPECT_EQ(ob.filled, use_len?total_len:0); /* undersized buffer: row presumably bypasses the OutputBuffer straight to outfile — confirm */ + EXPECT_EQ(OutputBuffer_flush(&ob, file), use_len?total_len:0); + EXPECT_EQ(fflush(file), 0); + EXPECT_EQ(pa.rows, (std::size_t) 1); + + char *curr = buf; + + // column_count + EXPECT_EQ((std::size_t) * (int *) curr, COL_COUNT); + curr += sizeof(int); + + for(std::size_t i = 0; i < COL_COUNT; i++) { + // type + EXPECT_EQ(*curr, (char) TYPES[i]); + curr++; + + // length + const size_t len = * (size_t *) curr; /* NOTE(review): unaligned host-endian load matches the -u wire format, but consider memcpy */ + curr += sizeof(size_t); + + // value + EXPECT_EQ(len, strlen(DATA[i])); + EXPECT_EQ(std::string(curr, len), DATA[i]); + curr += len; + } + + fclose(file); + delete [] buf; + OutputBuffer_destroy(&ob); } -TEST(print, parallel_wo_mutex) { - print_parallel_mutex(nullptr); +TEST(print_parallel, tlv) { /* cover both buffered and unbuffered TLV paths */ + print_parallel_tlv_actual(true); + print_parallel_tlv_actual(false); }