Skip to content

Commit

Permalink
SQLite3 virtual tables/table-valued functions
Browse files Browse the repository at this point in the history
This exposes an entire GUFI tree as a single view through the SQLite
virtual table interface, instead of as many separate database files.
Users can query gufi_vt_* virtual tables as though they have access
to that table across the entire GUFI tree in one database i.e.:

    SELECT name, size FROM gufi_vt_pentries('index root') WHERE size > 1024;

This is done by calling gufi_query through popen and reading all of
the results back from the returned FILE *, which points to stdout.

The first positional argument points to the starting directory, and is
required. The remaining arguments are listed in the expected order and
are optional: thread count, -T, and -S. To skip an argument to the
left of an argument not being skipped, pass in None. After the final
unskipped argument, the remaining arguments may be skipped by not
passing in any value. -T and -S may be used to modify tree traversal
behavior.

The tree traversal behavior of gufi_query may be changed with
positional arguments.

GUFI user defined functions (UDFs) that do not require gufi_query
state (not including the histogram functions) may be called. The UDFs
that require gufi_query state (path(), epath(), fpath(), and rpath())
can be accessed from the virtual table by using columns with the same
names.

gufi_query now has the -u flag, which causes each printed column to be
prefixed with a 1 octet type and a 4 digit (not binary) human readable
length. The column separator is still printed.
    - The length field was chosen to have 4 digits for now because
      if a column has 10000+ characters, it is probably too big.
      This will change if the assumption is incorrect.
    - Digits were used instead of the octets of a fixed length
      integer in order to distinguish between a 0x0a in the length
      and newline characters.
  • Loading branch information
calccrypto committed Jan 24, 2025
1 parent 4bf414b commit e42b13b
Show file tree
Hide file tree
Showing 32 changed files with 2,313 additions and 619 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
run: ctest || true

- name: Delete files not reported for coverage
run: find -name "*.gc*" -a \( -name "gendir.*" -o -name "make_testindex.*" -o -name "bfwreaddirplus2db.*" -o -name "bffuse.*" -o -name "bfresultfuse.*" -o -name "dfw.*" -o -name "tsmepoch2time.*" -o -name "tsmtime2epoch.*" -o -path "*/test/*" \) -delete
run: find -name "*.gc*" -a \( -name "gendir.*" -o -name "make_testindex.*" -o -name "bfwreaddirplus2db.*" -o -name "bffuse.*" -o -name "bfresultfuse.*" -o -name "dfw.*" -o -name "tsmepoch2time.*" -o -name "tsmtime2epoch.*" -o -path "*/test/*" -o \( -path "*/gufi_vt.dir/*" -a -not -name "gufi_vt.*" \) \) -delete

- name: Generate Python Coverage Report
run: |
Expand Down
14 changes: 8 additions & 6 deletions contrib/treediff.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,14 @@ static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) {
/* ********************************************** */
const size_t next_level = cp->level + 1;

struct PrintArgs print;
print.output_buffer = &pa->obufs.buffers[id];
print.delim = '/';
print.mutex = pa->obufs.mutex;
print.outfile = stdout;
print.rows = 0;
struct PrintArgs print = {
.output_buffer = &pa->obufs.buffers[id],
.delim = '/',
.mutex = pa->obufs.mutex,
.outfile = stdout,
.rows = 0,
.types = NULL,
};

char *buf[] = {NULL, NULL}; /* passed to print_parallel */

Expand Down
98 changes: 98 additions & 0 deletions include/addqueryfuncs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
This file is part of GUFI, which is part of MarFS, which is released
under the BSD license.
Copyright (c) 2017, Los Alamos National Security (LANS), LLC
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
From Los Alamos National Security, LLC:
LA-CC-15-039
Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved.
Copyright 2017. Los Alamos National Security, LLC. This software was produced
under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
the U.S. Department of Energy. The U.S. Government has rights to use,
reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is
modified to produce derivative works, such modified software should be
clearly marked, so as not to confuse it with the version available from
LANL.
THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.
*/



/*
 * Declarations of the SQL functions GUFI adds to a SQLite3 database
 * handle, plus the entry points that add them.
 */
#ifndef ADDQUERYFUNCS_H
#define ADDQUERYFUNCS_H

#include <sqlite3.h>

#include "bf.h"

#ifdef __cplusplus
extern "C" {
#endif

/* list of functions to add to a SQLite3 db handle that do not have user data/context */

/*
 * Functions taking (context, argc, argv) have the signature SQLite
 * requires of scalar function and aggregate step callbacks.
 */
extern void uidtouser(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void gidtogroup(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void modetotxt(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void sqlite3_strftime(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void blocksize(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void human_readable_size(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void sqlite_basename(sqlite3_context *context, int argc, sqlite3_value **argv);
/*
 * Functions taking only (context) have the signature SQLite requires
 * of aggregate final callbacks.
 *
 * NOTE(review): stdev_step is presumably shared by stdevs_final and
 * stdevp_final (sample vs. population standard deviation) - confirm
 * against addqueryfuncs.c
 */
extern void stdev_step(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void stdevs_final(sqlite3_context *context);
extern void stdevp_final(sqlite3_context *context);
extern void median_step(sqlite3_context *context, int argc, sqlite3_value **argv);
extern void median_final(sqlite3_context *context);

/*
 * Entry points that add functions to db.
 *
 * NOTE(review): the return value convention is not visible in this
 * header - confirm against addqueryfuncs.c
 *
 * NOTE(review): addqueryfuncs_with_context presumably adds the
 * functions that need per-directory state from work - confirm
 */
int addqueryfuncs(sqlite3 *db);
int addqueryfuncs_with_context(sqlite3 *db, struct work *work);

#ifdef __cplusplus
}
#endif

#endif
18 changes: 18 additions & 0 deletions include/bf.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,24 @@ struct input {
refstr_t fin;
} sql;

/*
* if outputting to STDOUT or OUTFILE, get list of
* types of final output to prefix columns with
*
* set up by gufi_query but cleaned up by input_fini
*/
struct {
/* NOTE(review): presumably nonzero when type/length prefixing was requested - confirm against the flag parser */
int prefix;

/* set if not aggregating */
/* NOTE(review): presumably one array of column types per query stage, as returned by get_col_types() - confirm */
int *tsum;
int *sum;
int *ent;

/* set if aggregating */
/* NOTE(review): presumably the column types of the final aggregate query - confirm */
int *agg;
} types;

int printdir;
int printing;
int printheader;
Expand Down
26 changes: 19 additions & 7 deletions include/dbutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ OF SUCH DAMAGE.
#include <sqlite3.h>

#include "SinglyLinkedList.h"
#include "addqueryfuncs.h"
#include "template_db.h"
#include "utils.h"
#include "xattrs.h"
Expand All @@ -91,18 +92,24 @@ extern const char *SQLITE_MEMORY;
#define DROP_TABLE(name) "DROP TABLE IF EXISTS " #name ";"
#define DROP_VIEW(name) "DROP VIEW IF EXISTS " #name ";"

#define READDIRPLUS "readdirplus"
#define READDIRPLUS "readdirplus"
#define READDIRPLUS_SCHEMA(name) \
"CREATE TABLE " name "(path TEXT, type TEXT, inode TEXT PRIMARY KEY, pinode TEXT, suspect INT64);"
extern const char READDIRPLUS_CREATE[];
extern const char READDIRPLUS_INSERT[];

/* contains all file and link metadata for the current directory */
/* prefer pentries over entries */
#define ENTRIES "entries"
#define ENTRIES_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(" extra_cols "name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT);"
extern const char ENTRIES_CREATE[];
extern const char ENTRIES_INSERT[];

/* directory metadata + aggregate data */
#define SUMMARY "summary"
#define SUMMARY_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(" extra_cols "name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, rectype INT64, pinode TEXT, isroot INT64, rollupscore INT64);"
extern const char SUMMARY_CREATE[];

/* view of summary table with rollups */
Expand All @@ -111,22 +118,28 @@ extern const char VRSUMMARY_CREATE[];

/* pentries pulled from children */
#define PENTRIES_ROLLUP "pentries_rollup"
#define PENTRIES_ROLLUP_SCHEMA(name) \
"CREATE TABLE " name "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT, pinode TEXT, ppinode TEXT);"
extern const char PENTRIES_ROLLUP_CREATE[];
extern const char PENTRIES_ROLLUP_INSERT[];

/* (entries + summary.inode) UNION pentries_rollup */
#define PENTRIES "pentries"
extern const char PENTRIES_CREATE[];

/* vrentries is not created because rolled up entries tables are not correct */

/* view of pentries view with rollups */
#define VRPENTRIES "vrpentries"
extern const char VRPENTRIES_CREATE[];

/* aggregate data of tree starting at current directory */
#define TREESUMMARY "treesummary"
#define TREESUMMARY_CREATE \
DROP_TABLE(TREESUMMARY) \
"CREATE TABLE " TREESUMMARY "(inode TEXT, pinode TEXT, totsubdirs INT64, maxsubdirfiles INT64, maxsubdirlinks INT64, maxsubdirsize INT64, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, totextdbs INT64, rectype INT64, uid INT64, gid INT64);"
#define TREESUMMARY_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(" extra_cols "inode TEXT, pinode TEXT, totsubdirs INT64, maxsubdirfiles INT64, maxsubdirlinks INT64, maxsubdirsize INT64, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, totextdbs INT64, rectype INT64, uid INT64, gid INT64);"
#define TREESUMMARY_CREATE \
DROP_TABLE(TREESUMMARY) \
TREESUMMARY_SCHEMA(TREESUMMARY, "")

extern const char TREESUMMARY_EXISTS[];

Expand Down Expand Up @@ -202,9 +215,6 @@ int insertsumdb(sqlite3 *sdb, const char *path, struct work *pwork, struct entry

int inserttreesumdb(const char *name, sqlite3 *sdb, struct sum *su, int rectype, int uid, int gid);

int addqueryfuncs(sqlite3 *db);
int addqueryfuncs_with_context(sqlite3 *db, struct work *work);

/* xattr db list item */
struct xattr_db {
long long int pinode;
Expand Down Expand Up @@ -258,6 +268,8 @@ enum CheckRollupScore {
int bottomup_collect_treesummary(sqlite3 *db, const char *dirname, sll_t *subdirs,
const enum CheckRollupScore check_rollupscore);

int *get_col_types(sqlite3 *db, const refstr_t *sql, int *cols);

#ifdef __cplusplus
}
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ OF SUCH DAMAGE.



#ifndef GUFI_QUERY_VALIDATE_INPUTS_H
#define GUFI_QUERY_VALIDATE_INPUTS_H
#ifndef GUFI_QUERY_HANDLE_SQL_H
#define GUFI_QUERY_HANDLE_SQL_H

#include "bf.h"

int validate_inputs(struct input *in);
int handle_sql(struct input *in);

#endif
2 changes: 1 addition & 1 deletion include/gufi_query/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ OF SUCH DAMAGE.

void querydb(struct work *work,
const char *dbname, const size_t dbname_len,
sqlite3 *db, const char *query,
sqlite3 *db, const char *query, int *types,
PoolArgs_t *pa, int id,
int (*callback)(void *, int, char **, char**), int *rc);

Expand Down
1 change: 1 addition & 0 deletions include/print.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ typedef struct PrintArgs {
pthread_mutex_t *mutex; /* mutex for printing to stdout */
FILE *outfile;
size_t rows; /* number of rows returned by the query */
int *types; /* if set, prefix output with 1 octet type and 4 digit human readable length */
/* size_t printed; /\* number of records printed by the callback *\/ */
} PrintArgs_t;

Expand Down
26 changes: 25 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ set(GUFI_SOURCES
OutputBuffers.c
QueuePerThreadPool.c
SinglyLinkedList.c
addqueryfuncs.c
bf.c
compress.c
dbutils.c
Expand Down Expand Up @@ -199,10 +200,10 @@ add_library(gufi_query_lib OBJECT
gufi_query/aggregate.c
gufi_query/external.c
gufi_query/gqw.c
gufi_query/handle_sql.c
gufi_query/process_queries.c
gufi_query/processdir.c
gufi_query/query.c
gufi_query/validate_inputs.c
)

add_dependencies(gufi_query_lib GUFI)
Expand All @@ -212,6 +213,29 @@ build_and_install_one(${BIN} TRUE gufi_query
$<TARGET_OBJECTS:gufi_query_lib>
)

# build custom SQLite virtual tables
# MODULE: built as a plugin to be loaded at runtime rather than linked against
add_library(gufi_vt MODULE
gufi_vt.c

# have to recompile with -fPIC
SinglyLinkedList.c
addqueryfuncs.c
histogram.c
trie.c
utils.c
)
# an empty PREFIX removes the default "lib" from the output file name
set_target_properties(gufi_vt PROPERTIES PREFIX "")
# pick the file extension of the sqlite3 library installed under DEP_INSTALL_PREFIX
if(APPLE)
set(EXT "dylib")
elseif (CYGWIN)
set(EXT "dll.a")
else ()
set(EXT "so")
endif()
target_link_libraries(gufi_vt "${DEP_INSTALL_PREFIX}/sqlite3/lib/libsqlite3.${EXT}")
# the sqlite3 library linked above comes from install_dependencies, so that target must run first
add_dependencies(gufi_vt install_dependencies)
install(TARGETS gufi_vt DESTINATION ${LIB} COMPONENT Server)

# build binaries that do not need to link with GUFI
set(MISC_SOURCES tsmtime2epoch.c tsmepoch2time.c)
build_and_install(${BIN} FALSE ${MISC_SOURCES})
Expand Down
Loading

0 comments on commit e42b13b

Please sign in to comment.