Skip to content

Commit

Permalink
SQLite3 virtual tables/table-valued functions
Browse files Browse the repository at this point in the history
This exposes an entire GUFI tree as a single view, instead of as many
separate database files, through the SQLite virtual table interface.
Users can query gufi_vt_* virtual tables as though they have access
to that table across the entire GUFI tree in one database i.e.:

    SELECT name, size FROM gufi_vt_pentries('index root') WHERE size > 1024;

This is done by calling gufi_query through popen and reading all of
the results back from the returned FILE *, which points to stdout.

The first positional argument points to the starting directory, and is
required. The remaining arguments are listed in the expected order and
are optional: thread count, -T, and -S. To skip an argument to the
left of an argument not being skipped, pass in NULL. After the final
unskipped argument, the remaining arguments may be skipped by not
passing in any value. -T and -S may be used to modify tree traversal
behavior.

GUFI user defined functions (UDFs) that do not require gufi_query
state (excluding histogram functions) may be called. UDFs requiring
gufi_query state (path(), epath(), fpath(), and rpath()) can be accessed
from the virtual table by using columns with the same names.

gufi_query now has the -u flag that causes prints to prepend the row
with a 1 int (host size and endianness) column count followed by columns
prepended with a 1 octet type and 1 size_t length (host size and
endianness). Column separators and newlines are not printed.
  • Loading branch information
calccrypto committed Jan 30, 2025
1 parent ae1b9a0 commit cdd1db2
Show file tree
Hide file tree
Showing 38 changed files with 2,450 additions and 663 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
run: ctest || true

- name: Delete files not reported for coverage
run: find -name "*.gc*" -a \( -name "gendir.*" -o -name "make_testindex.*" -o -name "bfwreaddirplus2db.*" -o -name "bffuse.*" -o -name "bfresultfuse.*" -o -name "dfw.*" -o -name "tsmepoch2time.*" -o -name "tsmtime2epoch.*" -o -path "*/test/*" \) -delete
run: find -name "*.gc*" -a \( -name "gendir.*" -o -name "make_testindex.*" -o -name "bfwreaddirplus2db.*" -o -name "bffuse.*" -o -name "bfresultfuse.*" -o -name "dfw.*" -o -name "tsmepoch2time.*" -o -name "tsmtime2epoch.*" -o -path "*/test/*" -o \( -path "*/gufi_vt.dir/*" -a -not -name "gufi_vt.*" \) \) -delete

- name: Generate Python Coverage Report
run: |
Expand Down
14 changes: 8 additions & 6 deletions contrib/treediff.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,14 @@ static int processdir(QPTPool_t *ctx, const size_t id, void *data, void *args) {
/* ********************************************** */
const size_t next_level = cp->level + 1;

struct PrintArgs print;
print.output_buffer = &pa->obufs.buffers[id];
print.delim = '/';
print.mutex = pa->obufs.mutex;
print.outfile = stdout;
print.rows = 0;
struct PrintArgs print = {
.output_buffer = &pa->obufs.buffers[id],
.delim = '/',
.mutex = pa->obufs.mutex,
.outfile = stdout,
.rows = 0,
.types = NULL,
};

char *buf[] = {NULL, NULL}; /* passed to print_parallel */

Expand Down
124 changes: 124 additions & 0 deletions include/addqueryfuncs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
This file is part of GUFI, which is part of MarFS, which is released
under the BSD license.
Copyright (c) 2017, Los Alamos National Security (LANS), LLC
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
From Los Alamos National Security, LLC:
LA-CC-15-039
Copyright (c) 2017, Los Alamos National Security, LLC All rights reserved.
Copyright 2017. Los Alamos National Security, LLC. This software was produced
under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
the U.S. Department of Energy. The U.S. Government has rights to use,
reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is
modified to produce derivative works, such modified software should be
clearly marked, so as not to confuse it with the version available from
LANL.
THIS SOFTWARE IS PROVIDED BY LOS ALAMOS NATIONAL SECURITY, LLC AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL SECURITY, LLC OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.
*/



#ifndef ADDQUERYFUNCS_H
#define ADDQUERYFUNCS_H

#include <sqlite3.h>

#include "bf.h"
#include "histogram.h"

#ifdef __cplusplus
extern "C" {
#endif

/* list of functions to add to a SQLite3 db handle that do not have user data/context */

void uidtouser(sqlite3_context *context, int argc, sqlite3_value **argv);
void gidtogroup(sqlite3_context *context, int argc, sqlite3_value **argv);
void modetotxt(sqlite3_context *context, int argc, sqlite3_value **argv);
void sqlite3_strftime(sqlite3_context *context, int argc, sqlite3_value **argv);
void blocksize(sqlite3_context *context, int argc, sqlite3_value **argv);
void human_readable_size(sqlite3_context *context, int argc, sqlite3_value **argv);
void sqlite_basename(sqlite3_context *context, int argc, sqlite3_value **argv);
void stdev_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void stdevs_final(sqlite3_context *context);
void stdevp_final(sqlite3_context *context);
void median_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void median_final(sqlite3_context *context);

/*
 * Register the GUFI SQL functions that need no user data/context on a
 * SQLite3 database handle.
 *
 * Functions are registered in table order and registration stops at
 * the first failure. The histogram functions are added last through
 * addhistfuncs().
 *
 * @param db  the database handle to add the functions to
 * @return 0 if every function was registered, 1 otherwise
 */
static inline int addqueryfuncs(sqlite3 *db) {
    /* a scalar function sets only func; an aggregate sets step and finalize */
    static const struct {
        const char *name;
        int nargs;
        void (*func)(sqlite3_context *, int, sqlite3_value **);
        void (*step)(sqlite3_context *, int, sqlite3_value **);
        void (*finalize)(sqlite3_context *);
    } registrations[] = {
        { "uidtouser",           1, uidtouser,           NULL,        NULL         },
        { "gidtogroup",          1, gidtogroup,          NULL,        NULL         },
        { "modetotxt",           1, modetotxt,           NULL,        NULL         },
        { "strftime",            2, sqlite3_strftime,    NULL,        NULL         },
        { "blocksize",           2, blocksize,           NULL,        NULL         },
        { "human_readable_size", 1, human_readable_size, NULL,        NULL         },
        { "basename",            1, sqlite_basename,     NULL,        NULL         },
        { "stdevs",              1, NULL,                stdev_step,  stdevs_final },
        { "stdevp",              1, NULL,                stdev_step,  stdevp_final },
        { "median",              1, NULL,                median_step, median_final },
    };

    const size_t count = sizeof(registrations) / sizeof(registrations[0]);
    for (size_t i = 0; i < count; i++) {
        const int rc = sqlite3_create_function(db,
                                               registrations[i].name,
                                               registrations[i].nargs,
                                               SQLITE_UTF8, NULL,
                                               registrations[i].func,
                                               registrations[i].step,
                                               registrations[i].finalize);
        if (rc != SQLITE_OK) {
            return 1;
        }
    }

    /* addhistfuncs() is used as a truth value: non-zero means success, so invert it */
    return !addhistfuncs(db);
}

int addqueryfuncs_with_context(sqlite3 *db, struct work *work);

#ifdef __cplusplus
}
#endif

#endif
18 changes: 18 additions & 0 deletions include/bf.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,24 @@ struct input {
refstr_t fin;
} sql;

/*
* if outputting to STDOUT or OUTFILE, get list of
* types of final output to prefix columns with
*
* set up by gufi_query but cleaned up by input_fini
*/
/*
 * NOTE(review): each pointer below is presumably an array of SQLite
 * column type codes for one query, as returned by get_col_types() -
 * confirm against gufi_query's setup code.
 */
struct {
/* NOTE(review): presumably non-zero when output columns should be prefixed with type/length (gufi_query -u) - confirm */
int prefix;

/* set if not aggregating */
/* NOTE(review): names mirror the tsum/sum/ent queries; presumably one type list per query - confirm */
int *tsum;
int *sum;
int *ent;

/* set if aggregating */
/* NOTE(review): presumably the column types of the final aggregate query - confirm */
int *agg;
} types;

int printdir;
int printing;
int printheader;
Expand Down
26 changes: 19 additions & 7 deletions include/dbutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ OF SUCH DAMAGE.
#include <sqlite3.h>

#include "SinglyLinkedList.h"
#include "addqueryfuncs.h"
#include "template_db.h"
#include "utils.h"
#include "xattrs.h"
Expand All @@ -91,18 +92,24 @@ extern const char *SQLITE_MEMORY;
#define DROP_TABLE(name) "DROP TABLE IF EXISTS " #name ";"
#define DROP_VIEW(name) "DROP VIEW IF EXISTS " #name ";"

#define READDIRPLUS "readdirplus"
#define READDIRPLUS "readdirplus"
#define READDIRPLUS_SCHEMA(name) \
"CREATE TABLE " name "(path TEXT, type TEXT, inode TEXT PRIMARY KEY, pinode TEXT, suspect INT64);"
extern const char READDIRPLUS_CREATE[];
extern const char READDIRPLUS_INSERT[];

/* contains all file and link metadata for the current directory */
/* prefer pentries over entries */
#define ENTRIES "entries"
#define ENTRIES_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(" extra_cols "name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT);"
extern const char ENTRIES_CREATE[];
extern const char ENTRIES_INSERT[];

/* directory metadata + aggregate data */
#define SUMMARY "summary"
#define SUMMARY_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(" extra_cols "name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, rectype INT64, pinode TEXT, isroot INT64, rollupscore INT64);"
extern const char SUMMARY_CREATE[];

/* view of summary table with rollups */
Expand All @@ -111,22 +118,28 @@ extern const char VRSUMMARY_CREATE[];

/* pentries pulled from children */
#define PENTRIES_ROLLUP "pentries_rollup"
#define PENTRIES_ROLLUP_SCHEMA(name) \
"CREATE TABLE " name "(name TEXT, type TEXT, inode TEXT, mode INT64, nlink INT64, uid INT64, gid INT64, size INT64, blksize INT64, blocks INT64, atime INT64, mtime INT64, ctime INT64, linkname TEXT, xattr_names BLOB, crtime INT64, ossint1 INT64, ossint2 INT64, ossint3 INT64, ossint4 INT64, osstext1 TEXT, osstext2 TEXT, pinode TEXT, ppinode TEXT);"
extern const char PENTRIES_ROLLUP_CREATE[];
extern const char PENTRIES_ROLLUP_INSERT[];

/* (entries + summary.inode) UNION pentries_rollup */
#define PENTRIES "pentries"
extern const char PENTRIES_CREATE[];

/* vrentries is not created because rolled up entries tables are not correct */

/* view of pentries view with rollups */
#define VRPENTRIES "vrpentries"
extern const char VRPENTRIES_CREATE[];

/* aggregate data of tree starting at current directory */
#define TREESUMMARY "treesummary"
#define TREESUMMARY_CREATE \
DROP_TABLE(TREESUMMARY) \
"CREATE TABLE " TREESUMMARY "(inode TEXT, pinode TEXT, totsubdirs INT64, maxsubdirfiles INT64, maxsubdirlinks INT64, maxsubdirsize INT64, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, totextdbs INT64, rectype INT64, uid INT64, gid INT64);"
#define TREESUMMARY_SCHEMA(name, extra_cols) \
"CREATE TABLE " name "(" extra_cols "inode TEXT, pinode TEXT, totsubdirs INT64, maxsubdirfiles INT64, maxsubdirlinks INT64, maxsubdirsize INT64, totfiles INT64, totlinks INT64, minuid INT64, maxuid INT64, mingid INT64, maxgid INT64, minsize INT64, maxsize INT64, totzero INT64, totltk INT64, totmtk INT64, totltm INT64, totmtm INT64, totmtg INT64, totmtt INT64, totsize INT64, minctime INT64, maxctime INT64, minmtime INT64, maxmtime INT64, minatime INT64, maxatime INT64, minblocks INT64, maxblocks INT64, totxattr INT64, depth INT64, mincrtime INT64, maxcrtime INT64, minossint1 INT64, maxossint1 INT64, totossint1 INT64, minossint2 INT64, maxossint2 INT64, totossint2 INT64, minossint3 INT64, maxossint3 INT64, totossint3 INT64, minossint4 INT64, maxossint4 INT64, totossint4 INT64, totextdbs INT64, rectype INT64, uid INT64, gid INT64);"
#define TREESUMMARY_CREATE \
DROP_TABLE(TREESUMMARY) \
TREESUMMARY_SCHEMA(TREESUMMARY, "")

extern const char TREESUMMARY_EXISTS[];

Expand Down Expand Up @@ -202,9 +215,6 @@ int insertsumdb(sqlite3 *sdb, const char *path, struct work *pwork, struct entry

int inserttreesumdb(const char *name, sqlite3 *sdb, struct sum *su, int rectype, int uid, int gid);

int addqueryfuncs(sqlite3 *db);
int addqueryfuncs_with_context(sqlite3 *db, struct work *work);

/* xattr db list item */
struct xattr_db {
long long int pinode;
Expand Down Expand Up @@ -258,6 +268,8 @@ enum CheckRollupScore {
int bottomup_collect_treesummary(sqlite3 *db, const char *dirname, sll_t *subdirs,
const enum CheckRollupScore check_rollupscore);

int *get_col_types(sqlite3 *db, const refstr_t *sql, int *cols);

#ifdef __cplusplus
}
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ OF SUCH DAMAGE.



#ifndef GUFI_QUERY_VALIDATE_INPUTS_H
#define GUFI_QUERY_VALIDATE_INPUTS_H
#ifndef GUFI_QUERY_HANDLE_SQL_H
#define GUFI_QUERY_HANDLE_SQL_H

#include "bf.h"

int validate_inputs(struct input *in);
int handle_sql(struct input *in);

#endif
2 changes: 1 addition & 1 deletion include/gufi_query/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ OF SUCH DAMAGE.

void querydb(struct work *work,
const char *dbname, const size_t dbname_len,
sqlite3 *db, const char *query,
sqlite3 *db, const char *query, const int *types,
PoolArgs_t *pa, int id,
int (*callback)(void *, int, char **, char**), int *rc);

Expand Down
36 changes: 32 additions & 4 deletions include/histogram.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,19 +68,19 @@ OF SUCH DAMAGE.
#include <stddef.h>
#include <time.h>

#include "dbutils.h"
#include <sqlite3.h>

#ifdef __cplusplus
extern "C" {
#endif

/* use this to add histogram functions to a sqlite database handle */
int addhistfuncs(sqlite3 *db);

/*
* Public API for parsing returned strings.
*
* These structs are intended for external use.
*
* Ignore the *_step and *_final functions. They are sqlite3 UDFs that
* need to be exposed here to get linking to work for some reason.
*/

/* ********************************************* */
Expand Down Expand Up @@ -108,6 +108,8 @@ typedef struct log2_hist {
size_t ge; /* len >= 2^count */
} log2_hist_t;

void log2_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void log2_hist_final(sqlite3_context *context);
log2_hist_t *log2_hist_parse(const char *str);
void log2_hist_free(log2_hist_t *hist);
/* ********************************************* */
Expand All @@ -126,6 +128,8 @@ typedef struct mode_hist {
size_t buckets[512];
} mode_hist_t;

void mode_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void mode_hist_final(sqlite3_context *context);
mode_hist_t *mode_hist_parse(const char *str);
void mode_hist_free(mode_hist_t *hist);
/* ********************************************* */
Expand Down Expand Up @@ -165,6 +169,8 @@ typedef struct time_hist {
time_t ref;
} time_hist_t;

void time_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void time_hist_final(sqlite3_context *context);
time_hist_t *time_hist_parse(const char *str);
void time_hist_free(time_hist_t *hist);
/* ********************************************* */
Expand Down Expand Up @@ -192,6 +198,9 @@ typedef struct category_hist {
size_t count;
} category_hist_t;

void category_hist_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void category_hist_combine_step(sqlite3_context *context, int argc, sqlite3_value **argv);
void category_hist_final(sqlite3_context *context);
category_hist_t *category_hist_parse(const char *str);
category_hist_t *category_hist_combine(category_hist_t *lhs, category_hist_t *rhs);
void category_hist_free(category_hist_t *hist);
Expand All @@ -211,10 +220,29 @@ typedef struct mode_count {
size_t count;
} mode_count_t;

void mode_count_final(sqlite3_context *context);
mode_count_t *mode_count_parse(const char *str);
void mode_count_free(mode_count_t *mc);
/* ********************************************* */

/*
 * Use this to add the histogram functions to a SQLite3 database handle.
 *
 * All functions are registered as aggregates, in table order, and
 * registration stops at the first failure.
 *
 * @param db  the database handle to add the functions to
 * @return 1 if every function was registered, 0 otherwise
 */
static inline int addhistfuncs(sqlite3 *db) {
    static const struct {
        const char *name;
        int nargs;
        void (*step)(sqlite3_context *, int, sqlite3_value **);
        void (*finalize)(sqlite3_context *);
    } aggregates[] = {
        { "log2_hist",             2, log2_hist_step,             log2_hist_final     },
        { "mode_hist",             1, mode_hist_step,             mode_hist_final     },
        { "time_hist",             2, time_hist_step,             time_hist_final     },
        { "category_hist",         2, category_hist_step,         category_hist_final },
        { "category_hist_combine", 1, category_hist_combine_step, category_hist_final },
        { "mode_count",            1, category_hist_step,         mode_count_final    },
    };

    const size_t count = sizeof(aggregates) / sizeof(aggregates[0]);
    for (size_t i = 0; i < count; i++) {
        const int rc = sqlite3_create_function(db,
                                               aggregates[i].name,
                                               aggregates[i].nargs,
                                               SQLITE_UTF8, NULL, NULL,
                                               aggregates[i].step,
                                               aggregates[i].finalize);
        if (rc != SQLITE_OK) {
            return 0;
        }
    }

    return 1;
}

#ifdef __cplusplus
}
#endif
Expand Down
1 change: 1 addition & 0 deletions include/print.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ typedef struct PrintArgs {
pthread_mutex_t *mutex; /* mutex for printing to stdout */
FILE *outfile;
size_t rows; /* number of rows returned by the query */
const int *types; /* if set, prefix output with 1 char type and 1 length */
/* size_t printed; /\* number of records printed by the callback *\/ */
} PrintArgs_t;

Expand Down
Loading

0 comments on commit cdd1db2

Please sign in to comment.