diff --git a/api/librdb-api.h b/api/librdb-api.h
index 9bc0a96..fac46ed 100644
--- a/api/librdb-api.h
+++ b/api/librdb-api.h
@@ -436,20 +436,31 @@ _LIBRDB_API void RDB_IgnoreChecksum(RdbParser *p);
  * terminate its operation. The default threshold is unlimited. */
 _LIBRDB_API void RDB_setMaxRawSize(RdbParser *p, size_t maxSize);
 
-/* logger */
+/* Returns a hint for the total number of items in the currently parsed key
+ * context - to assist with memory allocation or other optimizations.
+ * Returns -1 if no hint is available. */
+_LIBRDB_API int64_t RDB_getNumItemsHint(RdbParser *p);
+
+/****************************************************************
+ * Logger
+ ****************************************************************/
+/* Set the logging level for the parser */
 _LIBRDB_API void RDB_setLogLevel(RdbParser *p, RdbLogLevel l);
+
+/* Set a custom logger callback function */
 _LIBRDB_API void RDB_setLogger(RdbParser *p, RdbLoggerCB f);
+
+/* Log a message with the specified log level */
 #ifdef __GNUC__
 _LIBRDB_API void RDB_log(RdbParser *p, RdbLogLevel lvl, const char *format, ...)
-    __attribute__((format(printf, 3, 4)));
+        __attribute__((format(printf, 3, 4)));
 #else
 _LIBRDB_API void RDB_log(RdbParser *p, RdbLogLevel lvl, const char *format, ...);
 #endif
 
-/* Following function returns a hint for the total number of items in the current
- * parsed key context - to assist with memory allocation or other optimizations.
- * If hint is not available, then return -1. */
-_LIBRDB_API int64_t RDB_getNumItemsHint(RdbParser *p);
+/* Hide keys in logs: print the first 8 hex digits of SHA256(key) instead of
+ * the key itself */
+_LIBRDB_API void RDB_hideKeysInLog(RdbParser *p);
 
 /****************************************************************
  * Pause the Parser
diff --git a/deps/redis/sha256.c b/deps/redis/sha256.c
new file mode 100644
index 0000000..aaeba5b
--- /dev/null
+++ b/deps/redis/sha256.c
@@ -0,0 +1,170 @@
+/*********************************************************************
+* Filename: sha256.c
+* Author: Brad Conte (brad AT bradconte.com)
+* Copyright:
+* Disclaimer: This code is presented "as is" without any guarantees.
+* Details: Implementation of the SHA-256 hashing algorithm.
+           SHA-256 is one of the three algorithms in the SHA2
+           specification. The others, SHA-384 and SHA-512, are not
+           offered in this implementation.
+           Algorithm specification can be found here:
+           * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf
+           This implementation uses little endian byte order.
+*********************************************************************/
+
+/*************************** HEADER FILES ***************************/
+#include <stdlib.h>
+#include <string.h>
+#include "sha256.h"
+
+/****************************** MACROS ******************************/
+#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
+#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
+
+#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
+#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
+#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
+#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
+#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
+
+/**************************** VARIABLES *****************************/
+static const WORD k[64] = {
+    0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
+    0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
+    0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
+    0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
+    0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
+    0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
+    0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
+    0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+};
+
+/*********************** FUNCTION DEFINITIONS ***********************/
+/* Process one 64-byte block from 'data' and fold it into ctx->state.
+ * Internal helper: it is not declared in sha256.h. */
+void sha256_transform(SHA256_CTX *ctx, const BYTE data[])
+{
+    WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
+
+    for (i = 0, j = 0; i < 16; ++i, j += 4) {
+        m[i] = ((WORD) data[j + 0] << 24) |
+               ((WORD) data[j + 1] << 16) |
+               ((WORD) data[j + 2] << 8) |
+               ((WORD) data[j + 3]);
+    }
+
+    for ( ; i < 64; ++i)
+        m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
+
+    a = ctx->state[0];
+    b = ctx->state[1];
+    c = ctx->state[2];
+    d = ctx->state[3];
+    e = ctx->state[4];
+    f = ctx->state[5];
+    g = ctx->state[6];
+    h = ctx->state[7];
+
+    for (i = 0; i < 64; ++i) {
+        t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
+        t2 = EP0(a) + MAJ(a,b,c);
+        h = g;
+        g = f;
+        f = e;
+        e = d + t1;
+        d = c;
+        c = b;
+        b = a;
+        a = t1 + t2;
+    }
+
+    ctx->state[0] += a;
+    ctx->state[1] += b;
+    ctx->state[2] += c;
+    ctx->state[3] += d;
+    ctx->state[4] += e;
+    ctx->state[5] += f;
+    ctx->state[6] += g;
+    ctx->state[7] += h;
+}
+
+/* Reset 'ctx': empty buffer, zero bit count, SHA-256 initial hash values. */
+void sha256_init(SHA256_CTX *ctx)
+{
+    ctx->datalen = 0;
+    ctx->bitlen = 0;
+    ctx->state[0] = 0x6a09e667;
+    ctx->state[1] = 0xbb67ae85;
+    ctx->state[2] = 0x3c6ef372;
+    ctx->state[3] = 0xa54ff53a;
+    ctx->state[4] = 0x510e527f;
+    ctx->state[5] = 0x9b05688c;
+    ctx->state[6] = 0x1f83d9ab;
+    ctx->state[7] = 0x5be0cd19;
+}
+
+/* Feed 'len' bytes from 'data' into the hash. The index is a size_t so that
+ * inputs of 4 GiB or more do not wrap a 32-bit counter and loop forever. */
+void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len)
+{
+    size_t i;
+
+    for (i = 0; i < len; ++i) {
+        ctx->data[ctx->datalen] = data[i];
+        ctx->datalen++;
+        if (ctx->datalen == 64) {
+            sha256_transform(ctx, ctx->data);
+            ctx->bitlen += 512;
+            ctx->datalen = 0;
+        }
+    }
+}
+
+/* Pad the buffered data, append the message length in bits and write the
+ * 32-byte digest to 'hash'. */
+void sha256_final(SHA256_CTX *ctx, BYTE hash[])
+{
+    WORD i;
+
+    i = ctx->datalen;
+
+    // Pad whatever data is left in the buffer.
+    if (ctx->datalen < 56) {
+        ctx->data[i++] = 0x80;
+        while (i < 56)
+            ctx->data[i++] = 0x00;
+    }
+    else {
+        ctx->data[i++] = 0x80;
+        while (i < 64)
+            ctx->data[i++] = 0x00;
+        sha256_transform(ctx, ctx->data);
+        memset(ctx->data, 0, 56);
+    }
+
+    // Append to the padding the total message's length in bits and transform.
+    ctx->bitlen += ctx->datalen * 8;
+    ctx->data[63] = ctx->bitlen;
+    ctx->data[62] = ctx->bitlen >> 8;
+    ctx->data[61] = ctx->bitlen >> 16;
+    ctx->data[60] = ctx->bitlen >> 24;
+    ctx->data[59] = ctx->bitlen >> 32;
+    ctx->data[58] = ctx->bitlen >> 40;
+    ctx->data[57] = ctx->bitlen >> 48;
+    ctx->data[56] = ctx->bitlen >> 56;
+    sha256_transform(ctx, ctx->data);
+
+    // Since this implementation uses little endian byte ordering and SHA uses big endian,
+    // reverse all the bytes when copying the final state to the output hash.
+    for (i = 0; i < 4; ++i) {
+        hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff;
+        hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff;
+        hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff;
+        hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff;
+        hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff;
+        hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff;
+        hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff;
+        hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff;
+    }
+}
diff --git a/deps/redis/sha256.h b/deps/redis/sha256.h
new file mode 100644
index 0000000..76637ed
--- /dev/null
+++ b/deps/redis/sha256.h
@@ -0,0 +1,35 @@
+/*********************************************************************
+* Filename: sha256.h
+* Author: Brad Conte (brad AT bradconte.com)
+* Copyright:
+* Disclaimer: This code is presented "as is" without any guarantees.
+* Details: Defines the API for the corresponding SHA256 implementation.
+*********************************************************************/
+
+#ifndef SHA256_H
+#define SHA256_H
+
+/*************************** HEADER FILES ***************************/
+#include <stddef.h>
+#include <stdint.h>
+
+/****************************** MACROS ******************************/
+#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest
+
+/**************************** DATA TYPES ****************************/
+typedef uint8_t BYTE; // 8-bit byte
+typedef uint32_t WORD; // 32-bit word
+
+typedef struct {
+    BYTE data[64];
+    WORD datalen;
+    unsigned long long bitlen;
+    WORD state[8];
+} SHA256_CTX;
+
+/*********************** FUNCTION DECLARATIONS **********************/
+void sha256_init(SHA256_CTX *ctx);
+void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len);
+void sha256_final(SHA256_CTX *ctx, BYTE hash[]);
+
+#endif // SHA256_H
diff --git a/src/cli/rdb-cli.c b/src/cli/rdb-cli.c
index 595b0fc..7949fb5 100644
--- a/src/cli/rdb-cli.c
+++ b/src/cli/rdb-cli.c
@@ -342,8 +342,8 @@ int matchRdbDataType(const char *dataTypeStr) {
     if (!strcmp(dataTypeStr, "func")) return RDB_DATA_TYPE_FUNCTION;
 
     loggerWrap(RDB_LOG_ERR,
-        "Invalid TYPE argument (%s). Valid values: str, list, set, zset, hash, module, stream, func",
-        dataTypeStr);
+               "Invalid TYPE argument (%s). Valid values: str, list, set, zset, hash, module, stream, func",
+               dataTypeStr);
     exit(1);
 }
 
diff --git a/src/ext/common.c b/src/ext/extCommon.c
similarity index 96%
rename from src/ext/common.c
rename to src/ext/extCommon.c
index 9b86a12..3318d0e 100644
--- a/src/ext/common.c
+++ b/src/ext/extCommon.c
@@ -1,4 +1,4 @@
-#include "common.h"
+#include "extCommon.h"
 #include "../../deps/redis/util.h"
 
 /* Example:: Input: length=123 return: buf="\r\n$123\r\n" */
diff --git a/src/ext/common.h b/src/ext/extCommon.h
similarity index 92%
rename from src/ext/common.h
rename to src/ext/extCommon.h
index a51bd28..f8b6791 100644
--- a/src/ext/common.h
+++ b/src/ext/extCommon.h
@@ -50,4 +50,7 @@ static inline void iov_plain(struct iovec *iov, const char *s, size_t l) {
     iov->iov_len = l;
 }
 
+/*** hidden LIB API function (not declared in librdb-api.h) ***/
+_LIBRDB_API char *__RDB_key(RdbParser *p, char *key, char buf[9]);
+
 #endif /*define RDBX_COMMON_H*/
diff --git a/src/ext/handlersFilter.c b/src/ext/handlersFilter.c
index ff39497..d5bbbb2 100644
--- a/src/ext/handlersFilter.c
+++ b/src/ext/handlersFilter.c
@@ -1,7 +1,7 @@
 #include
 #include
 #include "../lib/defines.h" /* valid include since it brings only RDB_* defines */
-#include "common.h"
+#include "extCommon.h"
 
 struct RdbxFilter {
     regex_t regex_compiled;
diff --git a/src/ext/handlersToJson.c b/src/ext/handlersToJson.c
index 3923695..a75f160 100755
--- a/src/ext/handlersToJson.c
+++ b/src/ext/handlersToJson.c
@@ -4,7 +4,7 @@
 #include
 #include
 #include
-#include "common.h"
+#include "extCommon.h"
 #include "../../deps/redis/util.h"
 
 struct RdbxToJson;
diff --git a/src/ext/handlersToPrint.c b/src/ext/handlersToPrint.c
index a83f5d6..78d83e2 100644
--- a/src/ext/handlersToPrint.c
+++ b/src/ext/handlersToPrint.c
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-#include "common.h"
+#include "extCommon.h"
 #include "../../deps/redis/util.h"
 
 struct RdbxToPrint;
diff --git a/src/ext/handlersToResp.c b/src/ext/handlersToResp.c
index 9377653..7a15d7c 100644
--- a/src/ext/handlersToResp.c
+++ b/src/ext/handlersToResp.c
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-#include "common.h"
+#include "extCommon.h"
 #include "../../deps/redis/crc64.h"
 #include "../../deps/redis/util.h"
 
diff --git a/src/ext/readerFile.c b/src/ext/readerFile.c
index 10b84ab..11dc11d 100644
--- a/src/ext/readerFile.c
+++ b/src/ext/readerFile.c
@@ -1,7 +1,7 @@
 #include
 #include
 #include
-#include "common.h"
+#include "extCommon.h"
 
 struct RdbxReaderFile {
     RdbParser *parser;
diff --git a/src/ext/readerFileDesc.c b/src/ext/readerFileDesc.c
index e5ed754..c0fc9d5 100644
--- a/src/ext/readerFileDesc.c
+++ b/src/ext/readerFileDesc.c
@@ -5,7 +5,7 @@
 #include
 #include
 #include
-#include "common.h"
+#include "extCommon.h"
 
 struct RdbxReaderFileDesc {
 
diff --git a/src/ext/respToFileWriter.c b/src/ext/respToFileWriter.c
index cb61c80..e0eda15 100644
--- a/src/ext/respToFileWriter.c
+++ b/src/ext/respToFileWriter.c
@@ -1,5 +1,5 @@
 #include
-#include "common.h"
+#include "extCommon.h"
 #include
 #include
 
diff --git a/src/ext/respToRedisLoader.c b/src/ext/respToRedisLoader.c
index 4d7d960..fd84912 100644
--- a/src/ext/respToRedisLoader.c
+++ b/src/ext/respToRedisLoader.c
@@ -6,7 +6,7 @@
 #include
 #include
 #include
-#include "common.h"
+#include "extCommon.h"
 #include "readerResp.h"
 
 #ifdef USE_OPENSSL
@@ -71,10 +71,11 @@ static int onReadRepliesErrorCb(void *context, char *msg) {
         (strstr(msg, "not found"))) /* error includes "not found" */
         return 0; /* mask error */
 
+    char buf[9]; /* 8 hex digits + '\0', filled by __RDB_key() when keys are hidden */
     RDB_reportError(ctx->p, (RdbRes) RDBX_ERR_RESP_WRITE,
                     "\nerror from dst '-%s' on key '%s' on command '%s' (RESP Command #%zu)\n",
                     msg,
-                    ctx->pendingCmds.key[currIdx],
+                    __RDB_key(ctx->p, ctx->pendingCmds.key[currIdx], buf),
                     ctx->pendingCmds.cmd[currIdx],
                     ctx->respReader.countReplies);
 
diff --git a/src/lib/parser.c b/src/lib/parser.c
index 9494739..362ff17 100644
--- a/src/lib/parser.c
+++ b/src/lib/parser.c
@@ -33,6 +33,7 @@
 #include "../../deps/redis/lzf.h"
 #include "../../deps/redis/stream.h"
 #include "../../deps/redis/t_zset.h"
+#include "../../deps/redis/sha256.h"
 
 #define DONE_FILL_BULK SIZE_MAX
 
@@ -183,6 +184,23 @@ static RdbStatus readRdbFromReader(RdbParser *p, size_t len, AllocTypeRq type, c
 static RdbStatus readRdbFromBuff(RdbParser *p, size_t len, AllocTypeRq type, char *refBuf, BulkInfo **binfo);
 static RdbStatus readRdbWaitMoreDataDbg(RdbParser *p, size_t len, AllocTypeRq type, char *refBuf, BulkInfo **binfo);
 
+/*** hidden LIB API function (not declared in librdb-api.h) ***/
+
+/* Return 'key' as is, or - when hideKeysInLog is set - the first 8 hex digits
+ * of SHA256(key) written to 'buf'. A NULL key is returned as is, not hashed. */
+_LIBRDB_API char *__RDB_key(RdbParser *p, char *key, char buf[9]) {
+    if (!(p->hideKeysInLog) || key == NULL) return key;
+
+    BYTE hash[SHA256_BLOCK_SIZE];
+    SHA256_CTX ctx;
+    sha256_init(&ctx);
+    sha256_update(&ctx, (unsigned char*) key, strlen(key));
+    sha256_final(&ctx, hash);
+
+    snprintf(buf, 9, "%02x%02x%02x%02x", hash[0], hash[1], hash[2], hash[3]);
+    return buf;
+}
+
 /*** LIB API functions ***/
 
 _LIBRDB_API RdbParser *RDB_createParserRdb(RdbMemAlloc *memAlloc) {
@@ -215,6 +233,7 @@ _LIBRDB_API RdbParser *RDB_createParserRdb(RdbMemAlloc *memAlloc) {
     p->errorMsgAt = 0;
     p->appCbCtx.numBulks = 0;
     p->loggerCb = loggerCbDefault;
+    p->hideKeysInLog = 0;
     p->logLevel = RDB_LOG_DBG;
     p->maxRawSize = SIZE_MAX;
     p->errorCode = RDB_OK;
@@ -428,6 +447,10 @@ _LIBRDB_API int64_t RDB_getNumItemsHint(RdbParser *p) {
     return p->elmCtx.key.numItemsHint;
 }
 
+_LIBRDB_API void RDB_hideKeysInLog(RdbParser *p) {
+    p->hideKeysInLog = 1;
+}
+
 _LIBRDB_API void RDB_setLogLevel(RdbParser *p, RdbLogLevel l) {
     p->logLevel = l;
 }
@@ -872,11 +895,11 @@ static RdbStatus finalizeConfig(RdbParser *p, int isParseFromBuff) {
 }
 
 static void printParserState(RdbParser *p) {
-    RDB_log(p, RDB_LOG_ERR, "Parser error message: %s", RDB_getErrorMessage(p));
     RDB_log(p, RDB_LOG_ERR, "Parser error code: %d", RDB_getErrorCode(p));
     RDB_log(p, RDB_LOG_ERR, "Parser element func name: %s(state=%d)",
             peInfo[p->parsingElement].funcname, p->elmCtx.state);
     RDB_log(p, RDB_LOG_ERR, "Parsed opcode: %d", p->currOpcode);
+    RDB_log(p, RDB_LOG_ERR, "Parser 'bytesRead': %zu", p->bytesRead);
 }
 
 static void loggerCbDefault(RdbLogLevel l, const char *msg) {
@@ -1562,6 +1585,8 @@ RdbStatus elementEndKey(RdbParser *p) {
     /*** ENTER SAFE STATE ***/
 
     CALL_HANDLERS_CB_NO_ARGS(p, NOP, p->elmCtx.key.handleByLevel, common.handleEndKey);
+    p->elmCtx.key.info.dataType = RDB_DATA_TYPE_MAX;
+
     return nextParsingElement(p, PE_NEXT_RDB_TYPE);
 }
 
diff --git a/src/lib/parser.h b/src/lib/parser.h
index 1b65a35..fefcf02 100644
--- a/src/lib/parser.h
+++ b/src/lib/parser.h
@@ -368,6 +368,7 @@ struct RdbParser {
     int ignoreChecksum;
     CrcFunc crcFunc;
     RdbLoggerCB loggerCb;
+    int hideKeysInLog;
     RdbLogLevel logLevel;
     size_t maxRawSize;
 
diff --git a/test/test_main.c b/test/test_main.c
index 8bc4bee..973efaa 100644
--- a/test/test_main.c
+++ b/test/test_main.c
@@ -142,6 +142,8 @@ RdbRes handle_start_rdb_report_long_errors(RdbParser *p, void *userData, int rdb
     return 1001; /* This value will be eventually returned as the error code */
 }
 
+/* Test a faulty parser handler that reports on 999 errors. Only the first
+ * errors are reported, and the last one is kept as well. */
 static void test_report_long_error(void **state) {
     RdbStatus status;
     UNUSED(state);
diff --git a/test/test_rdb_to_redis.c b/test/test_rdb_to_redis.c
index 04a444c..fc5ac4b 100644
--- a/test/test_rdb_to_redis.c
+++ b/test/test_rdb_to_redis.c
@@ -415,6 +415,62 @@ static void test_rdb_to_redis_del_before_write(void **state) {
     }
 }
 
+/* This test verifies the behavior of the RDB parser when the `hideKeysInLog`
+ * option is set. Specifically, it ensures that keys in error messages are
+ * replaced with the first 8 hex digits of their SHA256 hash, rather than being
+ * logged directly.
+ */
+static void test_rdb_to_redis_hide_keys_in_log(void **state) {
+    UNUSED(state);
+    RdbParser *p;
+    RdbStatus status;
+
+    RdbxToRespConf rdb2respConf = {
+        .delKeyBeforeWrite = 0,
+        .funcLibReplaceIfExist=0,
+        .supportRestore = 1,
+        .dstRedisVersion = getTargetRedisVersion(NULL, NULL),
+    };
+
+    /* pre-create a key that is going to appear in the RDB file as well */
+    sendRedisCmd("set mylist27 1", REDIS_REPLY_STATUS, NULL);
+
+    /* RDB to TCP */
+    RdbxToResp *rdbToResp;
+    p = RDB_createParserRdb(NULL);
+
+    /* hide keys in log */
+    RDB_hideKeysInLog(p);
+
+    RDB_setLogger(p, dummyLogger);
+    assert_non_null(RDBX_createReaderFile(p, DUMP_FOLDER("100_lists.rdb")));
+    assert_non_null(rdbToResp = RDBX_createHandlersToResp(p, &rdb2respConf));
+
+    assert_non_null(RDBX_createRespToRedisTcp(p,
+                                              rdbToResp,
+                                              NULL,
+                                              "127.0.0.1",
+                                              getRedisPort()));
+
+    while ((status = RDB_parse(p)) == RDB_STATUS_WAIT_MORE_DATA);
+
+    /* verify returned error code. Verify error message. */
+    assert_int_equal(status, RDB_STATUS_ERROR);
+    assert_int_equal(RDB_getErrorCode(p), RDBX_ERR_RESP_WRITE);
+
+    /* Expected to print first 8 hex digits of SHA256(key) instead of the key
+     * itself. To eval via bash apply:
+     * > echo -n "mylist27" | sha256sum | cut -c 1-8
+     */
+    printf("%s\n", RDB_getErrorMessage(p));
+    assert_non_null(strstr(RDB_getErrorMessage(p), "0bdab52c")); /* sha256("mylist27") */
+
+    /* Verify that the key is not in the log */
+    assert_null(strstr(RDB_getErrorMessage(p), "mylist27"));
+
+    RDB_deleteParser(p);
+}
+
 /* Load "function.rdb" more than once. If 'funcLibReplaceIfExist' is not set, then
  * expected to fail */
 static void test_rdb_to_redis_func_lib_replace_if_exist(void **state) {
@@ -520,6 +576,7 @@ int group_rdb_to_redis(void) {
         cmocka_unit_test_setup(test_rdb_to_redis_multiple_lists_strings, setupTest),
         cmocka_unit_test_setup(test_rdb_to_redis_multiple_lists_strings_pipeline_depth_1, setupTest),
         cmocka_unit_test_setup(test_rdb_to_redis_del_before_write, setupTest),
+        cmocka_unit_test_setup(test_rdb_to_redis_hide_keys_in_log, setupTest),
         cmocka_unit_test_setup(test_rdb_to_redis_multiple_dbs, setupTest),
         cmocka_unit_test_setup(test_rdb_to_redis_function, setupTest),
         cmocka_unit_test_setup(test_rdb_to_redis_func_lib_replace_if_exist, setupTest),