Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(clp-s): Add end-to-end test case for compression and extraction. #595

Merged
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
5382e80
Adding end to end test case for clp-s
AVMatthews Nov 18, 2024
acce22c
Add required jq install
AVMatthews Nov 18, 2024
ad187f7
Additional error checking and small modification to test input files
AVMatthews Nov 19, 2024
692d8d8
Add install and check for diff command
AVMatthews Nov 19, 2024
8f76f0a
Adding extra debug prints to help solve ubuntu build problems
AVMatthews Nov 19, 2024
5486700
More debug prints
AVMatthews Nov 19, 2024
df6122c
change input files only include max int supported by jq
AVMatthews Nov 19, 2024
e1b636d
Remove debug prints
AVMatthews Nov 19, 2024
7a21d85
remove magic values, add additional file cleanup, test on structurized…
AVMatthews Nov 20, 2024
508c39b
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 20, 2024
a039bf9
Fix compilation error from merging most recent commit from main, remo…
AVMatthews Nov 20, 2024
ad18256
brace init, move variables, doc string, command construction
AVMatthews Nov 22, 2024
ea375b2
remove std::format due to lack of support
AVMatthews Nov 22, 2024
90c6cef
remove format include
AVMatthews Nov 22, 2024
1a16528
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 22, 2024
01ca9ca
fmt:format for command string building
AVMatthews Nov 22, 2024
03d6510
restructure into compress, extract, compare
AVMatthews Nov 29, 2024
c362b22
small declaration/assignment changes
AVMatthews Nov 29, 2024
d013e46
remove no lint line and change type to auto
AVMatthews Nov 30, 2024
1d187b3
update jsonl test file
AVMatthews Nov 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,42 @@ add_subdirectory(src/clp_s)
add_subdirectory(src/reducer)

set(SOURCE_FILES_clp_s_unitTest
src/clp_s/ArchiveReader.cpp
src/clp_s/ArchiveReader.hpp
src/clp_s/ArchiveWriter.cpp
src/clp_s/ArchiveWriter.hpp
src/clp_s/ColumnReader.cpp
src/clp_s/ColumnReader.hpp
src/clp_s/ColumnWriter.cpp
src/clp_s/ColumnWriter.hpp
src/clp_s/DictionaryEntry.cpp
src/clp_s/DictionaryEntry.hpp
src/clp_s/DictionaryWriter.cpp
src/clp_s/DictionaryWriter.hpp
src/clp_s/FileReader.cpp
src/clp_s/FileReader.hpp
src/clp_s/FileWriter.cpp
src/clp_s/FileWriter.hpp
src/clp_s/JsonConstructor.cpp
src/clp_s/JsonConstructor.hpp
src/clp_s/JsonFileIterator.cpp
src/clp_s/JsonFileIterator.hpp
src/clp_s/JsonParser.cpp
src/clp_s/JsonParser.hpp
src/clp_s/PackedStreamReader.cpp
src/clp_s/PackedStreamReader.hpp
src/clp_s/ReaderUtils.cpp
src/clp_s/ReaderUtils.hpp
src/clp_s/Schema.cpp
src/clp_s/Schema.hpp
src/clp_s/SchemaMap.cpp
src/clp_s/SchemaMap.hpp
src/clp_s/SchemaReader.cpp
src/clp_s/SchemaReader.hpp
src/clp_s/SchemaTree.cpp
src/clp_s/SchemaTree.hpp
src/clp_s/SchemaWriter.cpp
src/clp_s/SchemaWriter.hpp
src/clp_s/search/AndExpr.cpp
src/clp_s/search/AndExpr.hpp
src/clp_s/search/BooleanLiteral.cpp
Expand Down Expand Up @@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/search/StringLiteral.hpp
src/clp_s/search/Transformation.hpp
src/clp_s/search/Value.hpp
src/clp_s/SchemaTree.hpp
src/clp_s/TimestampDictionaryReader.cpp
src/clp_s/TimestampDictionaryReader.hpp
src/clp_s/TimestampDictionaryWriter.cpp
src/clp_s/TimestampDictionaryWriter.hpp
src/clp_s/TimestampEntry.cpp
src/clp_s/TimestampEntry.hpp
src/clp_s/TimestampPattern.cpp
src/clp_s/TimestampPattern.hpp
src/clp_s/Utils.cpp
src/clp_s/Utils.hpp
src/clp_s/VariableDecoder.cpp
src/clp_s/VariableDecoder.hpp
src/clp_s/VariableEncoder.cpp
src/clp_s/VariableEncoder.hpp
src/clp_s/ZstdCompressor.cpp
src/clp_s/ZstdCompressor.hpp
src/clp_s/ZstdDecompressor.cpp
src/clp_s/ZstdDecompressor.hpp
)

set(SOURCE_FILES_unitTest
Expand Down Expand Up @@ -499,6 +548,7 @@ set(SOURCE_FILES_unitTest
tests/LogSuppressor.hpp
tests/test-Array.cpp
tests/test-BufferedFileReader.cpp
tests/test-clp_s-end_to_end.cpp
tests/test-EncodedVariableInterpreter.cpp
tests/test-encoding_methods.cpp
tests/test-ffi_IrUnitHandlerInterface.cpp
Expand Down Expand Up @@ -542,6 +592,8 @@ target_link_libraries(unitTest
log_surgeon::log_surgeon
LibArchive::LibArchive
MariaDBClient::MariaDBClient
${MONGOCXX_TARGET}
simdjson
spdlog::spdlog
OpenSSL::Crypto
${sqlite_LIBRARY_DEPENDENCIES}
Expand Down
159 changes: 159 additions & 0 deletions components/core/tests/test-clp_s-end_to_end.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#include <sys/wait.h>

#include <cstdlib>
#include <filesystem>
#include <string>
#include <string_view>
#include <system_error>
#include <vector>

#include <Catch2/single_include/catch2/catch.hpp>
#include <fmt/format.h>

#include "../src/clp_s/JsonConstructor.hpp"
#include "../src/clp_s/JsonParser.hpp"

// Directory that receives the archives produced by the compression step.
constexpr std::string_view cTestEndToEndArchiveDirectory{"test-end-to-end-archive"};
// Directory that receives the JSON extracted from the archives.
constexpr std::string_view cTestEndToEndOutputDirectory{"test-end-to-end-out"};
// File that holds the extracted JSON after it has been normalized with `jq` and `sort`.
constexpr std::string_view cTestEndToEndOutputSortedJson{"test-end-to-end_sorted.jsonl"};
// Directory containing the test inputs; it is resolved relative to the directory of this source
// file.
constexpr std::string_view cTestEndToEndInputFileDirectory{"test_log_files"};
// Name of the input file inside `cTestEndToEndInputFileDirectory`.
constexpr std::string_view cTestEndToEndInputFile{"test_no_floats_sorted.jsonl"};

namespace {
/**
 * A class that deletes the directories and files created by test cases, both before and after each
 * test case where the class is instantiated.
 *
 * Deletion in the constructor reports failures by throwing, so a test never starts on top of stale
 * output. Deletion in the destructor is best-effort and ignores filesystem errors, since the
 * destructor may run while a failed assertion is unwinding the stack and an exception escaping it
 * would terminate the whole test binary.
 */
class TestOutputCleaner {
public:
    TestOutputCleaner() { delete_files(); }

    ~TestOutputCleaner() { delete_files_ignoring_errors(); }

    // Delete copy & move constructors and assignment operators
    TestOutputCleaner(TestOutputCleaner const&) = delete;
    TestOutputCleaner(TestOutputCleaner&&) = delete;
    auto operator=(TestOutputCleaner const&) -> TestOutputCleaner& = delete;
    auto operator=(TestOutputCleaner&&) -> TestOutputCleaner& = delete;

private:
    /**
     * Deletes the archive directory, the output directory, and the sorted JSON file.
     * @throw std::filesystem::filesystem_error if any deletion fails.
     */
    static void delete_files() {
        std::filesystem::remove_all(cTestEndToEndArchiveDirectory);
        std::filesystem::remove_all(cTestEndToEndOutputDirectory);
        std::filesystem::remove(cTestEndToEndOutputSortedJson);
    }

    /**
     * Deletes the same paths as `delete_files`, but uses the `std::error_code` overloads so that
     * filesystem errors are discarded instead of being thrown.
     */
    static void delete_files_ignoring_errors() {
        std::error_code error_code;
        std::filesystem::remove_all(cTestEndToEndArchiveDirectory, error_code);
        std::filesystem::remove_all(cTestEndToEndOutputDirectory, error_code);
        std::filesystem::remove(cTestEndToEndOutputSortedJson, error_code);
    }
};

/**
 * @return The path of the test input file, formed by joining `cTestEndToEndInputFileDirectory`
 * and `cTestEndToEndInputFile`.
 */
auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path;

/**
 * @return The path of the test input file as a string, formed by joining the directory that
 * contains this source file (taken from `__FILE__`) with the relative input path.
 */
auto get_test_input_local_path() -> std::string;

/**
 * Creates `cTestEndToEndArchiveDirectory` and compresses the test input file into it using
 * `clp_s::JsonParser`, asserting that the directory is non-empty afterwards.
 * @param structurize_arrays Value assigned to the parser's `structurize_arrays` option.
 */
void compress(bool structurize_arrays);

/**
 * Creates `cTestEndToEndOutputDirectory` and runs `clp_s::JsonConstructor` on every
 * sub-directory of `cTestEndToEndArchiveDirectory`.
 * @return The path `cTestEndToEndOutputDirectory`/"original", which is asserted to exist.
 */
auto extract() -> std::filesystem::path;

/**
 * Normalizes the extracted JSON with `jq` and `sort`, writes the result to
 * `cTestEndToEndOutputSortedJson`, and asserts that `diff` finds no difference between that file
 * and the test input file.
 * @param extracted_json_path Path of the extracted JSON that is passed to `jq`.
 */
void compare(std::filesystem::path const& extracted_json_path);

// Builds the input file's path by appending the input file name to the input directory name.
auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path {
    std::filesystem::path relative_input_path{cTestEndToEndInputFileDirectory};
    relative_input_path /= cTestEndToEndInputFile;
    return relative_input_path;
}

// Resolves the input file against the directory that holds this source file (via `__FILE__`) and
// returns the result as a string.
auto get_test_input_local_path() -> std::string {
    auto const tests_dir = std::filesystem::path{__FILE__}.parent_path();
    auto const input_path = tests_dir / get_test_input_path_relative_to_tests_dir();
    return input_path.string();
}

/**
 * Creates `cTestEndToEndArchiveDirectory` and compresses the test input file into it with
 * `clp_s::JsonParser`, asserting that parsing succeeds and that the directory is non-empty
 * afterwards.
 * @param structurize_arrays Value assigned to the parser's `structurize_arrays` option.
 */
void compress(bool structurize_arrays) {
    constexpr auto cDefaultTargetEncodedSize = 8ULL * 1024 * 1024 * 1024;  // 8 GiB
    constexpr auto cDefaultMaxDocumentSize = 512ULL * 1024 * 1024;  // 512 MiB
    constexpr auto cDefaultMinTableSize = 1ULL * 1024 * 1024;  // 1 MiB
    constexpr auto cDefaultCompressionLevel = 3;
    constexpr auto cDefaultPrintArchiveStats = false;

    std::filesystem::create_directory(cTestEndToEndArchiveDirectory);
    REQUIRE((std::filesystem::is_directory(cTestEndToEndArchiveDirectory)));

    clp_s::JsonParserOption parser_option{};
    parser_option.file_paths.push_back(get_test_input_local_path());
    parser_option.archives_dir = cTestEndToEndArchiveDirectory;
    parser_option.target_encoded_size = cDefaultTargetEncodedSize;
    parser_option.max_document_size = cDefaultMaxDocumentSize;
    parser_option.min_table_size = cDefaultMinTableSize;
    parser_option.compression_level = cDefaultCompressionLevel;
    parser_option.print_archive_stats = cDefaultPrintArchiveStats;
    parser_option.structurize_arrays = structurize_arrays;

    clp_s::JsonParser parser{parser_option};
    REQUIRE(parser.parse());
    parser.store();

    REQUIRE((false == std::filesystem::is_empty(cTestEndToEndArchiveDirectory)));
}

auto extract() -> std::filesystem::path {
constexpr auto cDefaultOrdered = false;
constexpr auto cDefaultTargetOrderedChunkSize = 0;

std::filesystem::create_directory(cTestEndToEndOutputDirectory);
REQUIRE(std::filesystem::is_directory(cTestEndToEndOutputDirectory));

clp_s::JsonConstructorOption constructor_option{};
constructor_option.archives_dir = cTestEndToEndArchiveDirectory;
constructor_option.output_dir = cTestEndToEndOutputDirectory;
constructor_option.ordered = cDefaultOrdered;
constructor_option.target_ordered_chunk_size = cDefaultTargetOrderedChunkSize;
for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) {
if (false == entry.is_directory()) {
// Skip non-directories
continue;
}

constructor_option.archive_id = entry.path().filename();
clp_s::JsonConstructor constructor{constructor_option};
constructor.store();
}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
std::filesystem::path extracted_json_path{cTestEndToEndOutputDirectory};
extracted_json_path /= "original";
REQUIRE(std::filesystem::exists(extracted_json_path));

return extracted_json_path;
}

// Silence the checks below since our use of `std::system` is safe in the context of testing.
// NOLINTBEGIN(cert-env33-c,concurrency-mt-unsafe)
/**
 * Wraps a string in single quotes so that a POSIX shell treats it as one literal word, even if it
 * contains spaces or other shell metacharacters.
 * @param str The string to quote.
 * @return `str` enclosed in single quotes, with every embedded single quote replaced by `'\''`.
 */
auto quote_for_shell(std::string_view str) -> std::string {
    std::string quoted{"'"};
    for (auto const c : str) {
        if ('\'' == c) {
            // Close the quoted section, emit an escaped quote, then reopen the quoted section.
            quoted += "'\\''";
        } else {
            quoted += c;
        }
    }
    quoted += '\'';
    return quoted;
}

/**
 * Normalizes the extracted JSON with `jq` (sorted keys, compact output) and `sort`, writes the
 * result to `cTestEndToEndOutputSortedJson`, and asserts that `diff` finds no difference between
 * that file and the test input file. Also asserts that `jq` and `diff` are available.
 * @param extracted_json_path Path of the extracted JSON that is passed to `jq`.
 */
void compare(std::filesystem::path const& extracted_json_path) {
    // All paths are quoted before being placed in a command line, since the input path is derived
    // from `__FILE__` and may therefore contain spaces.
    auto const quoted_sorted_json_path = quote_for_shell(cTestEndToEndOutputSortedJson);

    int result{std::system("command -v jq >/dev/null 2>&1")};
    REQUIRE((0 == result));
    auto command = fmt::format(
            "jq --sort-keys --compact-output '.' {} | sort > {}",
            quote_for_shell(extracted_json_path.string()),
            quoted_sorted_json_path
    );
    result = std::system(command.c_str());
    REQUIRE((0 == result));

    REQUIRE((false == std::filesystem::is_empty(cTestEndToEndOutputSortedJson)));

    result = std::system("command -v diff >/dev/null 2>&1");
    REQUIRE((0 == result));
    command = fmt::format(
            "diff --unified {} {} > /dev/null",
            quoted_sorted_json_path,
            quote_for_shell(get_test_input_local_path())
    );
    result = std::system(command.c_str());
    // POSIX only guarantees that `WIFEXITED` is non-zero on normal termination, so compare
    // against zero rather than against `true`.
    REQUIRE((0 != WIFEXITED(result)));
    REQUIRE((0 == WEXITSTATUS(result)));
}

// NOLINTEND(cert-env33-c,concurrency-mt-unsafe)
} // namespace

// NOLINTNEXTLINE(readability-function-cognitive-complexity)
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
TEST_CASE("clp-s-compress-extract-no-floats", "[clp-s][end-to-end]") {
auto structurize_arrays = GENERATE(true, false);

TestOutputCleaner const test_cleanup;

compress(structurize_arrays);

std::filesystem::path extracted_json_path = extract();
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

compare(extracted_json_path);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ set -u

dnf install -y \
cmake \
diffutils \
gcc-c++ \
git \
java-11-openjdk \
jq \
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
libarchive-devel \
libcurl-devel \
libzstd-devel \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
gcc \
gcc-10 \
git \
jq \
libcurl4 \
libcurl4-openssl-dev \
libmariadb-dev \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
curl \
build-essential \
git \
jq \
libboost-filesystem-dev \
libboost-iostreams-dev \
libboost-program-options-dev \
Expand Down
Loading