Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(clp-s): Add end-to-end test case for compression and extraction. #595

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
5382e80
Adding end to end test case for clp-s
AVMatthews Nov 18, 2024
acce22c
Add required jq install
AVMatthews Nov 18, 2024
ad187f7
Additional error checking and small modification to test input files
AVMatthews Nov 19, 2024
692d8d8
Add install and check for diff command
AVMatthews Nov 19, 2024
8f76f0a
Adding extra debug prints to help solve ubuntu build problems
AVMatthews Nov 19, 2024
5486700
More debug prints
AVMatthews Nov 19, 2024
df6122c
change input files only include max int supported by jq
AVMatthews Nov 19, 2024
e1b636d
Remove debug prints
AVMatthews Nov 19, 2024
7a21d85
remove magic values, add additional file cleanup, test on structurized…
AVMatthews Nov 20, 2024
508c39b
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 20, 2024
a039bf9
Fix compilation error from merging most recent commit from main, remo…
AVMatthews Nov 20, 2024
ad18256
brace init, move variables, doc string, command construction
AVMatthews Nov 22, 2024
ea375b2
remove std::format due to lack of support
AVMatthews Nov 22, 2024
90c6cef
remove format include
AVMatthews Nov 22, 2024
1a16528
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 22, 2024
01ca9ca
fmt:format for command string building
AVMatthews Nov 22, 2024
03d6510
restructure into compress, extract, compare
AVMatthews Nov 29, 2024
c362b22
small declaration/assignment changes
AVMatthews Nov 29, 2024
d013e46
remove no lint line and change type to auto
AVMatthews Nov 30, 2024
1d187b3
update jsonl test file
AVMatthews Nov 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,42 @@ add_subdirectory(src/clp_s)
add_subdirectory(src/reducer)

set(SOURCE_FILES_clp_s_unitTest
src/clp_s/ArchiveReader.cpp
src/clp_s/ArchiveReader.hpp
src/clp_s/ArchiveWriter.cpp
src/clp_s/ArchiveWriter.hpp
src/clp_s/ColumnReader.cpp
src/clp_s/ColumnReader.hpp
src/clp_s/ColumnWriter.cpp
src/clp_s/ColumnWriter.hpp
src/clp_s/DictionaryEntry.cpp
src/clp_s/DictionaryEntry.hpp
src/clp_s/DictionaryWriter.cpp
src/clp_s/DictionaryWriter.hpp
src/clp_s/FileReader.cpp
src/clp_s/FileReader.hpp
src/clp_s/FileWriter.cpp
src/clp_s/FileWriter.hpp
src/clp_s/JsonConstructor.cpp
src/clp_s/JsonConstructor.hpp
src/clp_s/JsonFileIterator.cpp
src/clp_s/JsonFileIterator.hpp
src/clp_s/JsonParser.cpp
src/clp_s/JsonParser.hpp
src/clp_s/PackedStreamReader.cpp
src/clp_s/PackedStreamReader.hpp
src/clp_s/ReaderUtils.cpp
src/clp_s/ReaderUtils.hpp
src/clp_s/Schema.cpp
src/clp_s/Schema.hpp
src/clp_s/SchemaMap.cpp
src/clp_s/SchemaMap.hpp
src/clp_s/SchemaReader.cpp
src/clp_s/SchemaReader.hpp
src/clp_s/SchemaTree.cpp
src/clp_s/SchemaTree.hpp
src/clp_s/SchemaWriter.cpp
src/clp_s/SchemaWriter.hpp
src/clp_s/search/AndExpr.cpp
src/clp_s/search/AndExpr.hpp
src/clp_s/search/BooleanLiteral.cpp
Expand Down Expand Up @@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/search/StringLiteral.hpp
src/clp_s/search/Transformation.hpp
src/clp_s/search/Value.hpp
src/clp_s/SchemaTree.hpp
src/clp_s/TimestampDictionaryReader.cpp
src/clp_s/TimestampDictionaryReader.hpp
src/clp_s/TimestampDictionaryWriter.cpp
src/clp_s/TimestampDictionaryWriter.hpp
src/clp_s/TimestampEntry.cpp
src/clp_s/TimestampEntry.hpp
src/clp_s/TimestampPattern.cpp
src/clp_s/TimestampPattern.hpp
src/clp_s/Utils.cpp
src/clp_s/Utils.hpp
src/clp_s/VariableDecoder.cpp
src/clp_s/VariableDecoder.hpp
src/clp_s/VariableEncoder.cpp
src/clp_s/VariableEncoder.hpp
src/clp_s/ZstdCompressor.cpp
src/clp_s/ZstdCompressor.hpp
src/clp_s/ZstdDecompressor.cpp
src/clp_s/ZstdDecompressor.hpp
)

set(SOURCE_FILES_unitTest
Expand Down Expand Up @@ -501,6 +550,7 @@ set(SOURCE_FILES_unitTest
tests/test-BufferedFileReader.cpp
tests/test-EncodedVariableInterpreter.cpp
tests/test-encoding_methods.cpp
tests/test-end_to_end.cpp
tests/test-ffi_IrUnitHandlerInterface.cpp
tests/test-ffi_KeyValuePairLogEvent.cpp
tests/test-ffi_SchemaTree.cpp
Expand Down Expand Up @@ -542,6 +592,8 @@ target_link_libraries(unitTest
log_surgeon::log_surgeon
LibArchive::LibArchive
MariaDBClient::MariaDBClient
${MONGOCXX_TARGET}
simdjson
spdlog::spdlog
OpenSSL::Crypto
${sqlite_LIBRARY_DEPENDENCIES}
Expand Down
119 changes: 119 additions & 0 deletions components/core/tests/test-end_to_end.cpp
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#include <cstdlib>
#include <filesystem>
#include <string>
#include <string_view>
#include <sys/wait.h>
#include <system_error>
#include <vector>

#include <Catch2/single_include/catch2/catch.hpp>

#include "../src/clp_s/JsonConstructor.hpp"
#include "../src/clp_s/JsonParser.hpp"

// Compression settings passed to clp_s::JsonParserOption by the end-to-end test.
constexpr auto cDefaultTargetEncodedSize{8ULL * 1024 * 1024 * 1024};  // 8 GB
constexpr auto cDefaultMaxDocumentSize{512ULL * 1024 * 1024};  // 512 MB
constexpr auto cDefaultMinTableSize{1ULL * 1024 * 1024};  // 1 MB
constexpr auto cDefaultCompressionLevel{3};
constexpr auto cDefaultPrintArchiveStats{false};

// Paths of the artifacts the test creates, and of the input file it reads.
constexpr char cTestEndToEndArchiveDirectory[]{"test-end-to-end-archive"};
constexpr char cTestEndToEndOutputDirectory[]{"test-end-to-end-out"};
constexpr char cTestEndToEndOutputSortedJson[]{"test-end-to-end_sorted.json"};
constexpr char cTestEndToEndInputFileDirectory[]{"test_log_files"};
constexpr char cTestEndToEndInputFile[]{"test_no_floats_sorted.json"};

AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

namespace {

AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
/**
 * Scope guard for the end-to-end test's on-disk artifacts: the archive directory, the extraction
 * output directory, and the sorted JSON file. The artifacts are removed when the guard is
 * constructed and again when it is destroyed.
 */
class Cleanup {
public:
    /**
     * Removes any artifacts that already exist.
     * @throw std::filesystem::filesystem_error if an existing artifact cannot be removed.
     */
    Cleanup() {
        std::filesystem::remove_all(cTestEndToEndArchiveDirectory);
        std::filesystem::remove_all(cTestEndToEndOutputDirectory);
        std::filesystem::remove(cTestEndToEndOutputSortedJson);
    }

    /**
     * Removes the artifacts on a best-effort basis.
     */
    ~Cleanup() {
        // Destructors are implicitly noexcept, and this one also runs while a failed assertion is
        // unwinding the test body, so an exception escaping from here would terminate the whole
        // test binary. Use the non-throwing overloads and ignore the reported error.
        std::error_code ec;
        std::filesystem::remove_all(cTestEndToEndArchiveDirectory, ec);
        std::filesystem::remove_all(cTestEndToEndOutputDirectory, ec);
        std::filesystem::remove(cTestEndToEndOutputSortedJson, ec);
    }

    // The guard is only meaningful as a single scoped object, so disable copying and moving.
    Cleanup(Cleanup const&) = delete;
    auto operator=(Cleanup const&) -> Cleanup& = delete;
    Cleanup(Cleanup&&) = delete;
    auto operator=(Cleanup&&) -> Cleanup& = delete;
};

/**
 * @return The path of the test input file: cTestEndToEndInputFile inside
 * cTestEndToEndInputFileDirectory.
 */
auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path {
    std::filesystem::path relative_input_path{cTestEndToEndInputFileDirectory};
    relative_input_path /= cTestEndToEndInputFile;
    return relative_input_path;
}

/**
 * @return The path of the test input file as a string, formed by appending the relative input
 * path to the directory of this source file (taken from __FILE__).
 */
auto get_test_input_local_path() -> std::string {
    auto const tests_dir = std::filesystem::path{__FILE__}.parent_path();
    auto const input_path = tests_dir / get_test_input_path_relative_to_tests_dir();
    return input_path.string();
}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
}

// End-to-end round trip for clp-s. The test input file is compressed into an archive with
// clp_s::JsonParser, every archive is extracted again with clp_s::JsonConstructor, and the
// extracted output is normalized with jq and sort and then compared against the input with diff.
// GENERATE(true, false) makes the case run once with structurize_arrays enabled and once with it
// disabled. Both jq and diff must be available to the shell used by std::system.
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
TEST_CASE("clp-s_compression_and_extraction_no_floats",
"[clp-s][end-to-end]") {

auto structurize_arrays = GENERATE(true, false);

// Removes artifacts of earlier runs now, and the artifacts of this run when the scope exits.
Cleanup test_cleanup;

// Compression phase: create the archive directory and compress the input file into it.
std::filesystem::create_directory(cTestEndToEndArchiveDirectory);
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
REQUIRE(std::filesystem::is_directory(cTestEndToEndArchiveDirectory));

AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
clp_s::JsonParserOption parser_option{};
parser_option.file_paths.push_back(get_test_input_local_path());
parser_option.archives_dir = cTestEndToEndArchiveDirectory;
parser_option.target_encoded_size = cDefaultTargetEncodedSize;
parser_option.max_document_size = cDefaultMaxDocumentSize;
parser_option.min_table_size = cDefaultMinTableSize;
parser_option.compression_level = cDefaultCompressionLevel;
parser_option.print_archive_stats = cDefaultPrintArchiveStats;
parser_option.structurize_arrays = structurize_arrays;

clp_s::JsonParser parser(parser_option);
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
REQUIRE(parser.parse());
parser.store();

// Compression must have left something in the archive directory.
REQUIRE(false == std::filesystem::is_empty(cTestEndToEndArchiveDirectory));
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

// Extraction phase: create the output directory and extract every archive into it.
std::filesystem::create_directory(cTestEndToEndOutputDirectory);
REQUIRE(std::filesystem::is_directory(cTestEndToEndOutputDirectory));

clp_s::JsonConstructorOption constructor_option{};
constructor_option.output_dir = cTestEndToEndOutputDirectory;
constructor_option.ordered = false;
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
constructor_option.archives_dir = parser_option.archives_dir;
constructor_option.ordered_chunk_size = 0;
// Each sub-directory of the archives directory is handled as one archive whose ID is the
// directory name.
for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) {
if (false == entry.is_directory()) {
// Skip non-directories
continue;
}

constructor_option.archive_id = entry.path().filename();
clp_s::JsonConstructor constructor(constructor_option);
constructor.store();
}

// Here `command` holds the path of the extracted file, "original" inside the output directory,
// which must exist. The same variable is reused below for shell command lines.
std::string command = cTestEndToEndOutputDirectory;
command += "/original";
REQUIRE(std::filesystem::exists(command.c_str()));

// Verification phase, run through the shell:
//   1. `command -v jq` checks that jq is installed; its stdout and stderr are discarded.
//   2. `jq -S -c` re-emits every record in compact form with sorted keys, `sort` orders the
//      resulting lines, and the result is written to cTestEndToEndOutputSortedJson, which must
//      end up non-empty.
//   3. `command -v diff` checks that diff is installed.
//   4. `diff -u` compares the normalized output with the input file; exit status 0 means the two
//      files are identical.
// NOTE(review): the status of the pipeline in step 2 is that of `sort`, so a jq failure is only
// caught indirectly by the non-empty check and the diff. Only step 4 applies WEXITSTATUS; the
// other checks compare the raw value returned by std::system against 0.
int result = std::system("command -v jq >/dev/null 2>&1");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the two command invocations, do we need the 2>&1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should redirect both stderr and stdout to /dev/null. I'm not sure if it's strictly necessary, but I don't think it is harmful.

AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
REQUIRE(0 == result);
command = "";
command = (((command += "jq -S -c '.' ") += cTestEndToEndOutputDirectory) += "/original | sort > ") += cTestEndToEndOutputSortedJson;
result = std::system(command.c_str());
REQUIRE(0 == result);

REQUIRE(false == std::filesystem::is_empty(cTestEndToEndOutputSortedJson));

result = std::system("command -v diff >/dev/null 2>&1");
REQUIRE(0 == result);
command = "";
command = ((((command +="diff -u ") += cTestEndToEndOutputSortedJson) += " ") += get_test_input_local_path()) += " > /dev/null";
result = std::system(command.c_str());
REQUIRE(0 == WEXITSTATUS(result));
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}
4 changes: 4 additions & 0 deletions components/core/tests/test_log_files/test_sorted.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ set -u

dnf install -y \
cmake \
diffutils \
gcc-c++ \
git \
java-11-openjdk \
jq \
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
libarchive-devel \
libcurl-devel \
libzstd-devel \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
checkinstall \
cmake \
curl \
diffutils \
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
g++ \
g++-10 \
gcc \
gcc-10 \
git \
jq \
libcurl4 \
libcurl4-openssl-dev \
libmariadb-dev \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
cmake \
curl \
build-essential \
diffutils \
git \
jq \
libboost-filesystem-dev \
libboost-iostreams-dev \
libboost-program-options-dev \
Expand Down
Loading