Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(clp-s): Add end-to-end test case for compression and extraction. #595

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
5382e80
Adding end to end test case for clp-s
AVMatthews Nov 18, 2024
acce22c
Add required jq install
AVMatthews Nov 18, 2024
ad187f7
Additional error checking and small modification to test input files
AVMatthews Nov 19, 2024
692d8d8
Add install and check for diff command
AVMatthews Nov 19, 2024
8f76f0a
Adding extra debug prints to help solve ubuntu build problems
AVMatthews Nov 19, 2024
5486700
More debug prints
AVMatthews Nov 19, 2024
df6122c
change input files only include max int supported by jq
AVMatthews Nov 19, 2024
e1b636d
Remove debug prints
AVMatthews Nov 19, 2024
7a21d85
remove magic values, add addtional file cleanup, test on structurized…
AVMatthews Nov 20, 2024
508c39b
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 20, 2024
a039bf9
Fix compilation error form merging msot recent commit form main, remo…
AVMatthews Nov 20, 2024
ad18256
brace init, move variables, doc string, command construction
AVMatthews Nov 22, 2024
ea375b2
remove std::format due to lack of support
AVMatthews Nov 22, 2024
90c6cef
remove format include
AVMatthews Nov 22, 2024
1a16528
Merge branch 'y-scope:main' into End-to-End---CLP-S-Unit-Testing
AVMatthews Nov 22, 2024
01ca9ca
fmt:format for command string building
AVMatthews Nov 22, 2024
03d6510
restructure into compress, extract, compare
AVMatthews Nov 29, 2024
c362b22
small declaration/assignment changes
AVMatthews Nov 29, 2024
d013e46
remove no lint line and change type to auto
AVMatthews Nov 30, 2024
1d187b3
update jsonl test file
AVMatthews Nov 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,42 @@ add_subdirectory(src/clp_s)
add_subdirectory(src/reducer)

set(SOURCE_FILES_clp_s_unitTest
src/clp_s/ArchiveReader.cpp
src/clp_s/ArchiveReader.hpp
src/clp_s/ArchiveWriter.cpp
src/clp_s/ArchiveWriter.hpp
src/clp_s/ColumnReader.cpp
src/clp_s/ColumnReader.hpp
src/clp_s/ColumnWriter.cpp
src/clp_s/ColumnWriter.hpp
src/clp_s/DictionaryEntry.cpp
src/clp_s/DictionaryEntry.hpp
src/clp_s/DictionaryWriter.cpp
src/clp_s/DictionaryWriter.hpp
src/clp_s/FileReader.cpp
src/clp_s/FileReader.hpp
src/clp_s/FileWriter.cpp
src/clp_s/FileWriter.hpp
src/clp_s/JsonConstructor.cpp
src/clp_s/JsonConstructor.hpp
src/clp_s/JsonFileIterator.cpp
src/clp_s/JsonFileIterator.hpp
src/clp_s/JsonParser.cpp
src/clp_s/JsonParser.hpp
src/clp_s/PackedStreamReader.cpp
src/clp_s/PackedStreamReader.hpp
src/clp_s/ReaderUtils.cpp
src/clp_s/ReaderUtils.hpp
src/clp_s/Schema.cpp
src/clp_s/Schema.hpp
src/clp_s/SchemaMap.cpp
src/clp_s/SchemaMap.hpp
src/clp_s/SchemaReader.cpp
src/clp_s/SchemaReader.hpp
src/clp_s/SchemaTree.cpp
src/clp_s/SchemaTree.hpp
src/clp_s/SchemaWriter.cpp
src/clp_s/SchemaWriter.hpp
src/clp_s/search/AndExpr.cpp
src/clp_s/search/AndExpr.hpp
src/clp_s/search/BooleanLiteral.cpp
Expand Down Expand Up @@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest
src/clp_s/search/StringLiteral.hpp
src/clp_s/search/Transformation.hpp
src/clp_s/search/Value.hpp
src/clp_s/SchemaTree.hpp
src/clp_s/TimestampDictionaryReader.cpp
src/clp_s/TimestampDictionaryReader.hpp
src/clp_s/TimestampDictionaryWriter.cpp
src/clp_s/TimestampDictionaryWriter.hpp
src/clp_s/TimestampEntry.cpp
src/clp_s/TimestampEntry.hpp
src/clp_s/TimestampPattern.cpp
src/clp_s/TimestampPattern.hpp
src/clp_s/Utils.cpp
src/clp_s/Utils.hpp
src/clp_s/VariableDecoder.cpp
src/clp_s/VariableDecoder.hpp
src/clp_s/VariableEncoder.cpp
src/clp_s/VariableEncoder.hpp
src/clp_s/ZstdCompressor.cpp
src/clp_s/ZstdCompressor.hpp
src/clp_s/ZstdDecompressor.cpp
src/clp_s/ZstdDecompressor.hpp
)

set(SOURCE_FILES_unitTest
Expand Down Expand Up @@ -501,6 +550,7 @@ set(SOURCE_FILES_unitTest
tests/test-BufferedFileReader.cpp
tests/test-EncodedVariableInterpreter.cpp
tests/test-encoding_methods.cpp
tests/test-end_to_end.cpp
tests/test-ffi_IrUnitHandlerInterface.cpp
tests/test-ffi_KeyValuePairLogEvent.cpp
tests/test-ffi_SchemaTree.cpp
Expand Down Expand Up @@ -542,6 +592,8 @@ target_link_libraries(unitTest
log_surgeon::log_surgeon
LibArchive::LibArchive
MariaDBClient::MariaDBClient
${MONGOCXX_TARGET}
simdjson
spdlog::spdlog
OpenSSL::Crypto
${sqlite_LIBRARY_DEPENDENCIES}
Expand Down
127 changes: 127 additions & 0 deletions components/core/tests/test-end_to_end.cpp
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#include <sys/wait.h>

#include <cstdlib>
#include <filesystem>
#include <string>
#include <string_view>
#include <system_error>
#include <vector>
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved

#include <Catch2/single_include/catch2/catch.hpp>
#include <fmt/format.h>

#include "../src/clp_s/JsonConstructor.hpp"
#include "../src/clp_s/JsonParser.hpp"

constexpr std::string_view cTestEndToEndArchiveDirectory{"test-end-to-end-archive"};
constexpr std::string_view cTestEndToEndOutputDirectory{"test-end-to-end-out"};
constexpr std::string_view cTestEndToEndOutputSortedJson{"test-end-to-end_sorted.json"};
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
constexpr std::string_view cTestEndToEndInputFileDirectory{"test_log_files"};
constexpr std::string_view cTestEndToEndInputFile{"test_no_floats_sorted.json"};

namespace {
/**
 * Guard class with no data members whose construction and destruction exist solely to clean up
 * the files and directories created by the test case.
 *
 * Construction removes whatever currently exists at the test's archive directory, output
 * directory, and sorted-output file; destruction removes them again once the test case is done
 * (including when a `REQUIRE` aborts the test case early).
 */
class Cleanup {
public:
    /**
     * Removes the test artifacts up front.
     * @throw std::filesystem::filesystem_error if an artifact exists but cannot be removed.
     */
    Cleanup() { delete_files(); }

    /**
     * Removes the test artifacts on a best-effort basis. Destructors are implicitly `noexcept`,
     * so a throwing filesystem call here would terminate the whole test binary; the
     * `std::error_code` overloads are used instead and failures are ignored.
     */
    ~Cleanup() { delete_files_nothrow(); }

    // The guard is only meaningful as a scoped local; forbid copies and moves so cleanup can't run
    // more often than intended.
    Cleanup(Cleanup const&) = delete;
    auto operator=(Cleanup const&) -> Cleanup& = delete;
    Cleanup(Cleanup&&) = delete;
    auto operator=(Cleanup&&) -> Cleanup& = delete;

private:
    /**
     * Removes the archive directory, the output directory, and the sorted-output file, reporting
     * failures by exception.
     */
    static void delete_files() {
        std::filesystem::remove_all(cTestEndToEndArchiveDirectory);
        std::filesystem::remove_all(cTestEndToEndOutputDirectory);
        std::filesystem::remove(cTestEndToEndOutputSortedJson);
    }

    /**
     * Same removals as `delete_files`, but failures are written to a discarded error code
     * rather than thrown.
     */
    static void delete_files_nothrow() noexcept {
        std::error_code ignored_error;
        std::filesystem::remove_all(cTestEndToEndArchiveDirectory, ignored_error);
        std::filesystem::remove_all(cTestEndToEndOutputDirectory, ignored_error);
        std::filesystem::remove(cTestEndToEndOutputSortedJson, ignored_error);
    }
};

/**
 * @return The test input file's path relative to the tests directory, i.e. the input file
 * directory joined with the input file name.
 */
auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path {
    std::filesystem::path relative_input_path{cTestEndToEndInputFileDirectory};
    relative_input_path /= cTestEndToEndInputFile;
    return relative_input_path;
}

/**
 * @return The path of the test input file as a string, resolved against the directory that
 * contains this source file (taken from `__FILE__`).
 */
auto get_test_input_local_path() -> std::string {
    // Start from this source file's directory, then append the tests-relative input path.
    auto input_path{std::filesystem::path{__FILE__}.parent_path()};
    input_path /= get_test_input_path_relative_to_tests_dir();
    return input_path.string();
}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
} // namespace

/**
 * End-to-end round trip for clp-s:
 *   1. Compress the test input file into an archive directory.
 *   2. Extract every archive in that directory into an output directory.
 *   3. Normalize the extracted JSON with `jq` + `sort` and `diff` it against the input file.
 *
 * The test case requires the `jq` and `diff` commands to be available on the PATH.
 */
// NOLINTNEXTLINE(readability-function-cognitive-complexity)
TEST_CASE("clp-s_compression_and_extraction_no_floats", "[clp-s][end-to-end]") {
    auto const default_target_encoded_size = 8ULL * 1024 * 1024 * 1024;  // 8 GiB
    auto const default_max_document_size = 512ULL * 1024 * 1024;  // 512 MiB
    auto const default_min_table_size = 1ULL * 1024 * 1024;  // 1 MiB
    auto const default_compression_level = 3;
    auto const default_print_archive_stats = false;
    auto const default_ordered = false;
    auto const default_target_ordered_chunk_size = 0;

    // The test case body is run once for each generated value.
    auto structurize_arrays = GENERATE(true, false);

    // Removes artifacts now and again when the test case scope is left.
    Cleanup const test_cleanup;

    // --- Compress ---
    std::filesystem::create_directory(cTestEndToEndArchiveDirectory);
    REQUIRE(std::filesystem::is_directory(cTestEndToEndArchiveDirectory));

    clp_s::JsonParserOption parser_option{};
    parser_option.file_paths.push_back(get_test_input_local_path());
    parser_option.archives_dir = cTestEndToEndArchiveDirectory;
    parser_option.target_encoded_size = default_target_encoded_size;
    parser_option.max_document_size = default_max_document_size;
    parser_option.min_table_size = default_min_table_size;
    parser_option.compression_level = default_compression_level;
    parser_option.print_archive_stats = default_print_archive_stats;
    parser_option.structurize_arrays = structurize_arrays;

    clp_s::JsonParser parser{parser_option};
    REQUIRE(parser.parse());
    parser.store();

    REQUIRE(false == std::filesystem::is_empty(cTestEndToEndArchiveDirectory));

    // --- Extract ---
    std::filesystem::create_directory(cTestEndToEndOutputDirectory);
    REQUIRE(std::filesystem::is_directory(cTestEndToEndOutputDirectory));

    clp_s::JsonConstructorOption constructor_option{};
    constructor_option.archives_dir = parser_option.archives_dir;
    constructor_option.output_dir = cTestEndToEndOutputDirectory;
    constructor_option.ordered = default_ordered;
    constructor_option.target_ordered_chunk_size = default_target_ordered_chunk_size;
    for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) {
        if (false == entry.is_directory()) {
            // Skip non-directories
            continue;
        }

        // Each sub-directory of the archives directory is treated as one archive, identified by
        // its directory name.
        constructor_option.archive_id = entry.path().filename().string();
        clp_s::JsonConstructor constructor{constructor_option};
        constructor.store();
    }

    std::filesystem::path extracted_json_path{cTestEndToEndOutputDirectory};
    extracted_json_path /= "original";
    REQUIRE(std::filesystem::exists(extracted_json_path));

    // --- Compare ---
    // `>/dev/null 2>&1` sends both stdout and stderr of the availability check to /dev/null. It
    // may not be strictly necessary, but it is harmless and keeps the test output quiet.
    int result = std::system("command -v jq >/dev/null 2>&1");
    REQUIRE(0 == result);
    // Canonicalize the extracted JSON (sorted keys, one compact object per line) and sort the
    // lines so the result can be compared against the input file line by line.
    // NOTE: the shell reports the exit status of the last command in the pipeline (`sort`), so the
    // non-empty check below is what guards against `jq` producing no output.
    std::string command = fmt::format(
            "jq --sort-keys --compact-output '.' {}/original | sort > {}",
            cTestEndToEndOutputDirectory,
            cTestEndToEndOutputSortedJson
    );
    result = std::system(command.c_str());
    REQUIRE(0 == result);

    REQUIRE(false == std::filesystem::is_empty(cTestEndToEndOutputSortedJson));

    result = std::system("command -v diff >/dev/null 2>&1");
    REQUIRE(0 == result);
    command = fmt::format(
            "diff -u {} {} > /dev/null",
            cTestEndToEndOutputSortedJson,
            get_test_input_local_path()
    );
    result = std::system(command.c_str());
    // `WEXITSTATUS` is only meaningful when the child exited normally; without the `WIFEXITED`
    // check, a `diff` terminated by a signal could be misread as exit status 0.
    REQUIRE(WIFEXITED(result));
    REQUIRE(0 == WEXITSTATUS(result));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true}
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true}
{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max_supported":9223372036854776,"int64_min_supported":-9223372036854776,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ set -u

dnf install -y \
cmake \
diffutils \
gcc-c++ \
git \
java-11-openjdk \
jq \
AVMatthews marked this conversation as resolved.
Show resolved Hide resolved
libarchive-devel \
libcurl-devel \
libzstd-devel \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
gcc \
gcc-10 \
git \
jq \
libcurl4 \
libcurl4-openssl-dev \
libmariadb-dev \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
curl \
build-essential \
git \
jq \
libboost-filesystem-dev \
libboost-iostreams-dev \
libboost-program-options-dev \
Expand Down
Loading