From 5382e8099de924e6de0779c905e590f72c97cc9e Mon Sep 17 00:00:00 2001 From: Abigail Matthews Date: Mon, 18 Nov 2024 15:42:20 -0500 Subject: [PATCH] Adding end to end test case for clp-s --- components/core/CMakeLists.txt | 54 ++++++++- components/core/tests/test-end_to_end.cpp | 112 ++++++++++++++++++ .../test_log_files/test_no_floats_sorted.json | 4 + .../tests/test_log_files/test_sorted.json | 4 + 4 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 components/core/tests/test-end_to_end.cpp create mode 100644 components/core/tests/test_log_files/test_no_floats_sorted.json create mode 100644 components/core/tests/test_log_files/test_sorted.json diff --git a/components/core/CMakeLists.txt b/components/core/CMakeLists.txt index e5c9b06c8..d4f9ff918 100644 --- a/components/core/CMakeLists.txt +++ b/components/core/CMakeLists.txt @@ -241,6 +241,42 @@ add_subdirectory(src/clp_s) add_subdirectory(src/reducer) set(SOURCE_FILES_clp_s_unitTest + src/clp_s/ArchiveReader.cpp + src/clp_s/ArchiveReader.hpp + src/clp_s/ArchiveWriter.cpp + src/clp_s/ArchiveWriter.hpp + src/clp_s/ColumnReader.cpp + src/clp_s/ColumnReader.hpp + src/clp_s/ColumnWriter.cpp + src/clp_s/ColumnWriter.hpp + src/clp_s/DictionaryEntry.cpp + src/clp_s/DictionaryEntry.hpp + src/clp_s/DictionaryWriter.cpp + src/clp_s/DictionaryWriter.hpp + src/clp_s/FileReader.cpp + src/clp_s/FileReader.hpp + src/clp_s/FileWriter.cpp + src/clp_s/FileWriter.hpp + src/clp_s/JsonConstructor.cpp + src/clp_s/JsonConstructor.hpp + src/clp_s/JsonFileIterator.cpp + src/clp_s/JsonFileIterator.hpp + src/clp_s/JsonParser.cpp + src/clp_s/JsonParser.hpp + src/clp_s/PackedStreamReader.cpp + src/clp_s/PackedStreamReader.hpp + src/clp_s/ReaderUtils.cpp + src/clp_s/ReaderUtils.hpp + src/clp_s/Schema.cpp + src/clp_s/Schema.hpp + src/clp_s/SchemaMap.cpp + src/clp_s/SchemaMap.hpp + src/clp_s/SchemaReader.cpp + src/clp_s/SchemaReader.hpp + src/clp_s/SchemaTree.cpp + src/clp_s/SchemaTree.hpp + src/clp_s/SchemaWriter.cpp + src/clp_s/SchemaWriter.hpp src/clp_s/search/AndExpr.cpp src/clp_s/search/AndExpr.hpp src/clp_s/search/BooleanLiteral.cpp @@ -273,11 +309,24 @@ set(SOURCE_FILES_clp_s_unitTest src/clp_s/search/StringLiteral.hpp src/clp_s/search/Transformation.hpp src/clp_s/search/Value.hpp - src/clp_s/SchemaTree.hpp + src/clp_s/TimestampDictionaryReader.cpp + src/clp_s/TimestampDictionaryReader.hpp + src/clp_s/TimestampDictionaryWriter.cpp + src/clp_s/TimestampDictionaryWriter.hpp + src/clp_s/TimestampEntry.cpp + src/clp_s/TimestampEntry.hpp src/clp_s/TimestampPattern.cpp src/clp_s/TimestampPattern.hpp src/clp_s/Utils.cpp src/clp_s/Utils.hpp + src/clp_s/VariableDecoder.cpp + src/clp_s/VariableDecoder.hpp + src/clp_s/VariableEncoder.cpp + src/clp_s/VariableEncoder.hpp + src/clp_s/ZstdCompressor.cpp + src/clp_s/ZstdCompressor.hpp + src/clp_s/ZstdDecompressor.cpp + src/clp_s/ZstdDecompressor.hpp ) set(SOURCE_FILES_unitTest @@ -501,6 +550,7 @@ set(SOURCE_FILES_unitTest tests/test-BufferedFileReader.cpp tests/test-EncodedVariableInterpreter.cpp tests/test-encoding_methods.cpp + tests/test-end_to_end.cpp tests/test-ffi_IrUnitHandlerInterface.cpp tests/test-ffi_KeyValuePairLogEvent.cpp tests/test-ffi_SchemaTree.cpp @@ -542,6 +592,8 @@ target_link_libraries(unitTest log_surgeon::log_surgeon LibArchive::LibArchive MariaDBClient::MariaDBClient + ${MONGOCXX_TARGET} + simdjson spdlog::spdlog OpenSSL::Crypto ${sqlite_LIBRARY_DEPENDENCIES} diff --git a/components/core/tests/test-end_to_end.cpp b/components/core/tests/test-end_to_end.cpp new file mode 100644 index 000000000..c2192f9ce --- /dev/null +++ b/components/core/tests/test-end_to_end.cpp @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "../src/clp/BufferReader.hpp" +#include "../src/clp/ffi/ir_stream/decoding_methods.hpp" +#include "../src/clp/ffi/ir_stream/Deserializer.hpp" +#include "../src/clp/ffi/ir_stream/Serializer.hpp" +#include "../src/clp/ffi/KeyValuePairLogEvent.hpp" +#include "../src/clp/ir/types.hpp" +#include "../src/clp/time_types.hpp" +#include "../src/clp_s/JsonConstructor.hpp" +#include "../src/clp_s/JsonParser.hpp" + +using clp::BufferReader; +using clp::ffi::ir_stream::Deserializer; +using clp::ffi::ir_stream::IRErrorCode; +using clp::ffi::ir_stream::Serializer; +using clp::ffi::KeyValuePairLogEvent; +using clp::ir::eight_byte_encoded_variable_t; +using clp::ir::four_byte_encoded_variable_t; +using clp::size_checked_pointer_cast; +using clp::UtcOffset; +using std::string; +using std::string_view; +using std::vector; + +auto const cDefaultTargetEncodedSize = 8ULL * 1024 * 1024 * 1024; +auto const cDefaultMaxDocumentSize = 512ULL * 1024 * 1024; +auto const cDefaultMinTableSize = 1ULL * 1024 * 1024; +auto const cDeaultCompressionLevel = 3; +auto const cDefaultPrintArchiveStats = false; +auto const cDefaultStructurizeArrays = false; + +namespace { +auto get_test_input_path_relative_to_tests_dir() -> std::filesystem::path { + return std::filesystem::path{"test_log_files"} / "test_no_floats_sorted.json"; +} + +auto get_test_input_local_path() -> std::string { + std::filesystem::path const current_file_path{__FILE__}; + auto const tests_dir{current_file_path.parent_path()}; + return (tests_dir / get_test_input_path_relative_to_tests_dir()).string(); +} +} // namespace + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +TEMPLATE_TEST_CASE( + "clp-s_compression_and_extraction_no_floats", + "[clp-s][end-to-end]", + four_byte_encoded_variable_t, + eight_byte_encoded_variable_t +) { + std::filesystem::remove_all("test-end-to-end-archive"); + std::filesystem::remove_all("test-end-to-end-out"); + std::filesystem::remove("test-end-to-end_sorted.json"); + std::filesystem::remove("diff_out.txt"); + + std::filesystem::create_directory("test-end-to-end-archive"); + REQUIRE(std::filesystem::is_directory("test-end-to-end-archive")); + + clp_s::JsonParserOption parser_option{}; + parser_option.file_paths.push_back(get_test_input_local_path()); + parser_option.archives_dir = "test-end-to-end-archive"; + parser_option.target_encoded_size = cDefaultTargetEncodedSize; + parser_option.max_document_size = cDefaultMaxDocumentSize; + parser_option.min_table_size = cDefaultMinTableSize; + parser_option.compression_level = cDeaultCompressionLevel; + parser_option.print_archive_stats = cDefaultPrintArchiveStats; + parser_option.structurize_arrays = cDefaultStructurizeArrays; + + clp_s::JsonParser parser(parser_option); + REQUIRE(parser.parse()); + parser.store(); + + REQUIRE(false == std::filesystem::is_empty("test-end-to-end-archive")); + + std::filesystem::create_directory("test-end-to-end-out"); + REQUIRE(std::filesystem::is_directory("test-end-to-end-out")); + + clp_s::JsonConstructorOption constructor_option{}; + constructor_option.output_dir = "test-end-to-end-out"; + constructor_option.ordered = false; + constructor_option.archives_dir = parser_option.archives_dir; + constructor_option.ordered_chunk_size = 0; + for (auto const& entry : std::filesystem::directory_iterator(constructor_option.archives_dir)) { + if (false == entry.is_directory()) { + // Skip non-directories + continue; + } + + constructor_option.archive_id = entry.path().filename(); + clp_s::JsonConstructor constructor(constructor_option); + constructor.store(); + } + + REQUIRE(std::filesystem::exists("test-end-to-end-out/original")); + + std::system("jq -S -c '.' test-end-to-end-out/original | sort > test-end-to-end_sorted.json"); + + REQUIRE(false == std::filesystem::is_empty("test-end-to-end_sorted.json")); + + std::string const command = "diff -u test-end-to-end_sorted.json " + get_test_input_local_path() + + " > diff_out.txt"; + std::system(command.c_str()); + REQUIRE(std::filesystem::is_empty("diff_out.txt")); +} diff --git a/components/core/tests/test_log_files/test_no_floats_sorted.json b/components/core/tests/test_log_files/test_no_floats_sorted.json new file mode 100644 index 000000000..db4a54fea --- /dev/null +++ b/components/core/tests/test_log_files/test_no_floats_sorted.json @@ -0,0 +1,4 @@ +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true} diff --git a/components/core/tests/test_log_files/test_sorted.json b/components/core/tests/test_log_files/test_sorted.json new file mode 100644 index 000000000..0ac463d7b --- /dev/null +++ b/components/core/tests/test_log_files/test_sorted.json @@ -0,0 +1,4 @@ +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_object":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"non_empty_object2":{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_array":[],"empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true},"null":null,"string":"short_string","true":true} +{"clp_string":"uid=0, CPU usage:99.99%, \"user_name\"=YScope","empty_object":{},"false":false,"float_neg":-1.01,"float_pos":1.01,"float_zero":0.0,"int16_max":32767,"int16_min":-32768,"int32_max":2147483647,"int32_min":-2147483648,"int64_max":0,"int64_min":1,"int8_max":127,"int8_min":-128,"nonempty_array":[1,2,3,4,5],"null":null,"string":"short_string","true":true}