Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(clp-s): json to irv2 #657

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 102 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,13 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
std::cerr << " c - compress" << std::endl;
std::cerr << " x - decompress" << std::endl;
std::cerr << " s - search" << std::endl;
std::cerr << " r - JSON to IR Format" << std::endl;
std::cerr << std::endl;
std::cerr << "Try "
<< " c --help OR"
<< " x --help OR"
<< " s --help for command-specific details." << std::endl;
<< " s --help OR"
<< " r --help for command-specific details." << std::endl;

po::options_description visible_options;
visible_options.add(general_options);
Expand All @@ -125,6 +127,7 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
case (char)Command::Compress:
case (char)Command::Extract:
case (char)Command::Search:
case (char)Command::JsonToIr:
m_command = (Command)command_input;
break;
default:
Expand Down Expand Up @@ -727,6 +730,100 @@ CommandLineArguments::parse_arguments(int argc, char const** argv) {
"The --count-by-time and --count options are mutually exclusive."
);
}
} else if ((char)Command::JsonToIr == command_input) {
po::options_description compression_positional_options;
// clang-format off
compression_positional_options.add_options()(
"ir-dir",
po::value<std::string>(&m_archives_dir)->value_name("DIR"),
"output directory"
)(
"input-paths",
po::value<std::vector<std::string>>(&m_file_paths)->value_name("PATHS"),
"input paths"
);
// clang-format on

po::options_description compression_options("Compression options");
std::string input_path_list_file_path;
// clang-format off
compression_options.add_options()(
"compression-level",
po::value<int>(&m_compression_level)->value_name("LEVEL")->
default_value(m_compression_level),
"1 (fast/low compression) to 9 (slow/high compression)."
)(
"max-document-size",
po::value<size_t>(&m_max_document_size)->value_name("DOC_SIZE")->
default_value(m_max_document_size),
"Maximum allowed size (B) for a single document before ir generation fails."
)(
"max-ir-buffer-size",
po::value<size_t>(&m_max_ir_buffer_size)->value_name("BUFFER_SIZE")->
default_value(m_max_ir_buffer_size),
"Maximum allowed size (B) for an in memory IR buffer before being written to file."
)(
Comment on lines +762 to +765
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add input validation for maximum IR buffer size.

The max-ir-buffer-size should be validated to ensure it is greater than zero to prevent potential division by zero or other unexpected behaviours.

Apply this diff to add input validation:

+                if (0 == m_max_ir_buffer_size) {
+                    throw std::invalid_argument("max-ir-buffer-size must be greater than zero.");
+                }

Committable suggestion skipped: line range outside the PR's diff.

"encoding-type",
po::value<int>(&m_encoding_type)->value_name("ENCODING_TYPE")->
default_value(m_encoding_type),
"4 (four byte encoding) or 8 (eight byte encoding)"
Comment on lines +768 to +769
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add input validation for encoding type.

The encoding-type parameter should be validated to ensure that it is either 4 or 8, preventing unexpected behaviour or errors when invalid values are provided.

Apply this diff to add input validation:

+                if (m_encoding_type != 4 && m_encoding_type != 8) {
+                    throw std::invalid_argument("encoding-type must be either 4 or 8.");
+                }

Committable suggestion skipped: line range outside the PR's diff.

)(
"files-from,f",
po::value<std::string>(&input_path_list_file_path)
->value_name("FILE")
->default_value(input_path_list_file_path),
"Compress files specified in FILE"
);
// clang-format on

po::positional_options_description positional_options;
positional_options.add("ir-dir", 1);
positional_options.add("input-paths", -1);

po::options_description all_compression_options;
all_compression_options.add(compression_options);
all_compression_options.add(compression_positional_options);

std::vector<std::string> unrecognized_options
= po::collect_unrecognized(parsed.options, po::include_positional);
unrecognized_options.erase(unrecognized_options.begin());
po::store(
po::command_line_parser(unrecognized_options)
.options(all_compression_options)
.positional(positional_options)
.run(),
parsed_command_line_options
);
po::notify(parsed_command_line_options);

if (parsed_command_line_options.count("help")) {
print_json_to_ir_usage();

std::cerr << "Examples:\n";
std::cerr << " # Parse file1.json and dir1 into irs-dir\n";
std::cerr << " " << m_program_name << " r irs-dir file1.json dir1\n";

po::options_description visible_options;
visible_options.add(general_options);
visible_options.add(compression_options);
std::cerr << visible_options << '\n';
return ParsingResult::InfoCommand;
}

if (m_archives_dir.empty()) {
throw std::invalid_argument("No IRs directory specified.");
}

if (false == input_path_list_file_path.empty()) {
if (false == read_paths_from_file(input_path_list_file_path, m_file_paths)) {
SPDLOG_ERROR("Failed to read paths from {}", input_path_list_file_path);
return ParsingResult::Failure;
}
}

if (m_file_paths.empty()) {
throw std::invalid_argument("No input paths specified.");
}
}
} catch (std::exception& e) {
SPDLOG_ERROR("{}", e.what());
Expand Down Expand Up @@ -834,6 +931,10 @@ void CommandLineArguments::print_decompression_usage() const {
std::cerr << "Usage: " << m_program_name << " x [OPTIONS] ARCHIVES_DIR OUTPUT_DIR" << std::endl;
}

// Writes the one-line usage synopsis for the JSON-to-IR ("r") command to stderr.
void CommandLineArguments::print_json_to_ir_usage() const {
    auto& err_stream = std::cerr;
    err_stream << "Usage: " << m_program_name;
    err_stream << " r [OPTIONS] IRS_DIR [FILE/DIR ...]\n";
}

void CommandLineArguments::print_search_usage() const {
std::cerr << "Usage: " << m_program_name
<< " s [OPTIONS] ARCHIVES_DIR KQL_QUERY"
Expand Down
11 changes: 10 additions & 1 deletion components/core/src/clp_s/CommandLineArguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ class CommandLineArguments {
enum class Command : char {
Compress = 'c',
Extract = 'x',
Search = 's'
Search = 's',
JsonToIr = 'r'
};

enum class OutputHandlerType : uint8_t {
Expand Down Expand Up @@ -65,6 +66,10 @@ class CommandLineArguments {

// Returns the value stored for the --max-document-size option (in bytes).
size_t get_max_document_size() const {
    return m_max_document_size;
}

// Returns the value stored for the --max-ir-buffer-size option (in bytes).
[[nodiscard]] auto get_max_ir_buffer_size() const -> size_t {
    return m_max_ir_buffer_size;
}

// Returns the value stored for the --encoding-type option; the option's help text
// documents 4 (four byte encoding) and 8 (eight byte encoding).
[[nodiscard]] auto get_encoding_type() const -> int {
    return m_encoding_type;
}

// Returns the current value of the m_print_archive_stats flag.
[[nodiscard]] bool print_archive_stats() const {
    return m_print_archive_stats;
}

// Returns a const reference to the stored m_mongodb_uri string (no copy is made).
std::string const& get_mongodb_uri() const {
    return m_mongodb_uri;
}
Expand Down Expand Up @@ -170,6 +175,8 @@ class CommandLineArguments {

void print_decompression_usage() const;

void print_json_to_ir_usage() const;

void print_search_usage() const;

// Variables
Expand All @@ -192,6 +199,8 @@ class CommandLineArguments {
size_t m_minimum_table_size{1ULL * 1024 * 1024}; // 1 MB
bool m_disable_log_order{false};
FileType m_file_type{FileType::Json};
int m_encoding_type{8};
size_t m_max_ir_buffer_size{512ULL * 1024 * 1024};

// Metadata db variables
std::optional<clp::GlobalMetadataDBConfig> m_metadata_db_config;
Expand Down
9 changes: 9 additions & 0 deletions components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ struct JsonParserOption {
std::shared_ptr<clp::GlobalMySQLMetadataDB> metadata_db;
};

// Plain option bundle for the JSON-to-IR path.
//
// Every scalar member carries a default member initializer so that a default-constructed
// instance never exposes indeterminate values. The struct remains an aggregate, so existing
// brace-initialization by callers keeps working.
//
// NOTE(review): field meanings below are inferred from the matching command-line options
// (`--max-document-size`, `--max-ir-buffer-size`, `--compression-level`, `--encoding-type`);
// confirm against the code that populates this struct.
struct JsonToIrParserOption {
    std::vector<std::string> file_paths;
    std::string irs_dir;
    size_t max_document_size{};
    size_t max_ir_buffer_size{};
    int compression_level{};
    int encoding{};
};

class JsonParser {
public:
class OperationFailed : public TraceableException {
Expand Down
Loading
Loading