-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add MySql support for storage backend #20
Changes from 12 commits
fc3f9e6
3e9df9f
6b3ab0c
0015645
b94ed8d
4b52fb4
59f8450
7240d4c
2660e3a
d6bef0f
e490e31
a1986e3
7db8925
6c68a95
8b84f98
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
#include <optional> | ||
#include <string> | ||
#include <utility> | ||
#include <vector> | ||
|
||
namespace spider::core { | ||
class Data { | ||
|
@@ -16,16 +17,33 @@ class Data { | |
init_id(); | ||
} | ||
|
||
Data(boost::uuids::uuid id, std::string key, std::string value) | ||
: m_id(id), | ||
m_key(std::move(key)), | ||
m_value(std::move(value)) {} | ||
|
||
[[nodiscard]] auto get_id() const -> boost::uuids::uuid { return m_id; } | ||
|
||
[[nodiscard]] auto get_key() const -> std::optional<std::string> { return m_key; } | ||
|
||
[[nodiscard]] auto get_value() const -> std::string { return m_value; } | ||
|
||
[[nodiscard]] auto get_locality() const -> std::vector<std::string> const& { | ||
return m_locality; | ||
} | ||
|
||
[[nodiscard]] auto is_hard_locality() const -> bool { return m_hard_locality; } | ||
|
||
void set_locality(std::vector<std::string>& locality) { m_locality = locality; } | ||
|
||
void set_hard_locality(bool hard) { m_hard_locality = hard; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Improve setter methods for better safety and clarity The current implementation has several potential improvements:
Consider this improvement: - void set_locality(std::vector<std::string>& locality) { m_locality = locality; }
+ void set_locality(std::vector<std::string> locality) { m_locality = std::move(locality); } Also, please add documentation comments explaining:
|
||
|
||
private: | ||
boost::uuids::uuid m_id; | ||
std::optional<std::string> m_key; | ||
std::string m_value; | ||
std::vector<std::string> m_locality; | ||
bool m_hard_locality = false; | ||
Comment on lines
+45
to
+46
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💡 Codebase verification Add documentation for locality-related member variables These members are used in MySQL storage implementation for managing data locality:
Please add documentation above these member variables to explain:
🔗 Analysis chainAdd documentation for new member variables The new member variables need documentation explaining their role in MySQL storage implementation. Let's verify how these members are used: Let me gather more context about how these members are used. 🏁 Scripts executedThe following scripts were executed for the analysis: Script: #!/bin/bash
# Search for usage of locality in the codebase
rg -l "m_locality" --type cpp
rg -l "set_locality" --type cpp
Length of output: 147 Script: #!/bin/bash
# Check the implementation details
rg "m_locality" -A 5 -B 5 --type cpp
rg "set_locality" -A 5 -B 5 --type cpp
Length of output: 2797 |
||
|
||
void init_id() { | ||
boost::uuids::random_generator gen; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,10 @@ class TaskInput { | |
|
||
[[nodiscard]] auto get_type() const -> std::string { return m_type; } | ||
|
||
void set_value(std::string const& value) { m_value = value; } | ||
|
||
void set_data_id(boost::uuids::uuid data_id) { m_data_id = data_id; } | ||
|
||
private: | ||
std::optional<std::tuple<boost::uuids::uuid, std::uint8_t>> m_task_output; | ||
std::optional<std::string> m_value; | ||
|
@@ -46,6 +50,8 @@ class TaskInput { | |
|
||
class TaskOutput { | ||
public: | ||
explicit TaskOutput(std::string type) : m_type(std::move(type)) {} | ||
|
||
TaskOutput(std::string value, std::string type) | ||
: m_value(std::move(value)), | ||
m_type(std::move(type)) {} | ||
|
@@ -62,13 +68,27 @@ class TaskOutput { | |
|
||
[[nodiscard]] auto get_type() const -> std::string { return m_type; } | ||
|
||
void set_value(std::string const& value) { m_value = value; } | ||
|
||
void set_data_id(boost::uuids::uuid data_id) { m_data_id = data_id; } | ||
|
||
private: | ||
std::optional<std::string> m_value; | ||
std::optional<boost::uuids::uuid> m_data_id; | ||
std::string m_type; | ||
}; | ||
|
||
class TaskInstance {}; | ||
struct TaskInstance { | ||
boost::uuids::uuid id; | ||
boost::uuids::uuid task_id; | ||
|
||
explicit TaskInstance(boost::uuids::uuid task_id) : task_id(task_id) { | ||
boost::uuids::random_generator gen; | ||
id = gen(); | ||
} | ||
|
||
TaskInstance(boost::uuids::uuid id, boost::uuids::uuid task_id) : id(id), task_id(task_id) {} | ||
}; | ||
Comment on lines
+81
to
+91
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reconsider struct usage and add error handling The conversion to struct and current implementation raises several concerns:
Consider this alternative: -struct TaskInstance {
+class TaskInstance {
+public:
boost::uuids::uuid id;
boost::uuids::uuid task_id;
explicit TaskInstance(boost::uuids::uuid task_id) : task_id(task_id) {
+ try {
boost::uuids::random_generator gen;
id = gen();
+ } catch (const std::exception& e) {
+ throw std::runtime_error("Failed to generate UUID: " + std::string(e.what()));
+ }
}
|
||
|
||
enum class TaskState : std::uint8_t { | ||
Pending, | ||
|
@@ -94,6 +114,19 @@ class Task { | |
m_id = gen(); | ||
} | ||
|
||
Task(boost::uuids::uuid id, | ||
std::string function_name, | ||
TaskState state, | ||
TaskCreatorType creator_type, | ||
boost::uuids::uuid creator_id, | ||
float timeout) | ||
: m_id(id), | ||
m_function_name(std::move(function_name)), | ||
m_state(state), | ||
m_creator_type(creator_type), | ||
m_creator_id(creator_id), | ||
m_timeout(timeout) {} | ||
|
||
Comment on lines
+117
to
+129
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion Add parameter validation and documentation The new constructor needs:
Example implementation: + /**
+ * @brief Constructs a Task with specified parameters
+ * @param id Unique identifier for the task
+ * @param function_name Name of the function to execute
+ * @param state Initial state of the task
+ * @param creator_type Type of the creator
+ * @param creator_id ID of the creator
+ * @param timeout Timeout in seconds (must be positive)
+ * @throws std::invalid_argument if timeout is negative
+ */
Task(boost::uuids::uuid id,
std::string function_name,
TaskState state,
TaskCreatorType creator_type,
boost::uuids::uuid creator_id,
float timeout)
+ {
+ if (timeout < 0) {
+ throw std::invalid_argument("Timeout must be non-negative");
+ }
: m_id(id),
m_function_name(std::move(function_name)),
m_state(state),
m_creator_type(creator_type),
m_creator_id(creator_id),
m_timeout(timeout) {}
|
||
void add_input(TaskInput const& input) { m_inputs.emplace_back(input); } | ||
|
||
void add_output(TaskOutput const& output) { m_outputs.emplace_back(output); } | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,9 @@ | |
#define SPIDER_CORE_TASKGRAPH_HPP | ||
|
||
#include <absl/container/flat_hash_map.h> | ||
#include <absl/container/flat_hash_set.h> | ||
|
||
#include <boost/uuid/random_generator.hpp> | ||
#include <boost/uuid/uuid.hpp> | ||
#include <optional> | ||
#include <utility> | ||
|
@@ -13,14 +15,21 @@ | |
namespace spider::core { | ||
class TaskGraph { | ||
public: | ||
TaskGraph() { | ||
boost::uuids::random_generator gen; | ||
m_id = gen(); | ||
} | ||
|
||
explicit TaskGraph(boost::uuids::uuid id) : m_id(id) {} | ||
|
||
auto add_child_task(Task const& task, std::vector<boost::uuids::uuid> const& parents) -> bool { | ||
boost::uuids::uuid const task_id = task.get_id(); | ||
boost::uuids::uuid task_id = task.get_id(); | ||
for (boost::uuids::uuid const parent_id : parents) { | ||
if (!m_tasks.contains(parent_id)) { | ||
return false; | ||
} | ||
} | ||
if (m_tasks.contains(task.get_id())) { | ||
if (m_tasks.contains(task_id)) { | ||
return false; | ||
} | ||
|
||
|
@@ -31,6 +40,22 @@ class TaskGraph { | |
return true; | ||
} | ||
|
||
// User is responsible to add the dependencies | ||
auto add_task(Task const& task) -> bool { | ||
boost::uuids::uuid const task_id = task.get_id(); | ||
if (m_tasks.contains(task_id)) { | ||
return false; | ||
} | ||
m_tasks.emplace(task_id, task); | ||
return true; | ||
} | ||
|
||
void add_dependencies(boost::uuids::uuid parent, boost::uuids::uuid child) { | ||
m_dependencies.emplace_back(parent, child); | ||
} | ||
Comment on lines
+53
to
+55
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add validation for task existence. The method currently allows adding dependencies for non-existent tasks, which could lead to integrity issues. void add_dependencies(boost::uuids::uuid parent, boost::uuids::uuid child) {
+ if (!m_tasks.contains(parent) || !m_tasks.contains(child)) {
+ throw std::invalid_argument("Both parent and child tasks must exist in the graph");
+ }
m_dependencies.emplace_back(parent, child);
}
|
||
|
||
[[nodiscard]] auto get_id() const -> boost::uuids::uuid { return m_id; } | ||
|
||
[[nodiscard]] auto get_task(boost::uuids::uuid id) const -> std::optional<Task> { | ||
if (m_tasks.contains(id)) { | ||
return m_tasks.at(id); | ||
|
@@ -64,12 +89,24 @@ class TaskGraph { | |
return m_tasks; | ||
} | ||
|
||
[[nodiscard]] auto get_head_tasks() const -> absl::flat_hash_set<boost::uuids::uuid> { | ||
absl::flat_hash_set<boost::uuids::uuid> heads; | ||
for (auto const& pair : m_tasks) { | ||
heads.emplace(pair.first); | ||
} | ||
for (auto const& pair : m_dependencies) { | ||
heads.erase(pair.second); | ||
} | ||
return heads; | ||
} | ||
|
||
[[nodiscard]] auto get_dependencies( | ||
) const -> std::vector<std::pair<boost::uuids::uuid, boost::uuids::uuid>> const& { | ||
return m_dependencies; | ||
} | ||
|
||
private: | ||
boost::uuids::uuid m_id; | ||
absl::flat_hash_map<boost::uuids::uuid, Task> m_tasks; | ||
std::vector<std::pair<boost::uuids::uuid, boost::uuids::uuid>> m_dependencies; | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💡 Codebase verification
Dropping macOS CI checks requires attention to development dependencies
The codebase shows active macOS support requirements:
dep-tasks.yaml
tools/scripts/macOS
Removing macOS from the CI matrix while maintaining macOS-specific development dependencies could lead to undetected issues for macOS developers. Consider either:
🔗 Analysis chain
Verify if dropping macOS CI checks aligns with development needs.
The simplification to use only Ubuntu for CI is reasonable for reducing resource usage. However, please confirm that this change won't impact developers working on macOS systems.
Let's verify if there are any macOS-specific development requirements:
🏁 Scripts executed
The following scripts were executed for the analysis:
Script:
Length of output: 373
Script:
Length of output: 436
Script:
Length of output: 84
Script:
Length of output: 161