Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: Add unit tests for metadata storage and fix bugs in MySQL storage backend #23

Merged
merged 9 commits into from
Nov 10, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 6 additions & 23 deletions src/spider/core/Task.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,32 +99,17 @@ enum class TaskState : std::uint8_t {
Canceled,
};

enum class TaskCreatorType : std::uint8_t {
Client = 0,
Task,
};

class Task {
public:
Task(std::string function_name, TaskCreatorType creator_type, boost::uuids::uuid creator_id)
: m_function_name(std::move(function_name)),
m_creator_type(creator_type),
m_creator_id(creator_id) {
explicit Task(std::string function_name) : m_function_name(std::move(function_name)) {
boost::uuids::random_generator gen;
m_id = gen();
}

Task(boost::uuids::uuid id,
std::string function_name,
TaskState state,
TaskCreatorType creator_type,
boost::uuids::uuid creator_id,
float timeout)
Task(boost::uuids::uuid id, std::string function_name, TaskState state, float timeout)
: m_id(id),
m_function_name(std::move(function_name)),
m_state(state),
m_creator_type(creator_type),
m_creator_id(creator_id),
m_timeout(timeout) {}

void add_input(TaskInput const& input) { m_inputs.emplace_back(input); }
Expand All @@ -137,10 +122,6 @@ class Task {

[[nodiscard]] auto get_state() const -> TaskState { return m_state; }

[[nodiscard]] auto get_creator_type() const -> TaskCreatorType { return m_creator_type; }

[[nodiscard]] auto get_creator_id() const -> boost::uuids::uuid { return m_creator_id; }

[[nodiscard]] auto get_timeout() const -> float { return m_timeout; }

[[nodiscard]] auto get_num_inputs() const -> size_t { return m_inputs.size(); }
Expand All @@ -151,12 +132,14 @@ class Task {

[[nodiscard]] auto get_output(uint64_t index) const -> TaskOutput { return m_outputs[index]; }

[[nodiscard]] auto get_inputs() const -> std::vector<TaskInput> const& { return m_inputs; }

[[nodiscard]] auto get_outputs() const -> std::vector<TaskOutput> const& { return m_outputs; }

private:
boost::uuids::uuid m_id;
std::string m_function_name;
TaskState m_state = TaskState::Pending;
TaskCreatorType m_creator_type;
boost::uuids::uuid m_creator_id;
float m_timeout = 0;
std::vector<TaskInput> m_inputs;
std::vector<TaskOutput> m_outputs;
Expand Down
13 changes: 10 additions & 3 deletions src/spider/core/TaskGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,20 @@ class TaskGraph {
return true;
}

void add_dependencies(boost::uuids::uuid parent, boost::uuids::uuid child) {
void add_dependency(boost::uuids::uuid parent, boost::uuids::uuid child) {
m_dependencies.emplace_back(parent, child);
}
Comment on lines +47 to 49
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider adding task existence validation

The method should verify that both parent and child tasks exist in m_tasks before adding the dependency to prevent orphaned dependencies.

 void add_dependency(boost::uuids::uuid parent, boost::uuids::uuid child) {
+    if (!m_tasks.contains(parent) || !m_tasks.contains(child)) {
+        throw std::invalid_argument("Both parent and child tasks must exist");
+    }
     m_dependencies.emplace_back(parent, child);
 }

Committable suggestion skipped: line range outside the PR's diff.


[[nodiscard]] auto get_task(boost::uuids::uuid id) const -> std::optional<Task> {
[[nodiscard]] auto get_task(boost::uuids::uuid id) const -> std::optional<Task const*> {
if (m_tasks.contains(id)) {
return m_tasks.at(id);
return &m_tasks.at(id);
}
return std::nullopt;
}

[[nodiscard]] auto get_task(boost::uuids::uuid id) -> std::optional<Task*> {
if (m_tasks.contains(id)) {
return &m_tasks.at(id);
}
return std::nullopt;
}
Expand Down
8 changes: 6 additions & 2 deletions src/spider/storage/MetadataStorage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class MetadataStorage {
virtual auto add_driver(boost::uuids::uuid id, std::string const& addr) -> StorageErr = 0;
virtual auto add_driver(boost::uuids::uuid id, std::string const& addr, int port) -> StorageErr
= 0;
virtual auto get_driver(boost::uuids::uuid id, std::string* addr) -> StorageErr = 0;

virtual auto
add_job(boost::uuids::uuid job_id, boost::uuids::uuid client_id, TaskGraph const& task_graph
Expand All @@ -43,14 +44,17 @@ class MetadataStorage {
virtual auto add_task_instance(TaskInstance const& instance) -> StorageErr = 0;
virtual auto task_finish(TaskInstance const& instance) -> StorageErr = 0;
virtual auto get_task_timeout(std::vector<TaskInstance>* tasks) -> StorageErr = 0;
virtual auto get_child_task(boost::uuids::uuid id, Task* child) -> StorageErr = 0;
virtual auto get_child_tasks(boost::uuids::uuid id, std::vector<Task>* children) -> StorageErr
= 0;
virtual auto get_parent_tasks(boost::uuids::uuid id, std::vector<Task>* tasks) -> StorageErr
= 0;

virtual auto update_heartbeat(boost::uuids::uuid id) -> StorageErr = 0;
virtual auto
heartbeat_timeout(float timeout, std::vector<boost::uuids::uuid>* ids) -> StorageErr = 0;
heartbeat_timeout(double timeout, std::vector<boost::uuids::uuid>* ids) -> StorageErr = 0;
virtual auto get_scheduler_state(boost::uuids::uuid id, std::string* state) -> StorageErr = 0;
virtual auto
get_scheduler_addr(boost::uuids::uuid id, std::string* addr, int* port) -> StorageErr = 0;
virtual auto set_scheduler_state(boost::uuids::uuid id, std::string const& state) -> StorageErr
= 0;
};
Expand Down
Loading
Loading