Skip to content

Commit

Permalink
feat: Add MySql support for storage backend (#20)
Browse files Browse the repository at this point in the history
Add MySQL implementation for data and metadata storage. This change has not been unit tested yet.
  • Loading branch information
sitaowang1998 authored Nov 5, 2024
1 parent 3c89bb5 commit 41df760
Show file tree
Hide file tree
Showing 12 changed files with 1,153 additions and 198 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code-linting-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
lint:
strategy:
matrix:
os: ["macos-latest", "ubuntu-latest"]
os: ["ubuntu-latest"]
runs-on: "${{matrix.os}}"
steps:
- uses: "actions/checkout@v4"
Expand Down
123 changes: 0 additions & 123 deletions cmake/Modules/FindMariaDBClient.cmake

This file was deleted.

2 changes: 2 additions & 0 deletions cmake/Modules/FindMariaDBClientCpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ if(NOT TARGET MariaDBClientCpp::MariaDBClientCpp)
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES
"${MariaDBClientCpp_INCLUDE_DIR}"
"${MariaDBClientCpp_INCLUDE_DIR}/conncpp"
"${MariaDBClientCpp_INCLUDE_DIR}/conncpp/compat"
)
endif()

Expand Down
8 changes: 5 additions & 3 deletions src/spider/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ endif()
target_sources(spider_core PRIVATE ${SPIDER_CORE_SOURCES})
target_link_libraries(
spider_core
Boost::boost
absl::flat_hash_map
MariaDBClientCpp::MariaDBClientCpp
PUBLIC
Boost::boost
absl::flat_hash_map
MariaDBClientCpp::MariaDBClientCpp
)
target_link_libraries(spider_core PRIVATE fmt::fmt)

set(SPIDER_WORKER_SOURCES worker/worker.cpp CACHE INTERNAL "spider worker source files")

Expand Down
18 changes: 18 additions & 0 deletions src/spider/core/Data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <optional>
#include <string>
#include <utility>
#include <vector>

namespace spider::core {
class Data {
Expand All @@ -16,16 +17,33 @@ class Data {
init_id();
}

// Constructs a Data object from a caller-supplied id, key and value.
// `key` and `value` are taken by value and moved into the members, so callers
// may pass temporaries or std::move their strings to avoid a copy.
// The locality list and hard-locality flag are not set here.
// NOTE(review): presumably used to rebuild an object read back from storage —
// confirm against callers.
Data(boost::uuids::uuid id, std::string key, std::string value)
: m_id(id),
m_key(std::move(key)),
m_value(std::move(value)) {}

// Returns a copy of this object's id.
[[nodiscard]] auto get_id() const -> boost::uuids::uuid { return m_id; }

// Returns a copy of the optional key; the optional is empty when no key is set.
[[nodiscard]] auto get_key() const -> std::optional<std::string> { return m_key; }

// Returns the stored value by value (the string is copied on every call).
[[nodiscard]] auto get_value() const -> std::string { return m_value; }

// Returns a read-only reference to the stored locality list.
// The reference points at a member, so it is only valid while this object is
// alive.
[[nodiscard]] auto get_locality() const -> std::vector<std::string> const& {
return m_locality;
}

// Returns the hard-locality flag (false unless set_hard_locality changed it).
// NOTE(review): the exact meaning of "hard" locality is not visible in this
// header — confirm with the scheduler code.
[[nodiscard]] auto is_hard_locality() const -> bool { return m_hard_locality; }

// Replaces the stored locality list with a copy of `locality`.
void set_locality(std::vector<std::string> const& locality) { m_locality = locality; }

// Sets the hard-locality flag to `hard`.
void set_hard_locality(bool hard) { m_hard_locality = hard; }

private:
boost::uuids::uuid m_id;
std::optional<std::string> m_key;
std::string m_value;
std::vector<std::string> m_locality;
bool m_hard_locality = false;

void init_id() {
boost::uuids::random_generator gen;
Expand Down
5 changes: 4 additions & 1 deletion src/spider/core/Error.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ enum class StorageErrType : std::uint8_t {
DbNotFound,
KeyNotFoundErr,
DuplicateKeyErr,
ConstraintViolationErr
ConstraintViolationErr,
OtherErr
};

struct StorageErr {
Expand All @@ -24,6 +25,8 @@ struct StorageErr {
// Builds an error value from an error category and a human-readable
// description. `description` is taken by value and moved into the member.
StorageErr(StorageErrType type, std::string description)
: type(type),
description(std::move(description)) {}

// Explicit boolean conversion. Evaluates to true when `type` is anything other
// than StorageErrType::Success, so `if (err)` reads as "an error occurred".
// Note the polarity: a successful result converts to false.
explicit operator bool() const { return StorageErrType::Success != type; }
};

} // namespace spider::core
Expand Down
35 changes: 34 additions & 1 deletion src/spider/core/Task.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ class TaskInput {

// Returns a copy of the type string stored for this input.
[[nodiscard]] auto get_type() const -> std::string { return m_type; }

// Stores a copy of `value` as this input's value. No other member is modified.
void set_value(std::string const& value) { m_value = value; }

// Stores `data_id` as this input's data id. No other member is modified.
void set_data_id(boost::uuids::uuid data_id) { m_data_id = data_id; }

private:
std::optional<std::tuple<boost::uuids::uuid, std::uint8_t>> m_task_output;
std::optional<std::string> m_value;
Expand All @@ -46,6 +50,8 @@ class TaskInput {

class TaskOutput {
public:
// Constructs an output that only has a type; the optional value and data id
// start out empty. `explicit` prevents implicit conversion from std::string.
explicit TaskOutput(std::string type) : m_type(std::move(type)) {}

// Constructs an output with both a value and a type. Note the parameter order:
// value first, then type. Both strings are moved into the members.
TaskOutput(std::string value, std::string type)
: m_value(std::move(value)),
m_type(std::move(type)) {}
Expand All @@ -62,13 +68,27 @@ class TaskOutput {

// Returns a copy of the type string stored for this output.
[[nodiscard]] auto get_type() const -> std::string { return m_type; }

// Stores a copy of `value` as this output's value. The data id is left as is.
void set_value(std::string const& value) { m_value = value; }

// Stores `data_id` as this output's data id. The value is left as is.
void set_data_id(boost::uuids::uuid data_id) { m_data_id = data_id; }

private:
std::optional<std::string> m_value;
std::optional<boost::uuids::uuid> m_data_id;
std::string m_type;
};

class TaskInstance {};
struct TaskInstance {
boost::uuids::uuid id;
boost::uuids::uuid task_id;

explicit TaskInstance(boost::uuids::uuid task_id) : task_id(task_id) {
boost::uuids::random_generator gen;
id = gen();
}

TaskInstance(boost::uuids::uuid id, boost::uuids::uuid task_id) : id(id), task_id(task_id) {}
};

enum class TaskState : std::uint8_t {
Pending,
Expand All @@ -94,6 +114,19 @@ class Task {
m_id = gen();
}

// Constructs a Task with every listed field supplied by the caller; no id is
// generated here. `function_name` is taken by value and moved into the member.
// Inputs and outputs are not populated by this constructor; use add_input /
// add_output afterwards.
// NOTE(review): the unit of `timeout` is not visible in this header — confirm.
Task(boost::uuids::uuid id,
std::string function_name,
TaskState state,
TaskCreatorType creator_type,
boost::uuids::uuid creator_id,
float timeout)
: m_id(id),
m_function_name(std::move(function_name)),
m_state(state),
m_creator_type(creator_type),
m_creator_id(creator_id),
m_timeout(timeout) {}

// Appends a copy of `input` to the end of this task's input list.
void add_input(TaskInput const& input) { m_inputs.emplace_back(input); }

// Appends a copy of `output` to the end of this task's output list.
void add_output(TaskOutput const& output) { m_outputs.emplace_back(output); }
Expand Down
41 changes: 39 additions & 2 deletions src/spider/core/TaskGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
#define SPIDER_CORE_TASKGRAPH_HPP

#include <absl/container/flat_hash_map.h>
#include <absl/container/flat_hash_set.h>

#include <boost/uuid/random_generator.hpp>
#include <boost/uuid/uuid.hpp>
#include <optional>
#include <utility>
Expand All @@ -13,14 +15,21 @@
namespace spider::core {
class TaskGraph {
public:
TaskGraph() {
boost::uuids::random_generator gen;
m_id = gen();
}

// Creates an empty graph that uses the caller-supplied id instead of
// generating one.
explicit TaskGraph(boost::uuids::uuid id) : m_id(id) {}

auto add_child_task(Task const& task, std::vector<boost::uuids::uuid> const& parents) -> bool {
boost::uuids::uuid const task_id = task.get_id();
boost::uuids::uuid task_id = task.get_id();
for (boost::uuids::uuid const parent_id : parents) {
if (!m_tasks.contains(parent_id)) {
return false;
}
}
if (m_tasks.contains(task.get_id())) {
if (m_tasks.contains(task_id)) {
return false;
}

Expand All @@ -31,6 +40,22 @@ class TaskGraph {
return true;
}

// Registers `task` under its own id without recording any dependency edges;
// the caller is responsible for adding those separately with
// add_dependencies().
// Returns true if the task was inserted, or false (graph unchanged) when a
// task with the same id is already present.
auto add_task(Task const& task) -> bool {
    // emplace() never overwrites an existing key and reports whether an
    // insertion actually happened, which is exactly the result we return.
    return m_tasks.emplace(task.get_id(), task).second;
}

// Records a single (parent, child) dependency edge by appending it to the
// dependency list. No validation is performed here: neither id is checked
// against the stored tasks, and duplicate edges are not filtered out.
void add_dependencies(boost::uuids::uuid parent, boost::uuids::uuid child) {
m_dependencies.emplace_back(parent, child);
}

// Returns a copy of this graph's id.
[[nodiscard]] auto get_id() const -> boost::uuids::uuid { return m_id; }

[[nodiscard]] auto get_task(boost::uuids::uuid id) const -> std::optional<Task> {
if (m_tasks.contains(id)) {
return m_tasks.at(id);
Expand Down Expand Up @@ -64,12 +89,24 @@ class TaskGraph {
return m_tasks;
}

// Returns the ids of all "head" tasks: tasks in the graph that never appear as
// the child (second element) of any recorded dependency pair.
[[nodiscard]] auto get_head_tasks() const -> absl::flat_hash_set<boost::uuids::uuid> {
    // Begin with every task id known to the graph...
    absl::flat_hash_set<boost::uuids::uuid> result;
    for (auto const& entry : m_tasks) {
        result.insert(entry.first);
    }
    // ...then discard each id that is some other task's child.
    for (auto const& edge : m_dependencies) {
        result.erase(edge.second);
    }
    return result;
}

// Returns a read-only reference to the list of (parent, child) dependency
// pairs, in the order they were added. The reference points at a member, so it
// is only valid while this graph is alive.
[[nodiscard]] auto get_dependencies(
) const -> std::vector<std::pair<boost::uuids::uuid, boost::uuids::uuid>> const& {
return m_dependencies;
}

private:
boost::uuids::uuid m_id;
absl::flat_hash_map<boost::uuids::uuid, Task> m_tasks;
std::vector<std::pair<boost::uuids::uuid, boost::uuids::uuid>> m_dependencies;
};
Expand Down
4 changes: 2 additions & 2 deletions src/spider/storage/DataStorage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ class DataStorage {
auto operator=(DataStorage&&) -> DataStorage& = default;
virtual ~DataStorage() = default;

virtual auto connect(std::string url, boost::uuids::uuid id) -> StorageErr = 0;
virtual auto connect(std::string const& url) -> StorageErr = 0;
virtual void close() = 0;
virtual auto initialize() -> StorageErr = 0;

virtual auto add_data(Data const& data) -> StorageErr = 0;
virtual auto get_data(boost::uuids::uuid id, Data& data) -> StorageErr = 0;
virtual auto get_data(boost::uuids::uuid id, Data* data) -> StorageErr = 0;
virtual auto add_task_reference(boost::uuids::uuid id, boost::uuids::uuid task_id) -> StorageErr
= 0;
virtual auto
Expand Down
Loading

0 comments on commit 41df760

Please sign in to comment.