From 7fa2c3e1d076834c3c334ba4332af4814508b2f3 Mon Sep 17 00:00:00 2001 From: longfar Date: Tue, 27 Feb 2024 23:11:55 +0800 Subject: [PATCH 001/116] feat: add configs for rocksdb-cloud in pika and floyd --- conf/pika.conf | 28 +++++++++++++++++++++++++++ include/pika_conf.h | 28 +++++++++++++++++++++++++++ src/pika_conf.cc | 13 +++++++++++++ src/pika_server.cc | 15 ++++++++++++++ src/storage/include/storage/storage.h | 6 ++++++ tests/conf/pika.conf | 28 +++++++++++++++++++++++++++ 6 files changed, 118 insertions(+) diff --git a/conf/pika.conf b/conf/pika.conf index 2f6990b959..37f652300e 100644 --- a/conf/pika.conf +++ b/conf/pika.conf @@ -512,3 +512,31 @@ cache-lfu-decay-time: 1 # # aclfile : ../conf/users.acl +###################################################################### +# rocksdb-cloud options +####################################################################### + +# Normally, the AWS SDK will automatically determine the endpoint based on the selected region. +# However, in special cases, you can manually specify the URL of the endpoint through this configuration, +# such as local development. +# Default: "" +# cloud-endpoint-override : + +# The aws access key id and aws secret key used for authentication when accessing aws s3. +cloud-access-key : +cloud-secret-key : + +# The source bucket name prefix and suffix to use for storage on s3 +# The final bucket name is [prefix][suffix] +# Default: "pika." +# cloud-src-bucket-prefix : +# Default: "database" +# cloud-src-bucket-suffix : + +# The source bucket region +# cloud-src-bucket-region : + +# Configuration information of the destination bucket +# cloud-dest-bucket-prefix : +# cloud-dest-bucket-suffix : +# cloud-dest-bucket-region : \ No newline at end of file diff --git a/include/pika_conf.h b/include/pika_conf.h index 6b0917ee76..7fd063ae43 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -396,6 +396,19 @@ class PikaConf : public pstd::BaseConf { uint32_t acl_pubsub_default() { return acl_pubsub_default_.load(); } uint32_t acl_log_max_len() { return acl_Log_max_len_.load(); } +#ifdef USE_S3 + // rocksdb-cloud options + std::string cloud_endpoint_override() { return cloud_endpoint_override_; } + std::string cloud_access_key() { return cloud_access_key_; } + std::string cloud_secret_key() { return cloud_secret_key_; } + std::string cloud_src_bucket_prefix() { return cloud_src_bucket_prefix_; } + std::string cloud_src_bucket_suffix() { return cloud_src_bucket_suffix_; } + std::string cloud_src_bucket_region() { return cloud_src_bucket_region_; } + std::string cloud_dest_bucket_prefix() { return cloud_dest_bucket_prefix_; } + std::string cloud_dest_bucket_suffix() { return cloud_dest_bucket_suffix_; } + std::string cloud_dest_bucket_region() { return cloud_dest_bucket_region_; } +#endif + // Setter void SetPort(const int value) { std::lock_guard l(rwlock_); @@ -809,6 +822,21 @@ class PikaConf : public pstd::BaseConf { int64_t blob_file_size_ = 256 * 1024 * 1024; // 256M std::string blob_compression_type_ = "none"; +#ifdef USE_S3 + // rocksdb-cloud options + std::string cloud_endpoint_override_; + std::string cloud_access_key_; + std::string cloud_secret_key_; + // rocksdb-cloud src bucket + std::string cloud_src_bucket_prefix_ = "pika."; + std::string cloud_src_bucket_suffix_ = "database"; + std::string cloud_src_bucket_region_; + // rocksdb-cloud dest bucket + std::string cloud_dest_bucket_prefix_ = "pika."; + std::string cloud_dest_bucket_suffix_ = "database"; + std::string cloud_dest_bucket_region_; +#endif + 
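For reference, the conf keys above resolve the bucket name as [prefix][suffix], so the defaults "pika." and "database" give the bucket "pika.database". Below is a minimal sketch (not part of the patch) of how these keys are expected to feed rocksdb-cloud's CloudFileSystemOptions, using the same calls the pika_server.cc hunk later in this series uses; the endpoint, credential and region values are placeholders, not defaults.

    #include "rocksdb/cloud/cloud_file_system.h"

    // Sketch only: builds CloudFileSystemOptions from the conf keys above.
    rocksdb::CloudFileSystemOptions MakeCloudFsOptions() {
      rocksdb::CloudFileSystemOptions opts;
      opts.endpoint_override = "http://127.0.0.1:9000";               // cloud-endpoint-override (e.g. a local MinIO)
      opts.credentials.InitializeSimple("minioadmin", "minioadmin");  // cloud-access-key / cloud-secret-key
      opts.src_bucket.SetBucketName("database", "pika.");             // SetBucketName(suffix, prefix) -> "pika.database"
      opts.src_bucket.SetRegion("us-east-1");                         // cloud-src-bucket-region (placeholder)
      opts.dest_bucket.SetBucketName("database", "pika.");            // cloud-dest-bucket-{prefix,suffix}
      opts.dest_bucket.SetRegion("us-east-1");                        // cloud-dest-bucket-region (placeholder)
      return opts;
    }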
std::shared_mutex rwlock_; // Rsync Rate limiting configuration diff --git a/src/pika_conf.cc b/src/pika_conf.cc index 8086bb2285..0c708f8b6b 100644 --- a/src/pika_conf.cc +++ b/src/pika_conf.cc @@ -578,6 +578,19 @@ int PikaConf::Load() { max_rsync_parallel_num_ = 4; } +#ifdef USE_S3 + // rocksdb-cloud options + GetConfStr("cloud-endpoint-override", &cloud_endpoint_override_); + GetConfStr("cloud-access-key", &cloud_access_key_); + GetConfStr("cloud-secret-key", &cloud_secret_key_); + GetConfStr("cloud-src-bucket-prefix", &cloud_src_bucket_prefix_); + GetConfStr("cloud-src-bucket-suffix", &cloud_src_bucket_suffix_); + GetConfStr("cloud-src-bucket-region", &cloud_src_bucket_region_); + GetConfStr("cloud-dest-bucket-prefix", &cloud_dest_bucket_prefix_); + GetConfStr("cloud-dest-bucket-suffix", &cloud_dest_bucket_suffix_); + GetConfStr("cloud-dest-bucket-region", &cloud_dest_bucket_region_); +#endif + return ret; } diff --git a/src/pika_server.cc b/src/pika_server.cc index 57224c5c09..dcc2b890f9 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1377,6 +1377,21 @@ void PikaServer::InitStorageOptions() { // for column-family options storage_options_.options.ttl = g_pika_conf->rocksdb_ttl_second(); storage_options_.options.periodic_compaction_seconds = g_pika_conf->rocksdb_periodic_compaction_second(); + +#ifdef USE_S3 + // rocksdb-cloud + auto& cloud_fs_opts = storage_options_.cloud_fs_options; + storage_options_.options.max_log_file_size = 0; // TODO: better handles of `assert(cloud_manifest)` + cloud_fs_opts.endpoint_override = g_pika_conf->cloud_endpoint_override(); + cloud_fs_opts.credentials.InitializeSimple(g_pika_conf->cloud_access_key(), g_pika_conf->cloud_secret_key()); + if (!cloud_fs_opts.credentials.HasValid().ok()) { + LOG(FATAL) << "Please set the right aws access key and secret key to access s3"; + } + cloud_fs_opts.src_bucket.SetBucketName(g_pika_conf->cloud_src_bucket_suffix(), g_pika_conf->cloud_src_bucket_prefix()); + cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); + cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); + cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); +#endif } storage::Status PikaServer::RewriteStorageOptions(const storage::OptionType& option_type, diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index 296882f510..5f3aeb3ed2 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -21,6 +21,9 @@ #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "rocksdb/table.h" +#ifdef USE_S3 +#include "rocksdb/cloud/cloud_file_system.h" +#endif #include "slot_indexer.h" #include "pstd/include/pstd_mutex.h" @@ -70,6 +73,9 @@ struct StorageOptions { size_t statistics_max_size = 0; size_t small_compaction_threshold = 5000; size_t small_compaction_duration_threshold = 10000; +#ifdef USE_S3 + rocksdb::CloudFileSystemOptions cloud_fs_options; // rocksdb-cloud option +#endif Status ResetOptions(const OptionType& option_type, const std::unordered_map& options_map); }; diff --git a/tests/conf/pika.conf b/tests/conf/pika.conf index 5f9167d96a..976f0ad2b6 100644 --- a/tests/conf/pika.conf +++ b/tests/conf/pika.conf @@ -499,3 +499,31 @@ cache-lfu-decay-time: 1 # # aclfile : ../conf/users.acl +###################################################################### +# rocksdb-cloud options 
+####################################################################### + +# Normally, the AWS SDK will automatically determine the endpoint based on the selected region. +# However, in special cases, you can manually specify the URL of the endpoint through this configuration, +# such as local development. +# Default: "" +cloud-endpoint-override : http://127.0.0.1:9000 + +# The aws access key id and aws secret key used for authentication when accessing aws s3. +cloud-access-key : minioadmin +cloud-secret-key : minioadmin + +# The source bucket name prefix and suffix to use for storage on s3 +# The final bucket name is [prefix][suffix] +# Default: "pika." +# cloud-src-bucket-prefix : +# Default: "database" +cloud-src-bucket-suffix : integration.test + +# The source bucket region +# cloud-src-bucket-region : + +# Configuration information of the destination bucket +# cloud-dest-bucket-prefix : +cloud-dest-bucket-suffix : integration.test +# cloud-dest-bucket-region : From f02470e686e8d80b3a1aa819a86d2851a5b057c3 Mon Sep 17 00:00:00 2001 From: longfar Date: Wed, 28 Feb 2024 09:30:51 +0800 Subject: [PATCH 002/116] feat: add aws-sdk in cmake to make compile successfull --- CMakeLists.txt | 68 ++++++++++++++++++++++++++++++++ src/storage/tests/CMakeLists.txt | 8 ++++ 2 files changed, 76 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 485ff0fe25..aec8e4acff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -576,6 +576,60 @@ set(PROTOBUF_INCLUDE_DIR ${INSTALL_INCLUDEDIR}) set(PROTOBUF_LIBRARY ${INSTALL_LIBDIR}/${LIB_PROTOBUF}) set(PROTOBUF_PROTOC ${STAGED_INSTALL_PREFIX}/bin/protoc) +if (USE_S3) +ExternalProject_Add(rocksdb + DEPENDS + gflags + gtest + snappy + zstd + lz4 + zlib + ${LIBGPERF_NAME} + ${LIBJEMALLOC_NAME} + URL + https://github.com/longfar-ncy/rocksdb-cloud/archive/refs/heads/pika.zip + URL_HASH + MD5=761d1f7ccd6ea9aa86c1f2ce0e246a26 + DOWNLOAD_NO_PROGRESS + 1 + UPDATE_COMMAND + "" + LOG_CONFIGURE + 1 + LOG_BUILD + 1 + LOG_INSTALL + 1 + BUILD_ALWAYS + 1 + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX=${STAGED_INSTALL_PREFIX} + -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} + -DCMAKE_BUILD_TYPE=${LIB_BUILD_TYPE} + -DUSE_RTTI=1 + -DWITH_BENCHMARK=OFF + -DWITH_BENCHMARK_TOOLS=OFF + -DWITH_TOOLS=OFF + -DWITH_CORE_TOOLS=OFF + -DWITH_TESTS=OFF + -DWITH_TRACE_TOOLS=OFF + -DWITH_EXAMPLES=OFF + -DROCKSDB_BUILD_SHARED=OFF + -DWITH_JEMALLOC=${JEMALLOC_ON} + -DWITH_LZ4=ON + -DWITH_SNAPPY=ON + -DWITH_ZLIB=ON + -DWITH_ZSTD=ON + -DWITH_GFLAGS=ON + -DFAIL_ON_WARNINGS=OFF + -DWITH_LIBURING=OFF + -DPORTABLE=1 + -DWITH_AWS=ON + BUILD_COMMAND + make -j${CPU_CORE} +) +else() ExternalProject_Add(rocksdb DEPENDS gflags @@ -627,6 +681,7 @@ ExternalProject_Add(rocksdb BUILD_COMMAND make -j${CPU_CORE} ) +endif() ExternalProject_Add(rediscache URL @@ -725,6 +780,10 @@ endif() set(ROCKSDB_INCLUDE_DIR ${INSTALL_INCLUDEDIR}) set(ROCKSDB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/${EP_BASE_SUFFIX}/Source/rocksdb) +if(USE_S3) + add_compile_definitions(USE_S3) +endif() + add_subdirectory(src/pstd) add_subdirectory(src/net) add_subdirectory(src/storage) @@ -829,6 +888,15 @@ target_link_libraries(${PROJECT_NAME} ${LIBUNWIND_LIBRARY} ${JEMALLOC_LIBRARY}) +if (USE_S3) + target_link_libraries(${PROJECT_NAME} + libaws-cpp-sdk-core.so + libaws-cpp-sdk-transfer.so + libaws-cpp-sdk-kinesis.so + libaws-cpp-sdk-s3.so + ) +endif() + option(USE_SSL "Enable SSL support" OFF) add_custom_target( clang-tidy diff --git a/src/storage/tests/CMakeLists.txt b/src/storage/tests/CMakeLists.txt index 09dc7f32cc..3f7ede4439 100644 --- 
a/src/storage/tests/CMakeLists.txt +++ b/src/storage/tests/CMakeLists.txt @@ -33,6 +33,14 @@ foreach(blackwindow_test_source ${BLACKWINDOW_TEST_SOURCE}) PUBLIC ${GFLAGS_LIBRARY} PUBLIC ${LIBUNWIND_LIBRARY} ) + if (USE_S3) + target_link_libraries(${blackwindow_test_name} + PUBLIC libaws-cpp-sdk-core.so + PUBLIC libaws-cpp-sdk-transfer.so + PUBLIC libaws-cpp-sdk-kinesis.so + PUBLIC libaws-cpp-sdk-s3.so + ) + endif() add_test(NAME ${blackwindow_test_name} COMMAND ${blackwindow_test_name} WORKING_DIRECTORY .) From fd3e0a52cea6a0b7a6ee04d44de10a0a7a90d423 Mon Sep 17 00:00:00 2001 From: longfar Date: Wed, 28 Feb 2024 09:56:55 +0800 Subject: [PATCH 003/116] feat: replace rocksdb::DB with rocksdb::DBCloud in storage --- src/storage/include/storage/storage.h | 5 ++++ src/storage/src/base_filter.h | 21 +++++++++++++++++ src/storage/src/lists_filter.h | 20 +++++++++++++--- src/storage/src/redis.cc | 34 ++++++++++++++++++++++++++- src/storage/src/redis.h | 18 ++++++++++++++ src/storage/src/storage.cc | 4 ++++ src/storage/src/zsets_filter.h | 8 +++++++ 7 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index 5f3aeb3ed2..c4cb36aa54 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -22,6 +22,7 @@ #include "rocksdb/status.h" #include "rocksdb/table.h" #ifdef USE_S3 +#include "rocksdb/cloud/db_cloud.h" #include "rocksdb/cloud/cloud_file_system.h" #endif @@ -1101,7 +1102,11 @@ class Storage { Status GetKeyNum(std::vector* key_infos); Status StopScanKeyNum(); +#ifdef USE_S3 + rocksdb::DBCloud* GetDBByIndex(int index); +#else rocksdb::DB* GetDBByIndex(int index); +#endif Status SetOptions(const OptionType& option_type, const std::string& db_type, const std::unordered_map& options); diff --git a/src/storage/src/base_filter.h b/src/storage/src/base_filter.h index 093f3f4761..3df7d7e3d8 100644 --- a/src/storage/src/base_filter.h +++ b/src/storage/src/base_filter.h @@ -15,6 +15,11 @@ #include "src/base_data_key_format.h" #include "src/base_meta_value_format.h" #include "src/debug.h" +#ifdef USE_S3 +#include "rocksdb/cloud/db_cloud.h" +#else +#include "rocksdb/db.h" +#endif namespace storage { @@ -60,7 +65,11 @@ class BaseMetaFilterFactory : public rocksdb::CompactionFilterFactory { class BaseDataFilter : public rocksdb::CompactionFilter { public: +#ifdef USE_S3 + BaseDataFilter(rocksdb::DBCloud* db, std::vector* cf_handles_ptr, int meta_cf_index) +#else BaseDataFilter(rocksdb::DB* db, std::vector* cf_handles_ptr, int meta_cf_index) +#endif : db_(db), cf_handles_ptr_(cf_handles_ptr), meta_cf_index_(meta_cf_index) @@ -146,7 +155,11 @@ class BaseDataFilter : public rocksdb::CompactionFilter { const char* Name() const override { return "BaseDataFilter"; } private: +#ifdef USE_S3 + rocksdb::DBCloud* db_ = nullptr; +#else rocksdb::DB* db_ = nullptr; +#endif std::vector* cf_handles_ptr_ = nullptr; rocksdb::ReadOptions default_read_options_; mutable std::string cur_key_; @@ -158,7 +171,11 @@ class BaseDataFilter : public rocksdb::CompactionFilter { class BaseDataFilterFactory : public rocksdb::CompactionFilterFactory { public: +#ifdef USE_S3 + BaseDataFilterFactory(rocksdb::DBCloud** db_ptr, std::vector* handles_ptr, int meta_cf_index) +#else BaseDataFilterFactory(rocksdb::DB** db_ptr, std::vector* handles_ptr, int meta_cf_index) +#endif : db_ptr_(db_ptr), cf_handles_ptr_(handles_ptr), meta_cf_index_(meta_cf_index) {} std::unique_ptr CreateCompactionFilter( const 
rocksdb::CompactionFilter::Context& context) override { @@ -167,7 +184,11 @@ class BaseDataFilterFactory : public rocksdb::CompactionFilterFactory { const char* Name() const override { return "BaseDataFilterFactory"; } private: +#ifdef USE_S3 + rocksdb::DBCloud** db_ptr_ = nullptr; +#else rocksdb::DB** db_ptr_ = nullptr; +#endif std::vector* cf_handles_ptr_ = nullptr; int meta_cf_index_ = 0; }; diff --git a/src/storage/src/lists_filter.h b/src/storage/src/lists_filter.h index b31b01c441..1e6ce8a607 100644 --- a/src/storage/src/lists_filter.h +++ b/src/storage/src/lists_filter.h @@ -10,9 +10,7 @@ #include #include -#include "rocksdb/compaction_filter.h" -#include "rocksdb/db.h" -#include "src/debug.h" +#include "base_filter.h" #include "src/lists_data_key_format.h" #include "src/lists_meta_value_format.h" @@ -60,7 +58,11 @@ class ListsMetaFilterFactory : public rocksdb::CompactionFilterFactory { class ListsDataFilter : public rocksdb::CompactionFilter { public: +#ifdef USE_S3 + ListsDataFilter(rocksdb::DBCloud* db, std::vector* cf_handles_ptr, int meta_cf_index) +#else ListsDataFilter(rocksdb::DB* db, std::vector* cf_handles_ptr, int meta_cf_index) +#endif : db_(db), cf_handles_ptr_(cf_handles_ptr), meta_cf_index_(meta_cf_index) @@ -145,7 +147,11 @@ class ListsDataFilter : public rocksdb::CompactionFilter { const char* Name() const override { return "ListsDataFilter"; } private: +#ifdef USE_S3 + rocksdb::DBCloud* db_ = nullptr; +#else rocksdb::DB* db_ = nullptr; +#endif std::vector* cf_handles_ptr_ = nullptr; rocksdb::ReadOptions default_read_options_; mutable std::string cur_key_; @@ -157,7 +163,11 @@ class ListsDataFilter : public rocksdb::CompactionFilter { class ListsDataFilterFactory : public rocksdb::CompactionFilterFactory { public: +#ifdef USE_S3 + ListsDataFilterFactory(rocksdb::DBCloud** db_ptr, std::vector* handles_ptr, int meta_cf_index) +#else ListsDataFilterFactory(rocksdb::DB** db_ptr, std::vector* handles_ptr, int meta_cf_index) +#endif : db_ptr_(db_ptr), cf_handles_ptr_(handles_ptr), meta_cf_index_(meta_cf_index) {} std::unique_ptr CreateCompactionFilter( @@ -167,7 +177,11 @@ class ListsDataFilterFactory : public rocksdb::CompactionFilterFactory { const char* Name() const override { return "ListsDataFilterFactory"; } private: +#ifdef USE_S3 + rocksdb::DBCloud** db_ptr_ = nullptr; +#else rocksdb::DB** db_ptr_ = nullptr; +#endif std::vector* cf_handles_ptr_ = nullptr; int meta_cf_index_ = 0; }; diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 94c85ecbca..4c4f940ba2 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -8,6 +8,7 @@ #include "rocksdb/env.h" #include "src/redis.h" +#include "rocksdb/options.h" #include "src/strings_filter.h" #include "src/lists_filter.h" #include "src/base_filter.h" @@ -62,7 +63,7 @@ Status Redis::Open(const StorageOptions& storage_options, const std::string& db_ rocksdb::BlockBasedTableOptions table_ops(storage_options.table_options); table_ops.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); - rocksdb::DBOptions db_ops(storage_options.options); + rocksdb::Options db_ops(storage_options.options); db_ops.create_missing_column_families = true; // db_ops.env = env_; @@ -175,7 +176,18 @@ Status Redis::Open(const StorageOptions& storage_options, const std::string& db_ // stream CF column_families.emplace_back("stream_meta_cf", stream_meta_cf_ops); column_families.emplace_back("stream_data_cf", stream_data_cf_ops); + +#ifdef USE_S3 + Status s = OpenCloudEnv(storage_options.cloud_fs_options, 
db_path); + if (!s.ok()) { + LOG(ERROR) << "Failed to create AWS S3 cloud environment"; + return s; + } + db_ops.env = cloud_env_.get(); + return rocksdb::DBCloud::Open(db_ops, db_path, column_families, "", 0, &handles_, &db_); +#else return rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); +#endif } Status Redis::GetScanStartPoint(const DataType& type, const Slice& key, const Slice& pattern, int64_t cursor, std::string* start_point) { @@ -465,4 +477,24 @@ void Redis::ScanDatabase() { ScanSets(); } +#ifdef USE_S3 +Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::string& db_path) { + std::string s3_path = db_path[0] == '.' ? db_path.substr(1) : db_path; + opts.src_bucket.SetObjectPath(s3_path); + opts.dest_bucket.SetObjectPath(s3_path); + rocksdb::CloudFileSystem* cfs = nullptr; + Status s = rocksdb::CloudFileSystem::NewAwsFileSystem( + rocksdb::FileSystem::Default(), + opts, + nullptr, + &cfs + ); + if (s.ok()) { + std::shared_ptr cloud_fs(cfs); + cloud_env_ = NewCompositeEnv(cloud_fs); + } + return s; +} +#endif + } // namespace storage diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 2e28743aae..878fa9ae0c 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -10,7 +10,11 @@ #include #include +#ifdef USE_S3 +#include "rocksdb/cloud/db_cloud.h" +#else #include "rocksdb/db.h" +#endif #include "rocksdb/slice.h" #include "rocksdb/status.h" @@ -38,7 +42,11 @@ class Redis { Redis(Storage* storage, int32_t index); virtual ~Redis(); +#ifdef USE_S3 + rocksdb::DBCloud* GetDB() { return db_; } +#else rocksdb::DB* GetDB() { return db_; } +#endif struct KeyStatistics { size_t window_size; @@ -420,7 +428,11 @@ class Redis { int32_t index_ = 0; Storage* const storage_; std::shared_ptr lock_mgr_; +#ifdef USE_S3 + rocksdb::DBCloud* db_ = nullptr; +#else rocksdb::DB* db_ = nullptr; +#endif //TODO(wangshaoyi): seperate env for each rocksdb instance // rocksdb::Env* env_ = nullptr; @@ -444,6 +456,12 @@ class Redis { Status UpdateSpecificKeyStatistics(const DataType& dtype, const std::string& key, uint64_t count); Status UpdateSpecificKeyDuration(const DataType& dtype, const std::string& key, uint64_t duration); Status AddCompactKeyTaskIfNeeded(const DataType& dtype, const std::string& key, uint64_t count, uint64_t duration); + +#ifdef USE_S3 + // rocksdb-cloud + Status OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::string& db_path); + std::unique_ptr cloud_env_; +#endif }; } // namespace storage diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index 9fb252a7d0..56cee932c9 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -2315,7 +2315,11 @@ Status Storage::StopScanKeyNum() { return Status::OK(); } +#ifdef USE_S3 +rocksdb::DBCloud* Storage::GetDBByIndex(int index) { +#else rocksdb::DB* Storage::GetDBByIndex(int index) { +#endif if (index < 0 || index >= db_instance_num_) { LOG(WARNING) << "Invalid DB Index: " << index << "total: " << db_instance_num_; diff --git a/src/storage/src/zsets_filter.h b/src/storage/src/zsets_filter.h index 8de0e6612b..e28f2d4f5e 100644 --- a/src/storage/src/zsets_filter.h +++ b/src/storage/src/zsets_filter.h @@ -115,7 +115,11 @@ class ZSetsScoreFilter : public rocksdb::CompactionFilter { class ZSetsScoreFilterFactory : public rocksdb::CompactionFilterFactory { public: +#ifdef USE_S3 + ZSetsScoreFilterFactory(rocksdb::DBCloud** db_ptr, std::vector* handles_ptr, int meta_cf_index) +#else ZSetsScoreFilterFactory(rocksdb::DB** db_ptr, 
std::vector* handles_ptr, int meta_cf_index) +#endif : db_ptr_(db_ptr), cf_handles_ptr_(handles_ptr), meta_cf_index_(meta_cf_index) {} std::unique_ptr CreateCompactionFilter( @@ -126,7 +130,11 @@ class ZSetsScoreFilterFactory : public rocksdb::CompactionFilterFactory { const char* Name() const override { return "ZSetsScoreFilterFactory"; } private: +#ifdef USE_S3 + rocksdb::DBCloud** db_ptr_ = nullptr; +#else rocksdb::DB** db_ptr_ = nullptr; +#endif std::vector* cf_handles_ptr_ = nullptr; int meta_cf_index_ = 0; }; From ba4539088a2fd229a16d589a5220524950df16fc Mon Sep 17 00:00:00 2001 From: longfar Date: Wed, 28 Feb 2024 10:17:40 +0800 Subject: [PATCH 004/116] feat: replace rocksdb::DB with rocksdb::DBCloud in tests of storage --- src/storage/tests/hashes_test.cc | 10 +++++++ src/storage/tests/hyperloglog_test.cc | 10 +++++++ src/storage/tests/keys_test.cc | 10 +++++++ src/storage/tests/lists_filter_test.cc | 41 +++++++++++++++++++++++++- src/storage/tests/lists_test.cc | 9 ++++++ src/storage/tests/sets_test.cc | 10 +++++++ src/storage/tests/strings_test.cc | 10 +++++++ src/storage/tests/zsets_test.cc | 10 ++++++- 8 files changed, 108 insertions(+), 2 deletions(-) diff --git a/src/storage/tests/hashes_test.cc b/src/storage/tests/hashes_test.cc index 50d2207256..d4a1b952fd 100644 --- a/src/storage/tests/hashes_test.cc +++ b/src/storage/tests/hashes_test.cc @@ -29,7 +29,17 @@ class HashesTest : public ::testing::Test { pstd::DeleteDirIfExist(path); mkdir(path.c_str(), 0755); storage_options.options.create_if_missing = true; +#ifdef USE_S3 + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; +#endif s = db.Open(storage_options, path); + ASSERT_TRUE(s.ok()); } void TearDown() override { diff --git a/src/storage/tests/hyperloglog_test.cc b/src/storage/tests/hyperloglog_test.cc index eb03a39514..452e325fb4 100644 --- a/src/storage/tests/hyperloglog_test.cc +++ b/src/storage/tests/hyperloglog_test.cc @@ -23,7 +23,17 @@ class HyperLogLogTest : public ::testing::Test { mkdir(path.c_str(), 0755); } storage_options.options.create_if_missing = true; +#ifdef USE_S3 + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; +#endif s = db.Open(storage_options, path); + ASSERT_TRUE(s.ok()); } void TearDown() override { diff --git a/src/storage/tests/keys_test.cc b/src/storage/tests/keys_test.cc index 8acff5c12f..6395156e19 100644 --- a/src/storage/tests/keys_test.cc +++ b/src/storage/tests/keys_test.cc @@ -29,7 +29,17 @@ class KeysTest : public ::testing::Test { pstd::DeleteDirIfExist(path); mkdir(path.c_str(), 0755); storage_options.options.create_if_missing = true; +#ifdef USE_S3 + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + 
cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; +#endif s = db.Open(storage_options, path); + ASSERT_TRUE(s.ok()); } void TearDown() override { diff --git a/src/storage/tests/lists_filter_test.cc b/src/storage/tests/lists_filter_test.cc index 694fe66bb6..73c0fa7f83 100644 --- a/src/storage/tests/lists_filter_test.cc +++ b/src/storage/tests/lists_filter_test.cc @@ -27,8 +27,37 @@ class ListsFilterTest : public ::testing::Test { if (access(db_path.c_str(), F_OK) != 0) { mkdir(db_path.c_str(), 0755); } - options.create_if_missing = true; + + options.create_if_missing = true; +#ifdef USE_S3 + // rocksdb-cloud env + rocksdb::CloudFileSystemOptions cloud_fs_opts; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + assert(cloud_fs_opts.credentials.HasValid().ok()); + std::string s3_path = db_path[0] == '.' ? db_path.substr(1) : db_path; + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.src_bucket.SetObjectPath(s3_path); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetObjectPath(s3_path); + rocksdb::CloudFileSystem* cfs = nullptr; + Status s = rocksdb::CloudFileSystem::NewAwsFileSystem( + rocksdb::FileSystem::Default(), + cloud_fs_opts, + nullptr, + &cfs + ); + assert(s.ok()); + std::shared_ptr cloud_fs(cfs); + cloud_env = NewCompositeEnv(cloud_fs); + assert(cloud_env); + options.env = cloud_env.get(); + s = rocksdb::DBCloud::Open(options, db_path, "", 0, &meta_db); +#else s = rocksdb::DB::Open(options, db_path, &meta_db); +#endif + + if (s.ok()) { // create column family rocksdb::ColumnFamilyHandle* cf; @@ -45,7 +74,12 @@ class ListsFilterTest : public ::testing::Test { // Data CF column_families.emplace_back("data_cf", data_cf_ops); +#ifdef USE_S3 + s = rocksdb::DBCloud::Open(options, db_path, column_families, "", 0, &handles, &meta_db); +#else s = rocksdb::DB::Open(options, db_path, column_families, &handles, &meta_db); +#endif + assert(s.ok()); } ~ListsFilterTest() override = default; @@ -58,7 +92,12 @@ class ListsFilterTest : public ::testing::Test { } storage::Options options; +#ifdef USE_S3 + rocksdb::DBCloud* meta_db; + std::unique_ptr cloud_env; +#else rocksdb::DB* meta_db; +#endif storage::Status s; std::vector column_families; diff --git a/src/storage/tests/lists_test.cc b/src/storage/tests/lists_test.cc index ed3325a316..1309c73572 100644 --- a/src/storage/tests/lists_test.cc +++ b/src/storage/tests/lists_test.cc @@ -84,6 +84,15 @@ class ListsTest : public ::testing::Test { pstd::DeleteDirIfExist(path); mkdir(path.c_str(), 0755); storage_options.options.create_if_missing = true; +#ifdef USE_S3 + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; +#endif s = db.Open(storage_options, path); if (!s.ok()) { printf("Open db failed, exit...\n"); diff 
--git a/src/storage/tests/sets_test.cc b/src/storage/tests/sets_test.cc index c6c4dd220e..1b696c3e9b 100644 --- a/src/storage/tests/sets_test.cc +++ b/src/storage/tests/sets_test.cc @@ -26,7 +26,17 @@ class SetsTest : public ::testing::Test { pstd::DeleteDirIfExist(path); mkdir(path.c_str(), 0755); storage_options.options.create_if_missing = true; +#ifdef USE_S3 + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; +#endif s = db.Open(storage_options, path); + ASSERT_TRUE(s.ok()); } void TearDown() override { diff --git a/src/storage/tests/strings_test.cc b/src/storage/tests/strings_test.cc index 33e15c67ef..724ca6175c 100644 --- a/src/storage/tests/strings_test.cc +++ b/src/storage/tests/strings_test.cc @@ -26,7 +26,17 @@ class StringsTest : public ::testing::Test { pstd::DeleteDirIfExist(path); mkdir(path.c_str(), 0755); storage_options.options.create_if_missing = true; +#ifdef USE_S3 + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; +#endif s = db.Open(storage_options, path); + ASSERT_TRUE(s.ok()); } void TearDown() override { diff --git a/src/storage/tests/zsets_test.cc b/src/storage/tests/zsets_test.cc index 465c48f00e..572289050e 100644 --- a/src/storage/tests/zsets_test.cc +++ b/src/storage/tests/zsets_test.cc @@ -31,7 +31,15 @@ class ZSetsTest : public ::testing::Test { pstd::DeleteDirIfExist(path); mkdir(path.c_str(), 0755); storage_options.options.create_if_missing = true; - s = db.Open(storage_options, path); +#ifdef USE_S3 + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; +#endif if (!s.ok()) { printf("Open db failed, exit...\n"); exit(1); From 42909a2ec8c93f5c16402a34552e195d58dff1f1 Mon Sep 17 00:00:00 2001 From: longfar Date: Wed, 28 Feb 2024 19:53:02 +0800 Subject: [PATCH 005/116] fix: forget open storage in zset tests --- src/storage/tests/zsets_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/storage/tests/zsets_test.cc b/src/storage/tests/zsets_test.cc index 572289050e..d1d470ec80 100644 --- a/src/storage/tests/zsets_test.cc +++ b/src/storage/tests/zsets_test.cc @@ -40,6 +40,7 @@ class ZSetsTest : public ::testing::Test { cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); storage_options.options.max_log_file_size = 0; #endif + s = db.Open(storage_options, path); if (!s.ok()) { printf("Open db failed, exit...\n"); exit(1); From c8dcd536525fd117ab6c70dad04f838d2ec1e170 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E9=91=AB?= Date: Fri, 1 Mar 2024 14:53:27 +0800 Subject: [PATCH 006/116] add cloud binlogitem --- src/pika_cloud_binlog.proto | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/pika_cloud_binlog.proto diff --git a/src/pika_cloud_binlog.proto b/src/pika_cloud_binlog.proto new file mode 100644 index 0000000000..62b67f8d22 --- /dev/null +++ b/src/pika_cloud_binlog.proto @@ -0,0 +1,16 @@ +syntax = "proto3"; + +package cloud; + +message BinlogCloudItem { + //belong to which db + required uint32 db_id = 1; + //belong to whicn rocksdb + required uint32 rocksdb_id = 2; + //data write time + required uint64 exec_time = 3; + required uint64 term_id = 4; + required uint64 file_num = 5; + required uint64 offset = 6; + required bytes content = 7; +} From 5d4882d9fa4634cf06b9dddf03344394a281a546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E9=91=AB?= Date: Tue, 5 Mar 2024 20:47:41 +0800 Subject: [PATCH 007/116] pika command can write to cloud binlog --- CMakeLists.txt | 4 +- include/pika_binlog.h | 14 +- include/pika_cloud_binlog.h | 110 ++++++++ include/pika_cloud_binlog_transverter.h | 39 +++ include/pika_conf.h | 5 + include/pika_define.h | 5 + src/pika_admin.cc | 5 +- src/pika_cloud_binlog.cc | 341 ++++++++++++++++++++++++ src/pika_cloud_binlog.proto | 14 +- src/pika_cloud_binlog_transverter.cc | 79 ++++++ src/pika_conf.cc | 2 + src/pika_consensus.cc | 14 +- src/pika_stable_log.cc | 7 +- 13 files changed, 617 insertions(+), 22 deletions(-) create mode 100644 include/pika_cloud_binlog.h create mode 100644 include/pika_cloud_binlog_transverter.h create mode 100644 src/pika_cloud_binlog.cc create mode 100644 src/pika_cloud_binlog_transverter.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 485ff0fe25..1c17c026b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -769,7 +769,9 @@ set(PIKA_BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/pika_build_version.cc message("PIKA_BUILD_VERSION_CC : " ${PIKA_BUILD_VERSION_CC}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/build_version.cc.in ${PIKA_BUILD_VERSION_CC} @ONLY) -set(PROTO_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/pika_inner_message.proto ${CMAKE_CURRENT_SOURCE_DIR}/src/rsync_service.proto) +set(PROTO_FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/pika_inner_message.proto + ${CMAKE_CURRENT_SOURCE_DIR}/src/rsync_service.proto + ${CMAKE_CURRENT_SOURCE_DIR}/src/pika_cloud_binlog.proto) custom_protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS ${PROTO_FILES}) message("pika PROTO_SRCS = ${PROTO_SRCS}") message("pika PROTO_HDRS = ${PROTO_HDRS}") diff --git a/include/pika_binlog.h b/include/pika_binlog.h index 84127fb535..fa39f2dbaa 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -46,31 +46,31 @@ class Version final : public pstd::noncopyable { class Binlog : public pstd::noncopyable { public: Binlog(std::string Binlog_path, int file_size = 100 * 1024 * 1024); - ~Binlog(); + virtual ~Binlog(); void Lock() { mutex_.lock(); } void Unlock() { mutex_.unlock(); } - pstd::Status Put(const std::string& item); + virtual pstd::Status Put(const std::string& item); - pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); + virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* * Set Producer pro_num and pro_offset with lock */ - pstd::Status SetProducerStatus(uint32_t pro_num, uint64_t pro_offset, uint32_t term = 0, uint64_t index = 0); + 
virtual pstd::Status SetProducerStatus(uint32_t pro_num, uint64_t pro_offset, uint32_t term = 0, uint64_t index = 0); // Need to hold Lock(); - pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index); + virtual pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index); std::string filename() { return filename_; } // need to hold mutex_ - void SetTerm(uint32_t term) { + virtual void SetTerm(uint32_t term) { std::lock_guard l(version_->rwlock_); version_->term_ = term; version_->StableSave(); } - uint32_t term() { + virtual uint32_t term() { std::shared_lock l(version_->rwlock_); return version_->term_; } diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h new file mode 100644 index 0000000000..3ccbf09e6d --- /dev/null +++ b/include/pika_cloud_binlog.h @@ -0,0 +1,110 @@ +// Copyright (c) 2015-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#ifndef PIKA_CLOUD_BINLOG_H_ +#define PIKA_CLOUD_BINLOG_H_ + +#include + +#include "pstd/include/env.h" +#include "pstd/include/pstd_mutex.h" +#include "pstd/include/pstd_status.h" +#include "pstd/include/noncopyable.h" +#include "include/pika_define.h" +#include "include/pika_binlog.h" + +std::string NewFileName(const std::string& name, uint32_t current); + +class CloudVersion final : public pstd::noncopyable { + public: + CloudVersion(const std::shared_ptr& save); + ~CloudVersion(); + + pstd::Status Init(); + + // RWLock should be held when access members. + pstd::Status StableSave(); + + uint32_t pro_num_ = 0; + uint64_t pro_offset_ = 0; + uint32_t term_ = 0; + + std::shared_mutex rwlock_; + + void debug() { + std::shared_lock l(rwlock_); + printf("Current pro_num %u pro_offset %llu\n", pro_num_, pro_offset_); + } + + private: + // shared with versionfile_ + std::shared_ptr save_; +}; + +class CloudBinlog : public Binlog { + public: + CloudBinlog(std::string Binlog_path, int file_size = 100 * 1024 * 1024); + ~CloudBinlog() {} + + pstd::Status Put(const std::string& item); + + pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); + /* + * Set Producer pro_num and pro_offset with lock + */ + pstd::Status SetProducerStatus(uint32_t pro_num, uint64_t pro_offset, uint32_t term = 0, uint64_t index = 0); + // Need to hold Lock(); + pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index = 0); + + std::string filename() { return filename_; } + + // need to hold mutex_ + void SetTerm(uint32_t term) { + std::lock_guard l(version_->rwlock_); + version_->term_ = term; + version_->StableSave(); + } + + uint32_t term() { + std::shared_lock l(version_->rwlock_); + return version_->term_; + } + + void Close(); + + private: + pstd::Status Put(const char* item, int len); + pstd::Status EmitPhysicalRecord(RecordType t, const char* ptr, size_t n, int* temp_pro_offset); + static pstd::Status AppendPadding(pstd::WritableFile* file, uint64_t* len); + void InitLogFile(); + + /* + * Produce + */ + pstd::Status Produce(const pstd::Slice& item, int* pro_offset); + + std::atomic opened_; + + std::unique_ptr version_; + std::unique_ptr queue_; + // versionfile_ can only be used as a shared_ptr, and it will be used as a variable version_ in the ~Version() function. 
+ std::shared_ptr versionfile_; + + pstd::Mutex mutex_; + + uint32_t pro_num_ = 0; + + int block_offset_ = 0; + + const std::string binlog_path_; + + uint64_t file_size_ = 0; + + std::string filename_; + + std::atomic binlog_io_error_; +}; + +#endif diff --git a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h new file mode 100644 index 0000000000..8d023e569f --- /dev/null +++ b/include/pika_cloud_binlog_transverter.h @@ -0,0 +1,39 @@ +// Copyright (c) 2018-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#ifndef PIKA_CLOUD_BINLOG_TRANSVERTER_H_ +#define PIKA_CLOUD_BINLOG_TRANSVERTER_H_ + +#include +#include +#include +#include + +#include "output/pika_cloud_binlog.pb.h" + +//#define CLOUD_BINLOG_ENCODE_LEN 34 + +enum BinlogType { + TypeFirst = 1, +}; + +const int PADDING_BINLOG_PROTOCOL_SIZE = 22; +const int SPACE_STROE_PARAMETER_LENGTH = 5; + +class PikaCloudBinlogTransverter { + public: + PikaCloudBinlogTransverter() = default; + static std::string BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, + uint32_t filenum, uint64_t offset, const std::string& content); + + static bool BinlogDecode(const std::string& binlog, cloud::BinlogCloudItem* binlog_item); + + static std::string ConstructPaddingBinlog(uint32_t size); + + static bool BinlogItemWithoutContentDecode(const std::string& binlog, + cloud::BinlogCloudItem* binlog_item); +}; + +#endif diff --git a/include/pika_conf.h b/include/pika_conf.h index 6b0917ee76..a9315704a5 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -314,6 +314,7 @@ class PikaConf : public pstd::BaseConf { return network_interface_; } int cache_model() { return cache_model_; } + int pika_model() { return pika_model_; } int sync_window_size() { return sync_window_size_.load(); } int max_conn_rbuf_size() { return max_conn_rbuf_size_.load(); } int consensus_level() { return consensus_level_.load(); } @@ -350,6 +351,7 @@ class PikaConf : public pstd::BaseConf { void SetCacheMaxmemoryPolicy(const int value) { cache_maxmemory_policy_ = value; } void SetCacheMaxmemorySamples(const int value) { cache_maxmemory_samples_ = value; } void SetCacheLFUDecayTime(const int value) { cache_lfu_decay_time_ = value; } + void SetPikaModel(const int value) { pika_model_ = value; } void UnsetCacheDisableFlag() { tmp_cache_disable_flag_ = false; } bool enable_blob_files() { return enable_blob_files_; } int64_t min_blob_size() { return min_blob_size_; } @@ -798,6 +800,9 @@ class PikaConf : public pstd::BaseConf { std::atomic_int cache_maxmemory_samples_; std::atomic_int cache_lfu_decay_time_; + //pika model + int32_t pika_model_; + // rocksdb blob bool enable_blob_files_ = false; bool enable_blob_garbage_collection_ = false; diff --git a/include/pika_define.h b/include/pika_define.h index 176b371111..75e5b0d3ea 100644 --- a/include/pika_define.h +++ b/include/pika_define.h @@ -309,6 +309,11 @@ const int PIKA_ROLE_MASTER = 2; */ constexpr int PIKA_CACHE_NONE = 0; constexpr int PIKA_CACHE_READ = 1; +/* + * cloud model + */ +constexpr int PIKA_LOCAL = 0; +constexpr int PIKA_CLOUD = 1; /* * cache size diff --git a/src/pika_admin.cc b/src/pika_admin.cc index d25b9459e4..ea5d8b801a 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -2899,9 +2899,12 @@ void PaddingCmd::DoInitial() { 
void PaddingCmd::Do() { res_.SetRes(CmdRes::kOk); } std::string PaddingCmd::ToRedisProtocol() { - return PikaBinlogTransverter::ConstructPaddingBinlog( + if (g_pika_conf->pika_model() == PIKA_LOCAL) + return PikaBinlogTransverter::ConstructPaddingBinlog( BinlogType::TypeFirst, argv_[1].size() + BINLOG_ITEM_HEADER_SIZE + PADDING_BINLOG_PROTOCOL_SIZE + SPACE_STROE_PARAMETER_LENGTH); + else if (g_pika_conf->pika_model() == PIKA_CLOUD) + return PikaBinlogTransverter::ConstructPaddingBinlog(BinlogType::TypeFirst, argv_[1].size()); } void PKPatternMatchDelCmd::DoInitial() { diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc new file mode 100644 index 0000000000..fddaafdc45 --- /dev/null +++ b/src/pika_cloud_binlog.cc @@ -0,0 +1,341 @@ +// Copyright (c) 2015-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include +#include +#include +#include + +#include "include/pika_cloud_binlog.h" +#include "include/pika_cloud_binlog_transverter.h" +#include "pstd/include/pstd_defer.h" +#include "pstd_status.h" + +using pstd::Status; + +std::string NewCloudFileName(const std::string& name, const uint32_t current) { + char buf[256]; + snprintf(buf, sizeof(buf), "%s%u", name.c_str(), current); + return {buf}; +} + +/* + * CloudVersion + */ +CloudVersion::CloudVersion(const std::shared_ptr& save) : save_(save) { assert(save_ != nullptr); } + +CloudVersion::~CloudVersion() { StableSave(); } + +Status CloudVersion::StableSave() { + char* p = save_->GetData(); + memcpy(p, &pro_num_, sizeof(uint32_t)); + p += 4; + memcpy(p, &pro_offset_, sizeof(uint64_t)); + p += 8; + memcpy(p, &term_, sizeof(uint32_t)); + return Status::OK(); +} + +Status CloudVersion::Init() { + Status s; + if (save_->GetData()) { + memcpy(reinterpret_cast(&pro_num_), save_->GetData(), sizeof(uint32_t)); + memcpy(reinterpret_cast(&pro_offset_), save_->GetData() + 4, sizeof(uint64_t)); + memcpy(reinterpret_cast(&term_), save_->GetData() + 12, sizeof(uint32_t)); + return Status::OK(); + } else { + return Status::Corruption("version init error"); + } +} + +/* + * Binlog + */ + +CloudBinlog::CloudBinlog(std::string binlog_path, const int file_size) + : Binlog(binlog_path, file_size), + opened_(false), + binlog_path_(std::move(binlog_path)), + file_size_(file_size), + binlog_io_error_(false) {} + +void CloudBinlog::InitLogFile() { + assert(queue_ != nullptr); + uint64_t filesize = queue_->Filesize(); + block_offset_ = static_cast(filesize % kBlockSize); + opened_.store(true); +} + +Status CloudBinlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term, uint64_t* logic_id) { + if (!opened_.load()) { + return Status::Busy("Binlog is not open yet"); + } + + std::shared_lock l(version_->rwlock_); + + *filenum = version_->pro_num_; + *pro_offset = version_->pro_offset_; + if (term) { + *term = version_->term_; + } + + return Status::OK(); +} + +// Note: mutex lock should be held +Status CloudBinlog::Put(const std::string& item) { + if (!opened_.load()) { + return Status::Busy("Cloud Binlog is not open yet"); + } + uint32_t filenum = 0; + uint32_t term = 0; + uint64_t offset = 0; + + Lock(); + DEFER { Unlock(); }; + + Status s = GetProducerStatus(&filenum, &offset, &term, nullptr); + if (!s.ok()) { + return s; + } + // bx check 暂时默认db_id ,rocksdb_id为0 + std::string data = 
PikaCloudBinlogTransverter::BinlogEncode(0, 0, time(nullptr), term, filenum, offset, item); + + s = Put(data.c_str(), static_cast(data.size())); + if (!s.ok()) { + binlog_io_error_.store(true); + } + return s; +} + +// Note: mutex lock should be held +Status CloudBinlog::Put(const char* item, int len) { + Status s; + /* Check to roll log file */ + uint64_t filesize = queue_->Filesize(); + if (filesize > file_size_) { + std::unique_ptr queue; + std::string profile = NewCloudFileName(filename_, pro_num_ + 1); + s = pstd::NewWritableFile(profile, queue); + if (!s.ok()) { + LOG(ERROR) << "Cloud Binlog: new " << filename_ << " " << s.ToString(); + return s; + } + queue_.reset(); + queue_ = std::move(queue); + pro_num_++; + + { + std::lock_guard l(version_->rwlock_); + version_->pro_offset_ = 0; + version_->pro_num_ = pro_num_; + version_->StableSave(); + } + InitLogFile(); + } + + int pro_offset; + s = Produce(pstd::Slice(item, len), &pro_offset); + if (s.ok()) { + std::lock_guard l(version_->rwlock_); + version_->pro_offset_ = pro_offset; + version_->StableSave(); + } + + return s; +} + +Status CloudBinlog::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n, int* temp_pro_offset) { + Status s; + assert(n <= 0xffffff); + assert(block_offset_ + kHeaderSize + n <= kBlockSize); + char buf[kHeaderSize]; + + uint64_t now; + struct timeval tv; + gettimeofday(&tv, nullptr); + now = tv.tv_sec; + buf[0] = static_cast(n & 0xff); + buf[1] = static_cast((n & 0xff00) >> 8); + buf[2] = static_cast(n >> 16); + buf[3] = static_cast(now & 0xff); + buf[4] = static_cast((now & 0xff00) >> 8); + buf[5] = static_cast((now & 0xff0000) >> 16); + buf[6] = static_cast((now & 0xff000000) >> 24); + buf[7] = static_cast(t); + + s = queue_->Append(pstd::Slice(buf, kHeaderSize)); + if (s.ok()) { + s = queue_->Append(pstd::Slice(ptr, n)); + if (s.ok()) { + s = queue_->Flush(); + } + } + block_offset_ += static_cast(kHeaderSize + n); + + *temp_pro_offset += static_cast(kHeaderSize + n); + return s; +} + +Status CloudBinlog::Produce(const pstd::Slice& item, int* temp_pro_offset) { + Status s; + const char* ptr = item.data(); + size_t left = item.size(); + bool begin = true; + + *temp_pro_offset = static_cast(version_->pro_offset_); + do { + const int leftover = static_cast(kBlockSize) - block_offset_; + assert(leftover >= 0); + if (static_cast(leftover) < kHeaderSize) { + if (leftover > 0) { + s = queue_->Append(pstd::Slice("\x00\x00\x00\x00\x00\x00\x00", leftover)); + if (!s.ok()) { + return s; + } + *temp_pro_offset += leftover; + } + block_offset_ = 0; + } + + const size_t avail = kBlockSize - block_offset_ - kHeaderSize; + const size_t fragment_length = (left < avail) ? left : avail; + RecordType type; + const bool end = (left == fragment_length); + if (begin && end) { + type = kFullType; + } else if (begin) { + type = kFirstType; + } else if (end) { + type = kLastType; + } else { + type = kMiddleType; + } + + s = EmitPhysicalRecord(type, ptr, fragment_length, temp_pro_offset); + ptr += fragment_length; + left -= fragment_length; + begin = false; + } while (s.ok() && left > 0); + + return s; +} + +Status CloudBinlog::AppendPadding(pstd::WritableFile* file, uint64_t* len) { + if (*len < kHeaderSize) { + return Status::OK(); + } + + Status s; + char buf[kBlockSize]; + uint64_t now; + struct timeval tv; + gettimeofday(&tv, nullptr); + now = tv.tv_sec; + + uint64_t left = *len; + while (left > 0 && s.ok()) { + uint32_t size = (left >= kBlockSize) ? 
kBlockSize : left; + if (size < kHeaderSize) { + break; + } else { + uint32_t bsize = size - kHeaderSize; + std::string binlog(bsize, '*'); + buf[0] = static_cast(bsize & 0xff); + buf[1] = static_cast((bsize & 0xff00) >> 8); + buf[2] = static_cast(bsize >> 16); + buf[3] = static_cast(now & 0xff); + buf[4] = static_cast((now & 0xff00) >> 8); + buf[5] = static_cast((now & 0xff0000) >> 16); + buf[6] = static_cast((now & 0xff000000) >> 24); + // kBadRecord here + buf[7] = static_cast(kBadRecord); + s = file->Append(pstd::Slice(buf, kHeaderSize)); + if (s.ok()) { + s = file->Append(pstd::Slice(binlog.data(), binlog.size())); + if (s.ok()) { + s = file->Flush(); + left -= size; + } + } + } + } + *len -= left; + if (left != 0) { + LOG(WARNING) << "Cloud AppendPadding left bytes: " << left << " is less then kHeaderSize"; + } + return s; +} + +Status CloudBinlog::SetProducerStatus(uint32_t pro_num, uint64_t pro_offset, uint32_t term, uint64_t index) { + if (!opened_.load()) { + return Status::Busy("Cloud Binlog is not open yet"); + } + + std::lock_guard l(mutex_); + + // offset smaller than the first header + if (pro_offset < 4) { + pro_offset = 0; + } + + queue_.reset(); + + std::string init_profile = NewCloudFileName(filename_, 0); + if (pstd::FileExists(init_profile)) { + pstd::DeleteFile(init_profile); + } + + std::string profile = NewCloudFileName(filename_, pro_num); + if (pstd::FileExists(profile)) { + pstd::DeleteFile(profile); + } + + pstd::NewWritableFile(profile, queue_); + CloudBinlog::AppendPadding(queue_.get(), &pro_offset); + + pro_num_ = pro_num; + + { + std::lock_guard l(version_->rwlock_); + version_->pro_num_ = pro_num; + version_->pro_offset_ = pro_offset; + version_->term_ = term; + version_->StableSave(); + } + + InitLogFile(); + return Status::OK(); +} + +Status CloudBinlog::Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index) { + queue_.reset(); + std::string profile = NewCloudFileName(filename_, pro_num); + const int fd = open(profile.c_str(), O_RDWR | O_CLOEXEC, 0644); + if (fd < 0) { + return Status::IOError("fd open failed"); + } + if (ftruncate(fd, static_cast(pro_offset)) != 0) { + return Status::IOError("ftruncate failed"); + } + close(fd); + + pro_num_ = pro_num; + { + std::lock_guard l(version_->rwlock_); + version_->pro_num_ = pro_num; + version_->pro_offset_ = pro_offset; + version_->StableSave(); + } + + Status s = pstd::AppendWritableFile(profile, queue_, version_->pro_offset_); + if (!s.ok()) { + return s; + } + + InitLogFile(); + + return Status::OK(); +} diff --git a/src/pika_cloud_binlog.proto b/src/pika_cloud_binlog.proto index 62b67f8d22..efd7a470bb 100644 --- a/src/pika_cloud_binlog.proto +++ b/src/pika_cloud_binlog.proto @@ -4,13 +4,13 @@ package cloud; message BinlogCloudItem { //belong to which db - required uint32 db_id = 1; + uint32 db_id = 1; //belong to whicn rocksdb - required uint32 rocksdb_id = 2; + uint32 rocksdb_id = 2; //data write time - required uint64 exec_time = 3; - required uint64 term_id = 4; - required uint64 file_num = 5; - required uint64 offset = 6; - required bytes content = 7; + uint32 exec_time = 3; + uint32 term_id = 4; + uint32 file_num = 5; + uint64 offset = 6; + bytes content = 7; } diff --git a/src/pika_cloud_binlog_transverter.cc b/src/pika_cloud_binlog_transverter.cc new file mode 100644 index 0000000000..a034b04fc4 --- /dev/null +++ b/src/pika_cloud_binlog_transverter.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2018-present, Qihoo, Inc. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "include/pika_cloud_binlog_transverter.h" +#include +#include +#include +#include "include/pika_command.h" +#include "pstd/include/pstd_coding.h" + +std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, + uint32_t term_id, uint32_t filenum, uint64_t offset, + const std::string& content) { + std::string serialize_binlog; + cloud::BinlogCloudItem binlog_item; + binlog_item.set_db_id(db_id); + binlog_item.set_rocksdb_id(rocksdb_id); + binlog_item.set_exec_time(exec_time); + binlog_item.set_term_id(term_id); + binlog_item.set_file_num(filenum); + binlog_item.set_offset(offset); + binlog_item.set_content(content); + binlog_item.SerializeToString(&serialize_binlog); + return serialize_binlog; +} + +bool PikaCloudBinlogTransverter::BinlogDecode(const std::string& binlog, cloud::BinlogCloudItem* binlog_item) { + auto res = binlog_item->ParseFromString(binlog); + if (!res) { + LOG(ERROR) << "Failed to deserialize cloud binlog item"; + return false; + } + return true; +} + +std::string PikaCloudBinlogTransverter::ConstructPaddingBinlog(uint32_t parameter_len) { + std::string binlog; + cloud::BinlogCloudItem binlog_item; + if (parameter_len < 0) { + return {}; + } + + std::string content; + RedisAppendLen(content, 2, "*"); + RedisAppendLen(content, 7, "$"); + RedisAppendContent(content, "padding"); + + std::string parameter_len_str; + std::ostringstream os; + os << parameter_len; + std::istringstream is(os.str()); + is >> parameter_len_str; + if (parameter_len_str.size() > SPACE_STROE_PARAMETER_LENGTH) { + return {}; + } + + content.append("$"); + content.append(SPACE_STROE_PARAMETER_LENGTH - parameter_len_str.size(), '0'); + content.append(parameter_len_str); + content.append(kNewLine); + RedisAppendContent(content, std::string(parameter_len, '*')); + + BinlogEncode(0, 0, 0, 0, 0, 0, content); + return binlog; +} + +bool PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(const std::string& binlog, + cloud::BinlogCloudItem* binlog_item) { + auto res = binlog_item->ParseFromString(binlog); + if (!res) { + LOG(ERROR) << "Failed to deserialize cloud binlog item"; + return false; + } + // bx check + binlog_item->set_content(""); + return true; +} diff --git a/src/pika_conf.cc b/src/pika_conf.cc index 8086bb2285..16fd421a9a 100644 --- a/src/pika_conf.cc +++ b/src/pika_conf.cc @@ -486,6 +486,8 @@ int PikaConf::Load() { int cache_num = 16; GetConfInt("cache-num", &cache_num); cache_num_ = (0 >= cache_num || 48 < cache_num) ? 
16 : cache_num; + // TODO(bx): pika_model is temporarily hard-coded to PIKA_CLOUD + pika_model_ = PIKA_CLOUD; int cache_model = 0; GetConfInt("cache-model", &cache_model); diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc index 3d08a4a642..1549ea9472 100644 --- a/src/pika_consensus.cc +++ b/src/pika_consensus.cc @@ -31,8 +31,10 @@ Status Context::StableSave() { memcpy(p, &(applied_index_.b_offset.offset), sizeof(uint64_t)); p += 8; memcpy(p, &(applied_index_.l_offset.term), sizeof(uint32_t)); - p += 4; - memcpy(p, &(applied_index_.l_offset.index), sizeof(uint64_t)); + if (g_pika_conf->pika_model() == PIKA_LOCAL) { + p += 4; + memcpy(p, &(applied_index_.l_offset.index), sizeof(uint64_t)); + } return Status::OK(); } @@ -55,13 +57,15 @@ Status Context::Init() { memcpy(reinterpret_cast(&(applied_index_.b_offset.filenum)), save_->GetData(), sizeof(uint32_t)); memcpy(reinterpret_cast(&(applied_index_.b_offset.offset)), save_->GetData() + 4, sizeof(uint64_t)); memcpy(reinterpret_cast(&(applied_index_.l_offset.term)), save_->GetData() + 12, sizeof(uint32_t)); - memcpy(reinterpret_cast(&(applied_index_.l_offset.index)), save_->GetData() + 16, sizeof(uint64_t)); + if (g_pika_conf->pika_model() == PIKA_LOCAL) { + memcpy(reinterpret_cast(&(applied_index_.l_offset.index)), save_->GetData() + 16, sizeof(uint64_t)); + } return Status::OK(); } else { return Status::Corruption("Context init error"); } } - +// currently unused void Context::UpdateAppliedIndex(const LogOffset& offset) { std::lock_guard l(rwlock_); LogOffset cur_offset; @@ -71,7 +75,7 @@ void Context::UpdateAppliedIndex(const LogOffset& offset) { StableSave(); } } - +// TODO(bx): check whether this should instead be modified at the outer layer void Context::Reset(const LogOffset& offset) { std::lock_guard l(rwlock_); applied_index_ = offset; diff --git a/src/pika_stable_log.cc b/src/pika_stable_log.cc index ba51d9171c..e965ee4494 100644 --- a/src/pika_stable_log.cc +++ b/src/pika_stable_log.cc @@ -13,6 +13,7 @@ #include "include/pika_stable_log.h" #include "pstd/include/env.h" #include "include/pika_conf.h" +#include "include/pika_cloud_binlog.h" using pstd::Status; @@ -21,7 +22,11 @@ extern std::unique_ptr g_pika_rm; StableLog::StableLog(std::string db_name, std::string log_path) : purging_(false), db_name_(std::move(db_name)), log_path_(std::move(log_path)) { - stable_logger_ = std::make_shared(log_path_, g_pika_conf->binlog_file_size()); + if (g_pika_conf->pika_model() == PIKA_LOCAL) { + stable_logger_ = std::make_shared(log_path_, g_pika_conf->binlog_file_size()); + } else if (g_pika_conf->pika_model() == PIKA_CLOUD) { + stable_logger_ = std::make_shared(log_path_, g_pika_conf->binlog_file_size()); + } std::map binlogs; if (!GetBinlogFiles(&binlogs)) { LOG(FATAL) << log_path_ << " Could not get binlog files!"; From 454b87a2fe5a7e2c967f4a921cbd54bea68f2db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E9=91=AB?= Date: Thu, 7 Mar 2024 17:55:34 +0800 Subject: [PATCH 008/116] do not init base Binlog while initializing CloudBinlog --- src/pika_binlog.cc | 3 +- src/pika_cloud_binlog.cc | 65 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index d0a612f24c..59e1591988 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -68,9 +68,10 @@ Binlog::Binlog(std::string binlog_path, const int file_size) // To intergrate with old version, we don't set mmap file size to 100M; // pstd::SetMmapBoundSize(file_size); // pstd::kMmapBoundSize = 1024 * 1024 * 100; + // binlog not configured (e.g. constructed by CloudBinlog); skip initialization + if (binlog_path_ == "" || file_size_ == 0) return; Status s; -
pstd::CreateDir(binlog_path_); filename_ = binlog_path_ + kBinlogPrefix; diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index fddaafdc45..fb9901e517 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -55,11 +55,70 @@ Status CloudVersion::Init() { */ CloudBinlog::CloudBinlog(std::string binlog_path, const int file_size) - : Binlog(binlog_path, file_size), + : Binlog("", 0), opened_(false), binlog_path_(std::move(binlog_path)), file_size_(file_size), - binlog_io_error_(false) {} + binlog_io_error_(false) { + // To intergrate with old version, we don't set mmap file size to 100M; + // pstd::SetMmapBoundSize(file_size); + // pstd::kMmapBoundSize = 1024 * 1024 * 100; + // bin log not init + if (binlog_path_ == "" || file_size_ == 0) return; + + Status s; + pstd::CreateDir(binlog_path_); + + filename_ = binlog_path_ + kBinlogPrefix; + const std::string manifest = binlog_path_ + kManifest; + std::string profile; + + if (!pstd::FileExists(manifest)) { + LOG(INFO) << "Cloud Binlog: Manifest file not exist, we create a new one."; + + profile = NewFileName(filename_, pro_num_); + s = pstd::NewWritableFile(profile, queue_); + if (!s.ok()) { + LOG(FATAL) << "Cloud Binlog: new " << filename_ << " " << s.ToString(); + } + std::unique_ptr tmp_file; + s = pstd::NewRWFile(manifest, tmp_file); + versionfile_.reset(tmp_file.release()); + if (!s.ok()) { + LOG(FATAL) << "Cloud Binlog: new versionfile error " << s.ToString(); + } + + version_ = std::make_unique(versionfile_); + version_->StableSave(); + } else { + LOG(INFO) << "Cloud Binlog: Find the exist file."; + std::unique_ptr tmp_file; + s = pstd::NewRWFile(manifest, tmp_file); + versionfile_.reset(tmp_file.release()); + if (s.ok()) { + version_ = std::make_unique(versionfile_); + version_->Init(); + pro_num_ = version_->pro_num_; + + // Debug + // version_->debug(); + } else { + LOG(FATAL) << "Cloud Binlog: open versionfile error"; + } + + profile = NewFileName(filename_, pro_num_); + DLOG(INFO) << "Cloud Binlog: open profile " << profile; + s = pstd::AppendWritableFile(profile, queue_, version_->pro_offset_); + if (!s.ok()) { + LOG(FATAL) << "Cloud Binlog: Open file " << profile << " error " << s.ToString(); + } + + uint64_t filesize = queue_->Filesize(); + DLOG(INFO) << "Cloud Binlog: filesize is " << filesize; + } + + InitLogFile(); +} void CloudBinlog::InitLogFile() { assert(queue_ != nullptr); @@ -70,7 +129,7 @@ void CloudBinlog::InitLogFile() { Status CloudBinlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term, uint64_t* logic_id) { if (!opened_.load()) { - return Status::Busy("Binlog is not open yet"); + return Status::Busy("Cloud Binlog is not open yet"); } std::shared_lock l(version_->rwlock_); From c8b4bbec7aef9eacac40d489ba53f15f9fdf539d Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 7 Mar 2024 15:32:26 +0800 Subject: [PATCH 009/116] compare local and remote's version by manifest_update_sequence --- src/storage/include/storage/storage.h | 4 ++ src/storage/src/redis.cc | 72 +++++++++++++++++++++++++-- src/storage/src/redis.h | 9 +++- src/storage/src/storage.cc | 15 ++++++ 4 files changed, 96 insertions(+), 4 deletions(-) diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index c4cb36aa54..95b174cc8e 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -1104,6 +1104,10 @@ class Storage { #ifdef USE_S3 rocksdb::DBCloud* GetDBByIndex(int index); + + // called when switch master-slave + Status 
SwitchMaster(bool is_old_master, bool is_new_master); + #else rocksdb::DB* GetDBByIndex(int index); #endif diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 4c4f940ba2..59cde54269 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -482,19 +482,85 @@ Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::stri std::string s3_path = db_path[0] == '.' ? db_path.substr(1) : db_path; opts.src_bucket.SetObjectPath(s3_path); opts.dest_bucket.SetObjectPath(s3_path); - rocksdb::CloudFileSystem* cfs = nullptr; Status s = rocksdb::CloudFileSystem::NewAwsFileSystem( rocksdb::FileSystem::Default(), opts, nullptr, - &cfs + &cfs_ ); if (s.ok()) { - std::shared_ptr cloud_fs(cfs); + std::shared_ptr cloud_fs(cfs_); cloud_env_ = NewCompositeEnv(cloud_fs); } return s; } + +Status Redis::ReOpenRocksDB(const std::unordered_map& db_options, + const std::unordered_map& cfs_options) { + return Status::OK(); +} + +Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { + if (is_old_master && is_new_master) { + // Do nothing + return Status::OK(); + } + + std::unordered_map db_options, cfs_options; + if (is_old_master && !is_new_master) { + db_options["disable_auto_compactions"] = "true"; + db_options["disable_auto_flush"] = "true"; + for (const auto& cf : handles_) { + db_->SetOptions(cf, db_options); + } + rocksdb::FlushOptions fops; + fops.wait = true; + db_->Flush(fops, handles_); + cfs_->SwitchMaster(false); + cfs_options["is_master"] = "false"; + return ReOpenRocksDB(db_options, cfs_options); + } + + // slaveof another pika master, just reopen + if (!is_old_master && !is_new_master) { + return ReOpenRocksDB(db_options, cfs_options); + } + + // slave promotes to master + if (!is_old_master && is_new_master) { + db_options["disable_auto_compactions"] = "true"; + db_options["disable_auto_flush"] = "true"; + cfs_options["is_master"] = "true"; + + // compare manifest_sequence + uint64_t local_manifest_sequence = db_->GetManifestUpdateSequence(); + uint64_t remote_manifest_sequence = 0; + cfs_->GetMaxManifestSequenceFromCurrentManifest(db_->GetName(), &remote_manifest_sequence); + // local version behind remote, directly reopen + if (local_manifest_sequence < remote_manifest_sequence) { + return ReOpenRocksDB(db_options, cfs_options); + } + // local's version cannot beyond remote's, just holding extra data in memtables + assert(local_manifest_sequence == remote_manifest_sequence); + + db_options["disable_auto_compactions"] = "false"; + db_options["disable_auto_flush"] = "false"; + for (const auto& cf : handles_) { + db_->SetOptions(cf, db_options); + } + db_->NewManifestOnNextUpdate(); + cfs_options["is_master"] = "master"; + + rocksdb::FlushOptions fops; + fops.wait = true; + db_->Flush(fops, handles_); + cfs_->SwitchMaster(false); + //TODO + //cfs_->UploadManifest(); + return Status::OK(); + } + return Status::OK(); +} #endif } // namespace storage diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 878fa9ae0c..87320f31cc 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -391,6 +391,10 @@ class Redis { return nullptr; } +#ifdef USE_S3 + Status SwitchMaster(bool is_old_master, bool is_new_master); +#endif + private: Status GenerateStreamID(const StreamMetaValue& stream_meta, StreamAddTrimArgs& args); @@ -456,11 +460,14 @@ class Redis { Status UpdateSpecificKeyStatistics(const DataType& dtype, const std::string& key, uint64_t count); Status UpdateSpecificKeyDuration(const DataType& dtype, const 
std::string& key, uint64_t duration); Status AddCompactKeyTaskIfNeeded(const DataType& dtype, const std::string& key, uint64_t count, uint64_t duration); - + #ifdef USE_S3 // rocksdb-cloud Status OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::string& db_path); std::unique_ptr cloud_env_; + rocksdb::CloudFileSystem* cfs_; + Status ReOpenRocksDB(const std::unordered_map& db_options, + const std::unordered_map& cfs_options); #endif }; diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index 56cee932c9..7c3e80aa55 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -2448,4 +2448,19 @@ void Storage::DisableWal(const bool is_wal_disable) { } } +#ifdef USE_S3 +Status Storage::SwitchMaster(bool is_old_master, bool is_new_master) { + Status s = Status::OK(); + for (const auto& inst : insts_) { + s = inst->SwitchMaster(is_old_master, is_new_master); + if (!s.ok()) { + LOG(WARNING) << "switch mode failed, when switch from " + << (is_old_master ? "master" : "slave") << " to " + << (is_new_master ? "master" : "slave"); + return s; + } + } + return s; +} +#endif } // namespace storage From b9716553a5279f8967116af2a0a66914a6ea66f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E9=91=AB?= Date: Fri, 8 Mar 2024 14:15:32 +0800 Subject: [PATCH 010/116] pass db_id, rocksdb_id parameters --- include/pika_binlog.h | 2 ++ include/pika_cloud_binlog.h | 2 ++ include/pika_cloud_binlog_transverter.h | 4 ---- src/pika_binlog.cc | 4 +++- src/pika_cloud_binlog.cc | 8 +++++--- src/pika_consensus.cc | 20 ++++++++++++++++++-- 6 files changed, 30 insertions(+), 10 deletions(-) diff --git a/include/pika_binlog.h b/include/pika_binlog.h index fa39f2dbaa..bca0904c7c 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -53,6 +53,8 @@ class Binlog : public pstd::noncopyable { virtual pstd::Status Put(const std::string& item); + virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id); + virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* * Set Producer pro_num and pro_offset with lock diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h index 3ccbf09e6d..0afa401a19 100644 --- a/include/pika_cloud_binlog.h +++ b/include/pika_cloud_binlog.h @@ -50,6 +50,8 @@ class CloudBinlog : public Binlog { pstd::Status Put(const std::string& item); + pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id); + pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* * Set Producer pro_num and pro_offset with lock diff --git a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h index 8d023e569f..b85f8d5686 100644 --- a/include/pika_cloud_binlog_transverter.h +++ b/include/pika_cloud_binlog_transverter.h @@ -15,10 +15,6 @@ //#define CLOUD_BINLOG_ENCODE_LEN 34 -enum BinlogType { - TypeFirst = 1, -}; - const int PADDING_BINLOG_PROTOCOL_SIZE = 22; const int SPACE_STROE_PARAMETER_LENGTH = 5; diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index 59e1591988..5bbb6aaf9d 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -164,7 +164,9 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32 return Status::OK(); } - +Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { + Put(item); +} // Note: mutex lock should be held Status Binlog::Put(const 
std::string& item) { if (!opened_.load()) { diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index fb9901e517..a8899dcf9d 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -143,8 +143,11 @@ Status CloudBinlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, u return Status::OK(); } -// Note: mutex lock should be held Status CloudBinlog::Put(const std::string& item) { + Put(item, 0, 0); +} +// Note: mutex lock should be held +Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { if (!opened_.load()) { return Status::Busy("Cloud Binlog is not open yet"); } @@ -159,8 +162,7 @@ Status CloudBinlog::Put(const std::string& item) { if (!s.ok()) { return s; } - // bx check 暂时默认db_id ,rocksdb_id为0 - std::string data = PikaCloudBinlogTransverter::BinlogEncode(0, 0, time(nullptr), term, filenum, offset, item); + std::string data = PikaCloudBinlogTransverter::BinlogEncode(db_id, rocksdb_id, time(nullptr), term, filenum, offset, item); s = Put(data.c_str(), static_cast(data.size())); if (!s.ok()) { diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc index 1549ea9472..bc266edc88 100644 --- a/src/pika_consensus.cc +++ b/src/pika_consensus.cc @@ -12,6 +12,7 @@ #include "include/pika_conf.h" #include "include/pika_rm.h" #include "include/pika_server.h" +#include "pika_codis_slot.h" using pstd::Status; @@ -364,10 +365,25 @@ Status ConsensusCoordinator::UpdateSlave(const std::string& ip, int port, const } Status ConsensusCoordinator::InternalAppendBinlog(const std::shared_ptr& cmd_ptr) { + Status s; std::string content = cmd_ptr->ToRedisProtocol(); - Status s = stable_logger_->Logger()->Put(content); + std::string db_name = cmd_ptr->db_name().empty() ? g_pika_conf->default_db() : cmd_ptr->db_name(); + uint32_t db_id = std::stoi(db_name.substr(strlen("db"))); + std::vector keys = cmd_ptr->current_key(); + + if (keys.empty()) { + //需要特殊处理,比如flushdb bx check + } else { + //多key也需要特殊处理 bx check + uint32_t slot_id = GetSlotsID(g_pika_conf->default_slot_num(), keys[0], nullptr, nullptr); + if (g_pika_conf->pika_model() == PIKA_LOCAL) { + s = stable_logger_->Logger()->Put(content, db_id, slot_id); + }else if (g_pika_conf->pika_model() == PIKA_CLOUD) { + s = stable_logger_->Logger()->Put(content); + } + } + if (!s.ok()) { - std::string db_name = cmd_ptr->db_name().empty() ? 
g_pika_conf->default_db() : cmd_ptr->db_name();
   std::shared_ptr db = g_pika_server->GetDB(db_name);
   if (db) {
     db->SetBinlogIoError();

From 8f41c460f8d323f205bf9e94113b400c80d05502 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BD=E9=91=AB?=
Date: Fri, 8 Mar 2024 16:16:54 +0800
Subject: [PATCH 011/116] clean code

---
 src/pika_binlog.cc    | 2 ++
 src/pika_consensus.cc | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc
index 5bbb6aaf9d..0d765bb69a 100644
--- a/src/pika_binlog.cc
+++ b/src/pika_binlog.cc
@@ -164,9 +164,11 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32
   return Status::OK();
 }
+
 Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) {
   Put(item);
 }
+
 // Note: mutex lock should be held
 Status Binlog::Put(const std::string& item) {
   if (!opened_.load()) {
diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc
index bc266edc88..1a4b48b5f4 100644
--- a/src/pika_consensus.cc
+++ b/src/pika_consensus.cc
@@ -374,12 +374,12 @@ Status ConsensusCoordinator::InternalAppendBinlog(const std::shared_ptr& cm
   if (keys.empty()) {
     //需要特殊处理,比如flushdb bx check
   } else {
-    //多key也需要特殊处理 bx check
+    //多key也需要特殊处理 带hashtag bx check
     uint32_t slot_id = GetSlotsID(g_pika_conf->default_slot_num(), keys[0], nullptr, nullptr);
     if (g_pika_conf->pika_model() == PIKA_LOCAL) {
-      s = stable_logger_->Logger()->Put(content, db_id, slot_id);
+      s = stable_logger_->Logger()->Put(content);
     }else if (g_pika_conf->pika_model() == PIKA_CLOUD) {
-      s = stable_logger_->Logger()->Put(content);
+      s = stable_logger_->Logger()->Put(content, db_id, slot_id);
     }
   }

From 18443e56774b7a2b36f6081bd8ff63d3d1fe706b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BD=E9=91=AB?=
Date: Fri, 8 Mar 2024 16:56:59 +0800
Subject: [PATCH 012/116] clean code

---
 include/pika_cloud_binlog_transverter.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h
index b85f8d5686..0b7b3864f2 100644
--- a/include/pika_cloud_binlog_transverter.h
+++ b/include/pika_cloud_binlog_transverter.h
@@ -10,10 +10,7 @@
 #include
 #include
 #include
-
-#include "output/pika_cloud_binlog.pb.h"
-
-//#define CLOUD_BINLOG_ENCODE_LEN 34
+#include "pika_cloud_binlog.pb.h"

 const int PADDING_BINLOG_PROTOCOL_SIZE = 22;
 const int SPACE_STROE_PARAMETER_LENGTH = 5;

From 050a3a5febe277d7d5a552238c73923d10c2b222 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=99=BD=E9=91=AB?=
Date: Sun, 10 Mar 2024 11:55:37 +0800
Subject: [PATCH 013/116] add S3 in DBStruct

---
 include/pika_cloud_binlog.h | 18 +++++++++---------
 include/pika_define.h       | 27 ++++++++++++++++++++++++++-
 src/pika_cloud_binlog.cc    | 12 ++++++++++++
 src/pika_server.cc          | 6 ++++++
 4 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h
index 0afa401a19..5e879ecc2e 100644
--- a/include/pika_cloud_binlog.h
+++ b/include/pika_cloud_binlog.h
@@ -46,30 +46,30 @@ class CloudVersion final : public pstd::noncopyable {
 class CloudBinlog : public Binlog {
  public:
   CloudBinlog(std::string Binlog_path, int file_size = 100 * 1024 * 1024);
-  ~CloudBinlog() {}
+  ~CloudBinlog();

-  pstd::Status Put(const std::string& item);
+  pstd::Status Put(const std::string& item) override;

-  pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id);
+  pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) override;

-  pstd::Status
GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); + pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr) override; /* * Set Producer pro_num and pro_offset with lock */ - pstd::Status SetProducerStatus(uint32_t pro_num, uint64_t pro_offset, uint32_t term = 0, uint64_t index = 0); + pstd::Status SetProducerStatus(uint32_t pro_num, uint64_t pro_offset, uint32_t term = 0, uint64_t index = 0) override; // Need to hold Lock(); - pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index = 0); + pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index = 0) override; - std::string filename() { return filename_; } + std::string filename() { return filename_; } // need to hold mutex_ - void SetTerm(uint32_t term) { + void SetTerm(uint32_t term) override{ std::lock_guard l(version_->rwlock_); version_->term_ = term; version_->StableSave(); } - uint32_t term() { + uint32_t term() override{ std::shared_lock l(version_->rwlock_); return version_->term_; } diff --git a/include/pika_define.h b/include/pika_define.h index 75e5b0d3ea..39869313fe 100644 --- a/include/pika_define.h +++ b/include/pika_define.h @@ -45,12 +45,37 @@ const int kMaxRsyncParallelNum = 4; struct DBStruct { DBStruct(std::string tn, int32_t inst_num) : db_name(std::move(tn)), db_instance_num(inst_num) {} - +#ifdef USE_S3 + DBStruct(std::string tn, int32_t inst_num, std::string cloud_endpoint_override, std::string cloud_bucket_prefix, + std::string cloud_bucket_suffix, std::string cloud_bucket_region) + : db_name(std::move(tn)), + db_instance_num(inst_num), + cloud_endpoint_override(std::move(cloud_endpoint_override)), + cloud_bucket_prefix(std::move(cloud_bucket_prefix)), + cloud_bucket_suffix(std::move(cloud_bucket_suffix)), + cloud_bucket_region(std::move(cloud_bucket_region)) {} +#endif bool operator==(const DBStruct& db_struct) const { return db_name == db_struct.db_name && db_instance_num == db_struct.db_instance_num; } + +#ifdef USE_S3 + bool operator==(const DBStruct& db_struct) const { + return db_name == db_struct.db_name && db_instance_num == db_struct.db_instance_num && + cloud_endpoint_override == db_struct.cloud_endpoint_override && + cloud_bucket_prefix == db_struct.cloud_bucket_prefix && + cloud_bucket_suffix == db_struct.cloud_bucket_suffix && cloud_bucket_region == db_struct.cloud_bucket_region; + } +#endif std::string db_name; int32_t db_instance_num = 0; +#ifdef USE_S3 + // s3 meta + std::string cloud_endpoint_override; + std::string cloud_bucket_prefix; + std::string cloud_bucket_suffix; + std::string cloud_bucket_region; +#endif }; struct SlaveItem { diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index a8899dcf9d..7c2471aa6c 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -120,6 +120,18 @@ CloudBinlog::CloudBinlog(std::string binlog_path, const int file_size) InitLogFile(); } +CloudBinlog::~CloudBinlog() { + std::lock_guard l(mutex_); + Close(); +} + +void CloudBinlog::Close() { + if (!opened_.load()) { + return; + } + opened_.store(false); +} + void CloudBinlog::InitLogFile() { assert(queue_ != nullptr); uint64_t filesize = queue_->Filesize(); diff --git a/src/pika_server.cc b/src/pika_server.cc index dcc2b890f9..5a36ffd539 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -315,6 +315,12 @@ void PikaServer::InitDBStruct() { std::shared_ptr db_ptr = std::make_shared(name, db_path, 
log_path);
     db_ptr->Init();
     dbs_.emplace(name, db_ptr);
+#ifdef USE_S3
+    db.cloud_endpoint_override = g_pika_conf->cloud_endpoint_override();
+    db.cloud_bucket_prefix = g_pika_conf->cloud_bucket_prefix();
+    db.cloud_bucket_suffix = g_pika_conf->cloud_bucket_suffix();
+    db.cloud_bucket_region = g_pika_conf->cloud_bucket_region();
+#endif
   }
 }

From e3e5dab6874d888714867f6cca80ae97c4e6fe29 Mon Sep 17 00:00:00 2001
From: Charlie Qiao
Date: Tue, 12 Mar 2024 12:03:54 +0800
Subject: [PATCH 014/116] fix cloud binlog bug, add s3 schema in dbstruct, add gtest frame

---
 CMakeLists.txt                       |  1 +
 src/pika_binlog.cc                   |  2 +-
 src/pika_cloud_binlog.cc             |  8 ++---
 src/pika_cloud_binlog.proto          | 12 ++++----
 src/pika_cloud_binlog_transverter.cc |  1 -
 src/pika_command.cc                  |  2 ++
 src/pika_consensus.cc                | 21 ++-----------
 src/pika_inner_message.proto         | 11 +++++--
 src/pika_repl_client_conn.cc         |  6 ++++
 src/pika_repl_server_conn.cc         |  6 ++++
 tests/gtest/CMakeLists.txt           | 46 ++++++++++++++++++++++++++++
 tests/gtest/main.cc                  |  9 ++++++
 12 files changed, 92 insertions(+), 33 deletions(-)
 create mode 100644 tests/gtest/CMakeLists.txt
 create mode 100644 tests/gtest/main.cc

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c075c60af1..df1c58641a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -791,6 +791,7 @@ add_subdirectory(src/cache)
 if (USE_PIKA_TOOLS)
   add_subdirectory(tools)
 endif()
+add_subdirectory(tests/gtest)
 aux_source_directory(src DIR_SRCS)
 #
 # generate version
diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc
index 0d765bb69a..b1fc6d3829 100644
--- a/src/pika_binlog.cc
+++ b/src/pika_binlog.cc
@@ -166,7 +166,7 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32
 }

 Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) {
-  Put(item);
+  return Status::Error("data err");
 }

 // Note: mutex lock should be held
diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc
index 7c2471aa6c..edb8a0a899 100644
--- a/src/pika_cloud_binlog.cc
+++ b/src/pika_cloud_binlog.cc
@@ -156,7 +156,7 @@ Status CloudBinlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, u
 }

 Status CloudBinlog::Put(const std::string& item) {
-  Put(item, 0, 0);
+  return Status::Error("data err: db_id and rocksdb_id empty");
 }
 // Note: mutex lock should be held
 Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) {
@@ -209,7 +209,7 @@ Status CloudBinlog::Put(const char* item, int len) {
     InitLogFile();
   }

-  int pro_offset;
+  int pro_offset = 0;
   s = Produce(pstd::Slice(item, len), &pro_offset);
   if (s.ok()) {
     std::lock_guard l(version_->rwlock_);
@@ -226,7 +226,7 @@ Status CloudBinlog::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n,
   assert(block_offset_ + kHeaderSize + n <= kBlockSize);

   char buf[kHeaderSize];
-  uint64_t now;
+  uint64_t now = 0;
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   now = tv.tv_sec;
@@ -303,7 +303,7 @@ Status CloudBinlog::AppendPadding(pstd::WritableFile* file, uint64_t* len) {
   Status s;
   char buf[kBlockSize];
-  uint64_t now;
+  uint64_t now = 0;
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   now = tv.tv_sec;
diff --git a/src/pika_cloud_binlog.proto b/src/pika_cloud_binlog.proto
index efd7a470bb..b9ff844bf8 100644
--- a/src/pika_cloud_binlog.proto
+++ b/src/pika_cloud_binlog.proto
@@ -4,13 +4,13 @@ package cloud;

 message BinlogCloudItem {
   //belong to which db
-  uint32 db_id = 1;
+  uint64 db_id = 1;
   //belong to whicn rocksdb
-  uint32 rocksdb_id = 2;
+  uint64 rocksdb_id = 2;
   //data write time
-  uint32 exec_time = 3;
-  uint32
term_id = 4; - uint32 file_num = 5; + uint64 exec_time = 3; + uint64 term_id = 4; + uint64 file_num = 5; uint64 offset = 6; bytes content = 7; -} +} \ No newline at end of file diff --git a/src/pika_cloud_binlog_transverter.cc b/src/pika_cloud_binlog_transverter.cc index a034b04fc4..867af41c4a 100644 --- a/src/pika_cloud_binlog_transverter.cc +++ b/src/pika_cloud_binlog_transverter.cc @@ -73,7 +73,6 @@ bool PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(const std::strin LOG(ERROR) << "Failed to deserialize cloud binlog item"; return false; } - // bx check binlog_item->set_content(""); return true; } diff --git a/src/pika_command.cc b/src/pika_command.cc index b76baca28b..376f81dc61 100644 --- a/src/pika_command.cc +++ b/src/pika_command.cc @@ -880,7 +880,9 @@ void Cmd::InternalProcessCommand(const HintKeys& hint_keys) { do_duration_ += pstd::NowMicros() - start_us; } +#ifndef USE_S3 DoBinlog(); +#endif if (is_write()) { record_lock.Unlock(current_key()); diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc index 1a4b48b5f4..9a7debefa5 100644 --- a/src/pika_consensus.cc +++ b/src/pika_consensus.cc @@ -76,7 +76,7 @@ void Context::UpdateAppliedIndex(const LogOffset& offset) { StableSave(); } } -//bx check 查看是否需要在外层修改 + void Context::Reset(const LogOffset& offset) { std::lock_guard l(rwlock_); applied_index_ = offset; @@ -365,25 +365,10 @@ Status ConsensusCoordinator::UpdateSlave(const std::string& ip, int port, const } Status ConsensusCoordinator::InternalAppendBinlog(const std::shared_ptr& cmd_ptr) { - Status s; std::string content = cmd_ptr->ToRedisProtocol(); - std::string db_name = cmd_ptr->db_name().empty() ? g_pika_conf->default_db() : cmd_ptr->db_name(); - uint32_t db_id = std::stoi(db_name.substr(strlen("db"))); - std::vector keys = cmd_ptr->current_key(); - - if (keys.empty()) { - //需要特殊处理,比如flushdb bx check - } else { - //多key也需要特殊处理 带hashtag bx check - uint32_t slot_id = GetSlotsID(g_pika_conf->default_slot_num(), keys[0], nullptr, nullptr); - if (g_pika_conf->pika_model() == PIKA_LOCAL) { - s = stable_logger_->Logger()->Put(content); - }else if (g_pika_conf->pika_model() == PIKA_CLOUD) { - s = stable_logger_->Logger()->Put(content, db_id, slot_id); - } - } - + Status s = stable_logger_->Logger()->Put(content); if (!s.ok()) { + std::string db_name = cmd_ptr->db_name().empty() ? 
g_pika_conf->default_db() : cmd_ptr->db_name(); std::shared_ptr db = g_pika_server->GetDB(db_name); if (db) { db->SetBinlogIoError(); diff --git a/src/pika_inner_message.proto b/src/pika_inner_message.proto index 9e2a3ef04c..c037cd9d51 100644 --- a/src/pika_inner_message.proto +++ b/src/pika_inner_message.proto @@ -35,9 +35,14 @@ message Slot { } message DBInfo { - required string db_name = 1; - required uint32 slot_num = 2; - repeated uint32 slot_ids = 3; + required string db_name = 1; + required uint32 slot_num = 2; + repeated uint32 slot_ids = 3; + //s3 info + optional string cloud_endpoint_override = 4; + optional string cloud_bucket_prefix = 5; + optional string cloud_bucket_suffix = 6; + optional string cloud_bucket_region = 7; } message PikaMeta { diff --git a/src/pika_repl_client_conn.cc b/src/pika_repl_client_conn.cc index 672648d64d..cd9482e811 100644 --- a/src/pika_repl_client_conn.cc +++ b/src/pika_repl_client_conn.cc @@ -110,7 +110,13 @@ void PikaReplClientConn::HandleMetaSyncResponse(void* arg) { std::vector master_db_structs; for (int idx = 0; idx < meta_sync.dbs_info_size(); ++idx) { const InnerMessage::InnerResponse_MetaSync_DBInfo& db_info = meta_sync.dbs_info(idx); +#ifdef USE_S3 + master_db_structs.push_back({db_info.db_name(), db_info.db_instance_num(), + db_info.cloud_endpoint_override(), db_info.cloud_bucket_prefix(), + db_info.cloud_bucket_suffix(), db_info.cloud_bucket_region()}); +#else master_db_structs.push_back({db_info.db_name(), db_info.db_instance_num()}); +#endif } std::vector self_db_structs = g_pika_conf->db_structs(); diff --git a/src/pika_repl_server_conn.cc b/src/pika_repl_server_conn.cc index 21847db3cd..677a6b7891 100644 --- a/src/pika_repl_server_conn.cc +++ b/src/pika_repl_server_conn.cc @@ -65,6 +65,12 @@ void PikaReplServerConn::HandleMetaSyncRequest(void* arg) { */ db_info->set_slot_num(1); db_info->set_db_instance_num(db_struct.db_instance_num); +#ifdef USE_S3 + db_info->set_cloud_endpoint_override(db_struct.cloud_endpoint_override); + db_info->set_cloud_bucket_prefix(db_struct.cloud_bucket_prefix); + db_info->set_cloud_bucket_suffix(db_struct.cloud_bucket_suffix); + db_info->set_cloud_bucket_region(db_struct.cloud_bucket_region); +#endif } } } diff --git a/tests/gtest/CMakeLists.txt b/tests/gtest/CMakeLists.txt new file mode 100644 index 0000000000..845f545e45 --- /dev/null +++ b/tests/gtest/CMakeLists.txt @@ -0,0 +1,46 @@ +cmake_minimum_required(VERSION 3.18) + +include(GoogleTest) + +file(GLOB_RECURSE PIKA_TEST_SOURCE "${CMAKE_SOURCE_DIR}/tests/gtest/*.cc") + +aux_source_directory(../../src TEST_SRCS) + +add_compile_definitions(PIKA_ROOT_DIR="${CMAKE_SOURCE_DIR}") + +foreach(pika_test_source ${PIKA_TEST_SOURCE}) + get_filename_component(pika_test_filename ${pika_test_source} NAME) + string(REPLACE ".cc" "" pika_test_name ${pika_test_filename}) + + # Add the test target + add_executable(${pika_test_name} ${pika_test_source}) + target_include_directories(${pika_test_name} + PUBLIC ${CMAKE_SOURCE_DIR}/include + PUBLIC ${PROJECT_SOURCE_DIR}/include + PUBLIC ${PROJECT_SOURCE_DIR}/.. 
+ ${ROCKSDB_INCLUDE_DIR} + ${ROCKSDB_SOURCE_DIR} + ) + add_dependencies(${pika_test_name} gtest glog gflags ${LIBUNWIND_NAME}) + target_link_libraries(${pika_test_name} + PUBLIC ${GTEST_LIBRARY} + PUBLIC ${ROCKSDB_LIBRARY} + PUBLIC pstd + PUBLIC net + PUBLIC storage + PUBLIC ${GLOG_LIBRARY} + PUBLIC ${GFLAGS_LIBRARY} + PUBLIC ${LIBUNWIND_LIBRARY} + ) + if (USE_S3) + target_link_libraries(${pika_test_name} + PUBLIC libaws-cpp-sdk-core.so + PUBLIC libaws-cpp-sdk-transfer.so + PUBLIC libaws-cpp-sdk-kinesis.so + PUBLIC libaws-cpp-sdk-s3.so + ) + endif() + add_test(NAME ${pika_test_name} + COMMAND ${pika_test_name} + WORKING_DIRECTORY .) +endforeach(pika_test_source ${PIKA_TEST_SOURCE}) diff --git a/tests/gtest/main.cc b/tests/gtest/main.cc new file mode 100644 index 0000000000..791cf35d04 --- /dev/null +++ b/tests/gtest/main.cc @@ -0,0 +1,9 @@ +// +// Created by Bai Xin on 2024/3/11. +// + +#include +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file From 2d892853908bf894feb5503914e3a85067b1162b Mon Sep 17 00:00:00 2001 From: Charlie Qiao Date: Tue, 12 Mar 2024 16:08:52 +0800 Subject: [PATCH 015/116] change static define to dynamic --- src/pika_command.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pika_command.cc b/src/pika_command.cc index 376f81dc61..06e165280b 100644 --- a/src/pika_command.cc +++ b/src/pika_command.cc @@ -880,9 +880,9 @@ void Cmd::InternalProcessCommand(const HintKeys& hint_keys) { do_duration_ += pstd::NowMicros() - start_us; } -#ifndef USE_S3 - DoBinlog(); -#endif + if (g_pika_conf->pika_model() == PIKA_LOCAL) { + DoBinlog(); + } if (is_write()) { record_lock.Unlock(current_key()); From e048075d43f038602ab7d2f1bacd0670bd73f872 Mon Sep 17 00:00:00 2001 From: Charlie Qiao Date: Tue, 12 Mar 2024 18:41:32 +0800 Subject: [PATCH 016/116] clean code --- include/pika_binlog_transverter.h | 1 + include/pika_cloud_binlog_transverter.h | 2 ++ src/pika_admin.cc | 8 ++++---- src/pika_binlog_transverter.cc | 3 +-- src/pika_cloud_binlog.cc | 6 ++++-- src/pika_cloud_binlog_transverter.cc | 4 +++- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/pika_binlog_transverter.h b/include/pika_binlog_transverter.h index d85d958667..a2f9b9364d 100644 --- a/include/pika_binlog_transverter.h +++ b/include/pika_binlog_transverter.h @@ -7,6 +7,7 @@ #define PIKA_BINLOG_TRANSVERTER_H_ #include + #include #include #include diff --git a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h index 0b7b3864f2..e422725a5a 100644 --- a/include/pika_cloud_binlog_transverter.h +++ b/include/pika_cloud_binlog_transverter.h @@ -7,9 +7,11 @@ #define PIKA_CLOUD_BINLOG_TRANSVERTER_H_ #include + #include #include #include + #include "pika_cloud_binlog.pb.h" const int PADDING_BINLOG_PROTOCOL_SIZE = 22; diff --git a/src/pika_admin.cc b/src/pika_admin.cc index ea5d8b801a..8ab74d4325 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -2899,12 +2899,12 @@ void PaddingCmd::DoInitial() { void PaddingCmd::Do() { res_.SetRes(CmdRes::kOk); } std::string PaddingCmd::ToRedisProtocol() { - if (g_pika_conf->pika_model() == PIKA_LOCAL) - return PikaBinlogTransverter::ConstructPaddingBinlog( + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + return PikaBinlogTransverter::ConstructPaddingBinlog(BinlogType::TypeFirst, argv_[1].size()); + } + return PikaBinlogTransverter::ConstructPaddingBinlog( BinlogType::TypeFirst, argv_[1].size() + BINLOG_ITEM_HEADER_SIZE + 
PADDING_BINLOG_PROTOCOL_SIZE + SPACE_STROE_PARAMETER_LENGTH); - else if (g_pika_conf->pika_model() == PIKA_CLOUD) - return PikaBinlogTransverter::ConstructPaddingBinlog(BinlogType::TypeFirst, argv_[1].size()); } void PKPatternMatchDelCmd::DoInitial() { diff --git a/src/pika_binlog_transverter.cc b/src/pika_binlog_transverter.cc index a6f3d2b271..5efd918482 100644 --- a/src/pika_binlog_transverter.cc +++ b/src/pika_binlog_transverter.cc @@ -6,14 +6,13 @@ #include "include/pika_binlog_transverter.h" #include + #include #include #include "pstd/include/pstd_coding.h" - #include "include/pika_command.h" #include "include/pika_define.h" -#include "storage/storage.h" uint32_t BinlogItem::exec_time() const { return exec_time_; } diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index edb8a0a899..589d43d219 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -2,16 +2,18 @@ // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#include "include/pika_cloud_binlog.h" #include #include #include + #include -#include "include/pika_cloud_binlog.h" -#include "include/pika_cloud_binlog_transverter.h" #include "pstd/include/pstd_defer.h" #include "pstd_status.h" +#include "include/pika_cloud_binlog_transverter.h" + using pstd::Status; diff --git a/src/pika_cloud_binlog_transverter.cc b/src/pika_cloud_binlog_transverter.cc index 867af41c4a..eb3e7fe0f8 100644 --- a/src/pika_cloud_binlog_transverter.cc +++ b/src/pika_cloud_binlog_transverter.cc @@ -4,9 +4,11 @@ // of patent rights can be found in the PATENTS file in the same directory. #include "include/pika_cloud_binlog_transverter.h" + #include -#include + #include + #include "include/pika_command.h" #include "pstd/include/pstd_coding.h" From fbf07eb3316c471091bd1f45bfb0cae662105d1b Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 12 Mar 2024 20:25:45 +0800 Subject: [PATCH 017/116] fix by review comments --- src/storage/src/redis.cc | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 59cde54269..23c22b3c48 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -513,9 +513,6 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { for (const auto& cf : handles_) { db_->SetOptions(cf, db_options); } - rocksdb::FlushOptions fops; - fops.wait = true; - db_->Flush(fops, handles_); cfs_->SwitchMaster(false); cfs_options["is_master"] = "false"; return ReOpenRocksDB(db_options, cfs_options); @@ -528,10 +525,9 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { // slave promotes to master if (!is_old_master && is_new_master) { - db_options["disable_auto_compactions"] = "true"; - db_options["disable_auto_flush"] = "true"; + db_options["disable_auto_compactions"] = "false"; + db_options["disable_auto_flush"] = "false"; cfs_options["is_master"] = "true"; - // compare manifest_sequence uint64_t local_manifest_sequence = db_->GetManifestUpdateSequence(); uint64_t remote_manifest_sequence = 0; @@ -543,18 +539,15 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { // local's version cannot beyond remote's, just holding extra data in memtables assert(local_manifest_sequence == remote_manifest_sequence); - db_options["disable_auto_compactions"] = "false"; - db_options["disable_auto_flush"] = "false"; + 
db_->NewManifestOnNextUpdate(); + cfs_->SwitchMaster(true); for (const auto& cf : handles_) { db_->SetOptions(cf, db_options); } - db_->NewManifestOnNextUpdate(); - cfs_options["is_master"] = "master"; rocksdb::FlushOptions fops; fops.wait = true; db_->Flush(fops, handles_); - cfs_->SwitchMaster(false); //TODO //cfs_->UploadManifest(); return Status::OK(); From 62b1ecc5445b00d4232daf49c6da396754299158 Mon Sep 17 00:00:00 2001 From: Charlie Qiao Date: Thu, 14 Mar 2024 18:12:42 +0800 Subject: [PATCH 018/116] add cloud binlog unit test --- include/pika_binlog.h | 4 +- include/pika_cloud_binlog.h | 8 +-- tests/gtest/CMakeLists.txt | 46 -------------- tests/gtest/cloud_binlog_test.cc | 101 +++++++++++++++++++++++++++++++ tests/gtest/main.cc | 9 --- 5 files changed, 107 insertions(+), 61 deletions(-) delete mode 100644 tests/gtest/CMakeLists.txt create mode 100644 tests/gtest/cloud_binlog_test.cc delete mode 100644 tests/gtest/main.cc diff --git a/include/pika_binlog.h b/include/pika_binlog.h index bca0904c7c..029119da55 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -63,7 +63,7 @@ class Binlog : public pstd::noncopyable { // Need to hold Lock(); virtual pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index); - std::string filename() { return filename_; } + virtual std::string filename() { return filename_; } // need to hold mutex_ virtual void SetTerm(uint32_t term) { @@ -77,7 +77,7 @@ class Binlog : public pstd::noncopyable { return version_->term_; } - void Close(); + virtual void Close(); private: pstd::Status Put(const char* item, int len); diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h index 5e879ecc2e..2025d3cbf1 100644 --- a/include/pika_cloud_binlog.h +++ b/include/pika_cloud_binlog.h @@ -60,21 +60,21 @@ class CloudBinlog : public Binlog { // Need to hold Lock(); pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index = 0) override; - std::string filename() { return filename_; } + std::string filename() override { return filename_; } // need to hold mutex_ - void SetTerm(uint32_t term) override{ + void SetTerm(uint32_t term) override { std::lock_guard l(version_->rwlock_); version_->term_ = term; version_->StableSave(); } - uint32_t term() override{ + uint32_t term() override { std::shared_lock l(version_->rwlock_); return version_->term_; } - void Close(); + void Close() override; private: pstd::Status Put(const char* item, int len); diff --git a/tests/gtest/CMakeLists.txt b/tests/gtest/CMakeLists.txt deleted file mode 100644 index 845f545e45..0000000000 --- a/tests/gtest/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -cmake_minimum_required(VERSION 3.18) - -include(GoogleTest) - -file(GLOB_RECURSE PIKA_TEST_SOURCE "${CMAKE_SOURCE_DIR}/tests/gtest/*.cc") - -aux_source_directory(../../src TEST_SRCS) - -add_compile_definitions(PIKA_ROOT_DIR="${CMAKE_SOURCE_DIR}") - -foreach(pika_test_source ${PIKA_TEST_SOURCE}) - get_filename_component(pika_test_filename ${pika_test_source} NAME) - string(REPLACE ".cc" "" pika_test_name ${pika_test_filename}) - - # Add the test target - add_executable(${pika_test_name} ${pika_test_source}) - target_include_directories(${pika_test_name} - PUBLIC ${CMAKE_SOURCE_DIR}/include - PUBLIC ${PROJECT_SOURCE_DIR}/include - PUBLIC ${PROJECT_SOURCE_DIR}/.. 
- ${ROCKSDB_INCLUDE_DIR} - ${ROCKSDB_SOURCE_DIR} - ) - add_dependencies(${pika_test_name} gtest glog gflags ${LIBUNWIND_NAME}) - target_link_libraries(${pika_test_name} - PUBLIC ${GTEST_LIBRARY} - PUBLIC ${ROCKSDB_LIBRARY} - PUBLIC pstd - PUBLIC net - PUBLIC storage - PUBLIC ${GLOG_LIBRARY} - PUBLIC ${GFLAGS_LIBRARY} - PUBLIC ${LIBUNWIND_LIBRARY} - ) - if (USE_S3) - target_link_libraries(${pika_test_name} - PUBLIC libaws-cpp-sdk-core.so - PUBLIC libaws-cpp-sdk-transfer.so - PUBLIC libaws-cpp-sdk-kinesis.so - PUBLIC libaws-cpp-sdk-s3.so - ) - endif() - add_test(NAME ${pika_test_name} - COMMAND ${pika_test_name} - WORKING_DIRECTORY .) -endforeach(pika_test_source ${PIKA_TEST_SOURCE}) diff --git a/tests/gtest/cloud_binlog_test.cc b/tests/gtest/cloud_binlog_test.cc new file mode 100644 index 0000000000..82aa4fac20 --- /dev/null +++ b/tests/gtest/cloud_binlog_test.cc @@ -0,0 +1,101 @@ +// +// Created by Bai Xin on 2024/3/11. +// +#include + +#include + +#include "../../include/pika_binlog_reader.h" +#include "../../include/pika_cloud_binlog.h" +#include "include/pika_cloud_binlog_transverter.h" + +class CloudBinlogTransverterTest : public ::testing::Test {}; + +class CloudBinlogTest : public ::testing::Test { + public: + CloudBinlogTest() = default; + ~CloudBinlogTest() override = default; + + void SetUp() override { + std::string path = "./cloudbinlog/"; + pstd::DeleteDirIfExist(path); + mkdir(path.c_str(), 0755); + cloudBinlog = std::make_shared(path); + } + + void TearDown() override { + std::string path = "./cloudbinlog"; + pstd::DeleteFile(path.c_str()); + } + + static void SetUpTestSuite() {} + static void TearDownTestSuite() {} + + std::shared_ptr cloudBinlog; +}; + +TEST_F(CloudBinlogTest, GetPutTest) { + pstd::Status s = CloudBinlogTest::cloudBinlog->Put("test", 1, 1); + ASSERT_TRUE(s.ok()); + + PikaBinlogReader binlog_reader; + uint32_t filenum = 0; + uint32_t term = 0; + uint64_t offset = 0; + + s = CloudBinlogTest::cloudBinlog->GetProducerStatus(&filenum, &offset, &term, nullptr); + ASSERT_TRUE(s.ok()); + + s = CloudBinlogTest::cloudBinlog->Put("yyyy", 1, 1); + ASSERT_TRUE(s.ok()); + + int res = binlog_reader.Seek(CloudBinlogTest::cloudBinlog, filenum, offset); + ASSERT_EQ(res, 0); + + std::string binlog; + s = binlog_reader.Get(&binlog, &filenum, &offset); + ASSERT_TRUE(s.ok()); + + cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); + PikaCloudBinlogTransverter::BinlogDecode(binlog, binlog_item); + ASSERT_EQ(1, binlog_item->db_id()); + ASSERT_EQ(1, binlog_item->rocksdb_id()); + ASSERT_STREQ("yyyy", binlog_item->content().c_str()); + + delete binlog_item; +} + +TEST_F(CloudBinlogTransverterTest, CodeTest) { + std::string binlog_item_s = + PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test"); + cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); + PikaCloudBinlogTransverter::BinlogDecode(binlog_item_s, binlog_item); + ASSERT_EQ(1, binlog_item->db_id()); + ASSERT_EQ(1, binlog_item->rocksdb_id()); + ASSERT_EQ(1, binlog_item->exec_time()); + ASSERT_EQ(1, binlog_item->term_id()); + ASSERT_EQ(4294967294, binlog_item->file_num()); // 4294967294 = 2^32 - 1 + ASSERT_EQ(18446744073709551615, binlog_item->offset()); // 18446744073709551615 = 2^64 -1 + ASSERT_STREQ("test", binlog_item->content().c_str()); + delete binlog_item; +} + +TEST_F(CloudBinlogTransverterTest, WithoutContentDecodeTest) { + std::string binlog_item_s = + PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test"); 
+ cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); + PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(binlog_item_s, binlog_item); + ASSERT_EQ(1, binlog_item->db_id()); + ASSERT_EQ(1, binlog_item->rocksdb_id()); + ASSERT_EQ(1, binlog_item->exec_time()); + ASSERT_EQ(1, binlog_item->term_id()); + ASSERT_EQ(4294967294, binlog_item->file_num()); // 4294967294 = 2^32 - 1 + ASSERT_EQ(18446744073709551615, binlog_item->offset()); // 18446744073709551615 = 2^64 -1 + ASSERT_STREQ("", binlog_item->content().c_str()); + delete binlog_item; +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/tests/gtest/main.cc b/tests/gtest/main.cc deleted file mode 100644 index 791cf35d04..0000000000 --- a/tests/gtest/main.cc +++ /dev/null @@ -1,9 +0,0 @@ -// -// Created by Bai Xin on 2024/3/11. -// - -#include -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} \ No newline at end of file From acb61c87ac939ba9b704a067ec99db7b2e1ca991 Mon Sep 17 00:00:00 2001 From: baixin Date: Thu, 14 Mar 2024 19:17:42 +0800 Subject: [PATCH 019/116] clean code --- CMakeLists.txt | 1 - tests/gtest/cloud_binlog_test.cc | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index df1c58641a..c075c60af1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -791,7 +791,6 @@ add_subdirectory(src/cache) if (USE_PIKA_TOOLS) add_subdirectory(tools) endif() -add_subdirectory(tests/gtest) aux_source_directory(src DIR_SRCS) # # generate version diff --git a/tests/gtest/cloud_binlog_test.cc b/tests/gtest/cloud_binlog_test.cc index 82aa4fac20..d19bac1639 100644 --- a/tests/gtest/cloud_binlog_test.cc +++ b/tests/gtest/cloud_binlog_test.cc @@ -5,8 +5,8 @@ #include -#include "../../include/pika_binlog_reader.h" -#include "../../include/pika_cloud_binlog.h" +#include "include/pika_binlog_reader.h" +#include "include/pika_cloud_binlog.h" #include "include/pika_cloud_binlog_transverter.h" class CloudBinlogTransverterTest : public ::testing::Test {}; From 9c2388705d48e0a26a3ecd4034ed4b888be656d6 Mon Sep 17 00:00:00 2001 From: Charlie Qiao Date: Thu, 14 Mar 2024 18:12:42 +0800 Subject: [PATCH 020/116] add cloud binlog unit test --- include/pika_binlog.h | 4 +- include/pika_cloud_binlog.h | 8 +-- tests/gtest/CMakeLists.txt | 46 -------------- tests/gtest/cloud_binlog_test.cc | 101 +++++++++++++++++++++++++++++++ tests/gtest/main.cc | 9 --- 5 files changed, 107 insertions(+), 61 deletions(-) delete mode 100644 tests/gtest/CMakeLists.txt create mode 100644 tests/gtest/cloud_binlog_test.cc delete mode 100644 tests/gtest/main.cc diff --git a/include/pika_binlog.h b/include/pika_binlog.h index bca0904c7c..029119da55 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -63,7 +63,7 @@ class Binlog : public pstd::noncopyable { // Need to hold Lock(); virtual pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index); - std::string filename() { return filename_; } + virtual std::string filename() { return filename_; } // need to hold mutex_ virtual void SetTerm(uint32_t term) { @@ -77,7 +77,7 @@ class Binlog : public pstd::noncopyable { return version_->term_; } - void Close(); + virtual void Close(); private: pstd::Status Put(const char* item, int len); diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h index 5e879ecc2e..2025d3cbf1 100644 --- a/include/pika_cloud_binlog.h +++ 
b/include/pika_cloud_binlog.h @@ -60,21 +60,21 @@ class CloudBinlog : public Binlog { // Need to hold Lock(); pstd::Status Truncate(uint32_t pro_num, uint64_t pro_offset, uint64_t index = 0) override; - std::string filename() { return filename_; } + std::string filename() override { return filename_; } // need to hold mutex_ - void SetTerm(uint32_t term) override{ + void SetTerm(uint32_t term) override { std::lock_guard l(version_->rwlock_); version_->term_ = term; version_->StableSave(); } - uint32_t term() override{ + uint32_t term() override { std::shared_lock l(version_->rwlock_); return version_->term_; } - void Close(); + void Close() override; private: pstd::Status Put(const char* item, int len); diff --git a/tests/gtest/CMakeLists.txt b/tests/gtest/CMakeLists.txt deleted file mode 100644 index 845f545e45..0000000000 --- a/tests/gtest/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -cmake_minimum_required(VERSION 3.18) - -include(GoogleTest) - -file(GLOB_RECURSE PIKA_TEST_SOURCE "${CMAKE_SOURCE_DIR}/tests/gtest/*.cc") - -aux_source_directory(../../src TEST_SRCS) - -add_compile_definitions(PIKA_ROOT_DIR="${CMAKE_SOURCE_DIR}") - -foreach(pika_test_source ${PIKA_TEST_SOURCE}) - get_filename_component(pika_test_filename ${pika_test_source} NAME) - string(REPLACE ".cc" "" pika_test_name ${pika_test_filename}) - - # Add the test target - add_executable(${pika_test_name} ${pika_test_source}) - target_include_directories(${pika_test_name} - PUBLIC ${CMAKE_SOURCE_DIR}/include - PUBLIC ${PROJECT_SOURCE_DIR}/include - PUBLIC ${PROJECT_SOURCE_DIR}/.. - ${ROCKSDB_INCLUDE_DIR} - ${ROCKSDB_SOURCE_DIR} - ) - add_dependencies(${pika_test_name} gtest glog gflags ${LIBUNWIND_NAME}) - target_link_libraries(${pika_test_name} - PUBLIC ${GTEST_LIBRARY} - PUBLIC ${ROCKSDB_LIBRARY} - PUBLIC pstd - PUBLIC net - PUBLIC storage - PUBLIC ${GLOG_LIBRARY} - PUBLIC ${GFLAGS_LIBRARY} - PUBLIC ${LIBUNWIND_LIBRARY} - ) - if (USE_S3) - target_link_libraries(${pika_test_name} - PUBLIC libaws-cpp-sdk-core.so - PUBLIC libaws-cpp-sdk-transfer.so - PUBLIC libaws-cpp-sdk-kinesis.so - PUBLIC libaws-cpp-sdk-s3.so - ) - endif() - add_test(NAME ${pika_test_name} - COMMAND ${pika_test_name} - WORKING_DIRECTORY .) -endforeach(pika_test_source ${PIKA_TEST_SOURCE}) diff --git a/tests/gtest/cloud_binlog_test.cc b/tests/gtest/cloud_binlog_test.cc new file mode 100644 index 0000000000..82aa4fac20 --- /dev/null +++ b/tests/gtest/cloud_binlog_test.cc @@ -0,0 +1,101 @@ +// +// Created by Bai Xin on 2024/3/11. 
+// +#include + +#include + +#include "../../include/pika_binlog_reader.h" +#include "../../include/pika_cloud_binlog.h" +#include "include/pika_cloud_binlog_transverter.h" + +class CloudBinlogTransverterTest : public ::testing::Test {}; + +class CloudBinlogTest : public ::testing::Test { + public: + CloudBinlogTest() = default; + ~CloudBinlogTest() override = default; + + void SetUp() override { + std::string path = "./cloudbinlog/"; + pstd::DeleteDirIfExist(path); + mkdir(path.c_str(), 0755); + cloudBinlog = std::make_shared(path); + } + + void TearDown() override { + std::string path = "./cloudbinlog"; + pstd::DeleteFile(path.c_str()); + } + + static void SetUpTestSuite() {} + static void TearDownTestSuite() {} + + std::shared_ptr cloudBinlog; +}; + +TEST_F(CloudBinlogTest, GetPutTest) { + pstd::Status s = CloudBinlogTest::cloudBinlog->Put("test", 1, 1); + ASSERT_TRUE(s.ok()); + + PikaBinlogReader binlog_reader; + uint32_t filenum = 0; + uint32_t term = 0; + uint64_t offset = 0; + + s = CloudBinlogTest::cloudBinlog->GetProducerStatus(&filenum, &offset, &term, nullptr); + ASSERT_TRUE(s.ok()); + + s = CloudBinlogTest::cloudBinlog->Put("yyyy", 1, 1); + ASSERT_TRUE(s.ok()); + + int res = binlog_reader.Seek(CloudBinlogTest::cloudBinlog, filenum, offset); + ASSERT_EQ(res, 0); + + std::string binlog; + s = binlog_reader.Get(&binlog, &filenum, &offset); + ASSERT_TRUE(s.ok()); + + cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); + PikaCloudBinlogTransverter::BinlogDecode(binlog, binlog_item); + ASSERT_EQ(1, binlog_item->db_id()); + ASSERT_EQ(1, binlog_item->rocksdb_id()); + ASSERT_STREQ("yyyy", binlog_item->content().c_str()); + + delete binlog_item; +} + +TEST_F(CloudBinlogTransverterTest, CodeTest) { + std::string binlog_item_s = + PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test"); + cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); + PikaCloudBinlogTransverter::BinlogDecode(binlog_item_s, binlog_item); + ASSERT_EQ(1, binlog_item->db_id()); + ASSERT_EQ(1, binlog_item->rocksdb_id()); + ASSERT_EQ(1, binlog_item->exec_time()); + ASSERT_EQ(1, binlog_item->term_id()); + ASSERT_EQ(4294967294, binlog_item->file_num()); // 4294967294 = 2^32 - 1 + ASSERT_EQ(18446744073709551615, binlog_item->offset()); // 18446744073709551615 = 2^64 -1 + ASSERT_STREQ("test", binlog_item->content().c_str()); + delete binlog_item; +} + +TEST_F(CloudBinlogTransverterTest, WithoutContentDecodeTest) { + std::string binlog_item_s = + PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test"); + cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); + PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(binlog_item_s, binlog_item); + ASSERT_EQ(1, binlog_item->db_id()); + ASSERT_EQ(1, binlog_item->rocksdb_id()); + ASSERT_EQ(1, binlog_item->exec_time()); + ASSERT_EQ(1, binlog_item->term_id()); + ASSERT_EQ(4294967294, binlog_item->file_num()); // 4294967294 = 2^32 - 1 + ASSERT_EQ(18446744073709551615, binlog_item->offset()); // 18446744073709551615 = 2^64 -1 + ASSERT_STREQ("", binlog_item->content().c_str()); + delete binlog_item; +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/tests/gtest/main.cc b/tests/gtest/main.cc deleted file mode 100644 index 791cf35d04..0000000000 --- a/tests/gtest/main.cc +++ /dev/null @@ -1,9 +0,0 @@ -// -// Created by Bai Xin on 2024/3/11. 
-// - -#include -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} \ No newline at end of file From 6469c9f6f3e1af71281541e29b20aafde8c61990 Mon Sep 17 00:00:00 2001 From: baixin Date: Thu, 14 Mar 2024 19:17:42 +0800 Subject: [PATCH 021/116] clean code --- CMakeLists.txt | 1 - tests/gtest/cloud_binlog_test.cc | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index df1c58641a..c075c60af1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -791,7 +791,6 @@ add_subdirectory(src/cache) if (USE_PIKA_TOOLS) add_subdirectory(tools) endif() -add_subdirectory(tests/gtest) aux_source_directory(src DIR_SRCS) # # generate version diff --git a/tests/gtest/cloud_binlog_test.cc b/tests/gtest/cloud_binlog_test.cc index 82aa4fac20..d19bac1639 100644 --- a/tests/gtest/cloud_binlog_test.cc +++ b/tests/gtest/cloud_binlog_test.cc @@ -5,8 +5,8 @@ #include -#include "../../include/pika_binlog_reader.h" -#include "../../include/pika_cloud_binlog.h" +#include "include/pika_binlog_reader.h" +#include "include/pika_cloud_binlog.h" #include "include/pika_cloud_binlog_transverter.h" class CloudBinlogTransverterTest : public ::testing::Test {}; From e152bb81bbcdeca1939cce16b0aa3c6ee8dad09b Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 15 Mar 2024 10:50:57 +0800 Subject: [PATCH 022/116] pika support switch master/slave role --- include/pika_db.h | 4 ++++ include/pika_server.h | 1 + src/pika_admin.cc | 13 +++++++++++++ src/pika_db.cc | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/include/pika_db.h b/include/pika_db.h index 8280b6bf38..a24dc7df88 100644 --- a/include/pika_db.h +++ b/include/pika_db.h @@ -154,6 +154,10 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { bool IsBgSaving(); BgSaveInfo bgsave_info(); pstd::Status GetKeyNum(std::vector* key_info); + /* + * Switch Master/Slave role use + */ + pstd::Status SwitchMaster(bool is_old_master, bool is_new_master); private: bool opened_ = false; diff --git a/include/pika_server.h b/include/pika_server.h index 34145fc171..a04137dcd7 100644 --- a/include/pika_server.h +++ b/include/pika_server.h @@ -454,6 +454,7 @@ class PikaServer : public pstd::noncopyable { friend class InfoCmd; friend class PikaReplClientConn; friend class PkClusterInfoCmd; + friend class SlaveofCmd; struct BGCacheTaskArg { BGCacheTaskArg() : conf(nullptr), reenable_cache(false) {} diff --git a/src/pika_admin.cc b/src/pika_admin.cc index 8ab74d4325..85da5858e5 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -150,6 +150,12 @@ void SlaveofCmd::Do() { g_pika_server->RemoveMaster(); if (is_none_) { + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + std::shared_lock rwl(g_pika_server->dbs_rw_); + for (const auto& db_item : g_pika_server->dbs_) { + db_item.second->SwitchMaster(false, true); + } + } res_.SetRes(CmdRes::kOk); g_pika_conf->SetSlaveof(std::string()); return; @@ -162,6 +168,12 @@ void SlaveofCmd::Do() { bool sm_ret = g_pika_server->SetMaster(master_ip_, static_cast(master_port_)); if (sm_ret) { + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + std::shared_lock rwl(g_pika_server->dbs_rw_); + for (const auto& db_item : g_pika_server->dbs_) { + db_item.second->SwitchMaster(true, false); + } + } res_.SetRes(CmdRes::kOk); g_pika_server->ClearCacheDbAsync(db_); g_pika_conf->SetSlaveof(master_ip_ + ":" + std::to_string(master_port_)); @@ -176,6 +188,7 @@ void SlaveofCmd::Do() { * dbslaveof db[0 ~ 7] force * dbslaveof db[0 ~ 7] no one * dbslaveof 
db[0 ~ 7] filenum offset + * Command is deprecated. */ void DbSlaveofCmd::DoInitial() { if (!CheckArg(argv_.size())) { diff --git a/src/pika_db.cc b/src/pika_db.cc index ce51132499..1269aab988 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -638,3 +638,9 @@ bool DB::FlushDB() { std::lock_guard l(bgsave_protector_); return FlushDBWithoutLock(); } + +pstd::Status DB::SwitchMaster(bool is_old_master, bool is_new_master) { +#ifdef USE_S3 + return storage_.SwitchMaster(is_old_master, is_new_master); +#endif +} \ No newline at end of file From bee290da1c95d51484d0fdee1e0db7b4b37438f0 Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 15 Mar 2024 20:29:21 +0800 Subject: [PATCH 023/116] support full sync --- src/pika_repl_server_conn.cc | 13 ++++++++++--- src/rsync_client.cc | 4 ++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/pika_repl_server_conn.cc b/src/pika_repl_server_conn.cc index 677a6b7891..e5c529b721 100644 --- a/src/pika_repl_server_conn.cc +++ b/src/pika_repl_server_conn.cc @@ -122,9 +122,16 @@ void PikaReplServerConn::HandleTrySyncRequest(void* arg) { << ", pro_offset: " << slave_boffset.offset(); response.set_code(InnerMessage::kOk); } - - if (pre_success && TrySyncOffsetCheck(db, try_sync_request, try_sync_response)) { - TrySyncUpdateSlaveNode(db, try_sync_request, conn, try_sync_response); + //In cloud mode, only full synchronization is possible. + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (pre_success) { + try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kSyncPointBePurged); + TrySyncUpdateSlaveNode(db, try_sync_request, conn, try_sync_response); + } + } else { + if (pre_success && TrySyncOffsetCheck(db, try_sync_request, try_sync_response)) { + TrySyncUpdateSlaveNode(db, try_sync_request, conn, try_sync_response); + } } std::string reply_str; diff --git a/src/rsync_client.cc b/src/rsync_client.cc index f8ee7aeae8..b17a59c475 100644 --- a/src/rsync_client.cc +++ b/src/rsync_client.cc @@ -89,6 +89,10 @@ void* RsyncClient::ThreadMain() { } for (int i = 0; i < GetParallelNum(); i++) { + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + //Waiting for interface support + //work_threads_[i] = std::move(std::thread(&RsyncS3Client::Copy, this, file_vec[i], i)); + } work_threads_[i] = std::move(std::thread(&RsyncClient::Copy, this, file_vec[i], i)); } From d32ec45396c6fd309a89ce4a14199f2274078cd2 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 11 Mar 2024 16:42:13 +0800 Subject: [PATCH 024/116] change config about rocksdb-cloud, add storage ut --- src/storage/src/redis.cc | 32 ++++- src/storage/src/redis.h | 20 +++ src/storage/tests/cloud_test.cc | 246 ++++++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+), 3 deletions(-) create mode 100644 src/storage/tests/cloud_test.cc diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 23c22b3c48..772fb58abd 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -35,9 +35,12 @@ Redis::Redis(Storage* const s, int32_t index) spop_counts_store_ = std::make_unique>(); default_compact_range_options_.exclusive_manual_compaction = false; default_compact_range_options_.change_level = true; + default_write_options_.disableWAL = true; spop_counts_store_->SetCapacity(1000); scan_cursors_store_->SetCapacity(5000); //env_ = rocksdb::Env::Instance(); + + listener_ = std::make_shared(index_, this); handles_.clear(); } @@ -54,9 +57,26 @@ Redis::~Redis() { if (default_compact_range_options_.canceled) { delete default_compact_range_options_.canceled; } + 
opened_ = false; } -Status Redis::Open(const StorageOptions& storage_options, const std::string& db_path) { +Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& db_path) { + + StorageOptions storage_options(tmp_storage_options); +#ifdef USE_S3 + storage_options.cloud_fs_options.roll_cloud_manifest_on_open = true; + storage_options.cloud_fs_options.resync_on_open = true; + storage_options.cloud_fs_options.resync_manifest_on_open = true; + storage_options.cloud_fs_options.skip_dbid_verification = true; + if (tmp_storage_options.cloud_fs_options.is_master) { + storage_options.options.replication_log_listener = listener_; + } else { + storage_options.options.disable_auto_flush = true; + storage_options.options.disable_auto_compactions = true; + } + storage_options.options.atomic_flush = true; +#endif + statistics_store_->SetCapacity(storage_options.statistics_max_size); small_compaction_threshold_ = storage_options.small_compaction_threshold; @@ -186,7 +206,9 @@ Status Redis::Open(const StorageOptions& storage_options, const std::string& db_ db_ops.env = cloud_env_.get(); return rocksdb::DBCloud::Open(db_ops, db_path, column_families, "", 0, &handles_, &db_); #else - return rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); + auto s = rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); + opened_ = true; + return s; #endif } @@ -529,7 +551,11 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { db_options["disable_auto_flush"] = "false"; cfs_options["is_master"] = "true"; // compare manifest_sequence - uint64_t local_manifest_sequence = db_->GetManifestUpdateSequence(); + uint64_t local_manifest_sequence = 0; + auto s = db_->GetManifestUpdateSequence(&local_manifest_sequence); + if (!s.ok()) { + LOG(ERROR) << "get manifestupdatesequence error: " << s.ToString(); + } uint64_t remote_manifest_sequence = 0; cfs_->GetMaxManifestSequenceFromCurrentManifest(db_->GetName(), &remote_manifest_sequence); // local version behind remote, directly reopen diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 87320f31cc..e6ae3f30d8 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -37,6 +37,7 @@ namespace storage { using Status = rocksdb::Status; using Slice = rocksdb::Slice; +class Listener; class Redis { public: Redis(Storage* storage, int32_t index); @@ -393,6 +394,9 @@ class Redis { #ifdef USE_S3 Status SwitchMaster(bool is_old_master, bool is_new_master); + void ResetListener(std::shared_ptr handle) { + listener_ = handle; + } #endif private: @@ -429,11 +433,13 @@ class Redis { private: + bool opened_ = false; int32_t index_ = 0; Storage* const storage_; std::shared_ptr lock_mgr_; #ifdef USE_S3 rocksdb::DBCloud* db_ = nullptr; + std::shared_ptr listener_; #else rocksdb::DB* db_ = nullptr; #endif @@ -471,5 +477,19 @@ class Redis { #endif }; +// TODO(wangshaoyi): implement details +class Listener : public rocksdb::ReplicationLogListener { +public: + Listener(int rocksdb_id, void* inst) : rocksdb_id_(rocksdb_id), counter_(0), inst_(inst) {} + std::string OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) override { + auto id = counter_.fetch_add(1); + return std::to_string(id); + } +private: + int rocksdb_id_ = 0; + std::atomic counter_ = {0}; + void* inst_; +}; + } // namespace storage #endif // SRC_REDIS_H_ diff --git a/src/storage/tests/cloud_test.cc b/src/storage/tests/cloud_test.cc new file mode 100644 index 0000000000..65b43164a6 --- /dev/null +++ 
b/src/storage/tests/cloud_test.cc @@ -0,0 +1,246 @@ +#include +#include +#include +#include +#include +#include "glog/logging.h" + +#include "pstd/include/env.h" +#include "storage/storage.h" +#include "src/redis.h" +#include "storage/util.h" + +using namespace storage; + +std::queue> items; + +struct MockReplicationListener : public rocksdb::ReplicationLogListener{ + MockReplicationListener() = default; + ~MockReplicationListener() = default; + std::string OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) override { + std::string cnt = std::to_string(counter_.fetch_add(1)); + items.push(std::make_pair(cnt, record)); + LOG(WARNING) << "write binlog, replication_sequence: " << cnt << " type: " << record.type << " items count:" << items.size(); + return cnt; + } + std::atomic counter_ = {0}; +}; + +class CloudTest : public ::testing::Test { +public: + CloudTest() = default; + ~CloudTest() override = default; + + void SetUp() override { + storage_options.options.create_if_missing = true; + storage_options.options.avoid_flush_during_shutdown = true; + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; + } + + void TearDown() override { + } + + static void SetUpTestSuite() {} + static void TearDownTestSuite() {} + + StorageOptions storage_options; + storage::Status s; + std::string path; +}; + +Status OpenMaster(storage::Redis*& inst, StorageOptions storage_options) { + storage::Storage str; + while (!items.empty()) + { + items.pop(); + } + + inst = new storage::Redis(&str, 0); + auto listener = std::make_shared(); + inst->ResetListener(listener); + storage_options.cloud_fs_options.is_master = true; + auto s = inst->Open(storage_options, "cloud_test"); + return s; +} + +Status OpenSlave(storage::Redis*& inst, StorageOptions storage_options) { + storage::Storage str; + inst = new storage::Redis(&str, 0); + storage_options.cloud_fs_options.is_master = false; + auto s = inst->Open(storage_options, "cloud_test"); + return s; +} + +TEST_F(CloudTest, simple_master) { + storage::Redis* inst; + auto s = OpenMaster(inst, storage_options); + ASSERT_TRUE(s.ok()); + for (int i = 0; i < 10000; i++) { + if (i + 1 % 100 == 0) { + sleep(1); + } + s = inst->Set(std::to_string(i), std::to_string(i)); + ASSERT_TRUE(s.ok()); + } + rocksdb::FlushOptions fo; + fo.wait = true; + inst->GetDB()->Flush(fo); + delete inst; + inst = nullptr; +} + +Status SlaveCatchUp(storage::Redis* slave) { + Status s; + LOG(WARNING) << "SlaveCatchUp, items.size: " << items.size(); + while (!items.empty()) { + std::string replication_sequence = items.front().first; + auto record = items.front().second; + items.pop(); + LOG(WARNING) << "replication_sequence: " << replication_sequence << " type: " << record.type; + // slave catchup + rocksdb::DB::ApplyReplicationLogRecordInfo info; + s = slave->GetDB()->ApplyReplicationLogRecord(record, replication_sequence, nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); + if (!s.ok()) { + LOG(WARNING) << "reapply log error: " << s.ToString(); + return s; + } + } + return s; +} + +TEST_F(CloudTest, master_slave) { + storage::Redis* inst_master, *inst_slave; + auto s = OpenMaster(inst_master, 
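Note on the bucket setup in CloudTest::SetUp() above: SetBucketName(suffix, prefix) composes the final S3 bucket as [prefix][suffix], mirroring the cloud-src-bucket-prefix/suffix options in pika.conf. An illustrative sketch (endpoint and credentials are the test's local MinIO assumptions, not real AWS values):

rocksdb::CloudFileSystemOptions cloud_fs_opts;
cloud_fs_opts.endpoint_override = "http://127.0.0.1:9000";             // local MinIO endpoint assumed by the test
cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin");
cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); // resulting bucket: "pika.database.unit.test"
cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika.");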
storage_options); + ASSERT_TRUE(s.ok()); + // master write + for (int i = 0; i < 20000; i++) { + if (i + 1 % 100 == 0) { + sleep(1); + } + s = inst_master->Set(std::to_string(i), std::to_string(i)); + ASSERT_TRUE(s.ok()); + } + + rocksdb::FlushOptions fo; + fo.wait = true; + inst_master->GetDB()->Flush(fo); + delete inst_master; + inst_master = nullptr; + + std::vector children; + pstd::GetChildren("cloud_test", children); + std::for_each(children.begin(), children.end(), [](auto& file) { + if (file.find("sst") != std::string::npos) { + std::string path = "cloud_test/"; + path = path + file; + pstd::DeleteFile(path); + } + }); + + s = OpenSlave(inst_slave, storage_options); + ASSERT_TRUE(s.ok()); + for (int i = 0; i < 20000; i++) { + std::string val; + s = inst_slave->Get(std::to_string(i), &val); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(val, std::to_string(i)); + } + SlaveCatchUp(inst_slave); + + delete inst_slave; + inst_slave = nullptr; + + s = OpenMaster(inst_master, storage_options); + ASSERT_TRUE(s.ok()); + for (int i = 0; i < 20000; i++) { + std::string val; + s = inst_master->Get(std::to_string(i), &val); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(val, std::to_string(i)); + } + delete inst_master; + inst_master = nullptr; +} + +TEST_F(CloudTest, switch_master) { + storage::Redis* inst_master, *inst_slave; + auto s = OpenMaster(inst_master, storage_options); + ASSERT_TRUE(s.ok()); + // master write + for (int i = 0; i < 20000; i++) { + if (i + 1 % 100 == 0) { + sleep(1); + } + s = inst_master->Set(std::to_string(i), std::to_string(i)); + ASSERT_TRUE(s.ok()); + } + + delete inst_master; + inst_master = nullptr; + LOG(WARNING) << "close master already"; + sleep(20); + + std::vector children; + pstd::GetChildren("cloud_test", children); + std::for_each(children.begin(), children.end(), [](auto& file) { + if (file.find("sst") != std::string::npos) { + std::string path = "cloud_test/"; + path = path + file; + pstd::DeleteFile(path); + } + }); + + s = OpenSlave(inst_slave, storage_options); + ASSERT_TRUE(s.ok()); + s = SlaveCatchUp(inst_slave); + ASSERT_TRUE(s.ok()); + for (int i = 0; i < 20000; i++) { + std::string val; + s = inst_slave->Get(std::to_string(i), &val); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(val, std::to_string(i)); + } + s = inst_slave->SwitchMaster(false, true); + ASSERT_TRUE(s.ok()); + delete inst_slave; + inst_slave = nullptr; + + pstd::GetChildren("cloud_test", children); + std::for_each(children.begin(), children.end(), [](auto& file) { + if (file.find("sst") != std::string::npos) { + std::string path = "cloud_test/"; + path = path + file; + pstd::DeleteFile(path); + } + }); + + s = OpenMaster(inst_master, storage_options); + ASSERT_TRUE(s.ok()); + for (int i = 0; i < 20000; i++) { + std::string val; + s = inst_master->Get(std::to_string(i), &val); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(val, std::to_string(i)); + } + delete inst_master; + inst_master = nullptr; +} + +int main(int argc, char** argv) { + if (!pstd::FileExists("./log")) { + pstd::CreatePath("./log"); + } + FLAGS_log_dir = "./log"; + FLAGS_minloglevel = 0; + FLAGS_max_log_size = 1800; + FLAGS_logbufsecs = 0; + ::google::InitGoogleLogging("cloud_test"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From b7bcf0a50cd973e88c1db54919f4c0df4a2a6bf1 Mon Sep 17 00:00:00 2001 From: baixin Date: Sun, 17 Mar 2024 17:36:28 +0800 Subject: [PATCH 025/116] slave consume binlog --- include/pika_command.h | 5 +++++ src/pika_consensus.cc | 10 ++++++++++ src/pika_repl_bgworker.cc | 13 ++++++++++++- 3 files changed, 27 
insertions(+), 1 deletion(-) diff --git a/include/pika_command.h b/include/pika_command.h index 3d5d535971..65da368fd1 100644 --- a/include/pika_command.h +++ b/include/pika_command.h @@ -248,6 +248,11 @@ const std::string kCmdNameXInfo = "xinfo"; const std::string kClusterPrefix = "pkcluster"; +//cloud +//Waiting for interface support +const std::string kCmdNameRocksdbFlush = "rocksdbflush"; +const std::string kCmdNameRocksdbCompact = "rocksdbcompact"; + using PikaCmdArgsType = net::RedisCmdArgsType; static const int RAW_ARGS_LEN = 1024 * 1024; diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc index 9a7debefa5..f046301d35 100644 --- a/src/pika_consensus.cc +++ b/src/pika_consensus.cc @@ -347,6 +347,16 @@ Status ConsensusCoordinator::ProcessLeaderLog(const std::shared_ptr& cmd_pt return Status::OK(); } + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + //Waiting for interface support + //get master binlog drop point + g_pika_rm->GetSyncMasterDBByName(db_name_); + /* + * point =getpoint() + * if point.filenum>binlogitem.filenum || (point.filenum==binlogitem.filenum && point.offset>=binlogitem.offset) + * {return;}*/ + } + Status s = InternalAppendLog(cmd_ptr); InternalApplyFollower(MemLog::LogItem(LogOffset(), cmd_ptr, nullptr, nullptr)); diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 4f372351f2..059b0b8e8f 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -208,7 +208,6 @@ void PikaReplBgWorker::HandleBGWorkerWriteDB(void* arg) { std::unique_ptr task_arg(static_cast(arg)); const std::shared_ptr c_ptr = task_arg->cmd_ptr; const PikaCmdArgsType& argv = c_ptr->argv(); - LogOffset offset = task_arg->offset; std::string db_name = task_arg->db_name; uint64_t start_us = 0; @@ -221,6 +220,18 @@ void PikaReplBgWorker::HandleBGWorkerWriteDB(void* arg) { if (!c_ptr->IsSuspend()) { c_ptr->GetDB()->DbRWLockReader(); } + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + //Waiting for interface support + //Invoke the corresponding 'compact' or 'flush' interface on the db. + if (c_ptr->name() == kCmdNameRocksdbFlush) { + g_pika_rm->GetSyncMasterDBByName(db_name); + //execFlushInRocksdb(db_) + } + if (c_ptr->name() == kCmdNameRocksdbCompact) { + g_pika_rm->GetSyncMasterDBByName(db_name); + //execCompactInRocksdb(db_) + } + } if (c_ptr->IsNeedCacheDo() && PIKA_CACHE_NONE != g_pika_conf->cache_model() && c_ptr->GetDB()->cache()->CacheStatus() == PIKA_CACHE_STATUS_OK) { From 953a847fad4075d7e61cbdb2347dfa792c73c62c Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 18 Mar 2024 11:51:11 +0800 Subject: [PATCH 026/116] fix bug --- src/pika_repl_bgworker.cc | 13 +------------ src/rsync_client.cc | 4 ---- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 059b0b8e8f..079522e7f5 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -220,18 +220,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteDB(void* arg) { if (!c_ptr->IsSuspend()) { c_ptr->GetDB()->DbRWLockReader(); } - if (g_pika_conf->pika_model() == PIKA_CLOUD) { - //Waiting for interface support - //Invoke the corresponding 'compact' or 'flush' interface on the db. 
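The drop-point check sketched in the ProcessLeaderLog comment above would look roughly like the following once an accessor exists (a hedged sketch; GetBinlogDropPoint is a hypothetical helper, not an existing interface):

// Hypothetical: skip entries the master has already folded into the cloud
// snapshot, i.e. everything at or before the recorded binlog drop point.
BinlogOffset point = GetBinlogDropPoint(db_name_);  // assumed helper, not in the patch
if (point.filenum > binlog_item.filenum ||
    (point.filenum == binlog_item.filenum && point.offset >= binlog_item.offset)) {
  return Status::OK();  // already covered by the full sync, nothing to apply
}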
- if (c_ptr->name() == kCmdNameRocksdbFlush) { - g_pika_rm->GetSyncMasterDBByName(db_name); - //execFlushInRocksdb(db_) - } - if (c_ptr->name() == kCmdNameRocksdbCompact) { - g_pika_rm->GetSyncMasterDBByName(db_name); - //execCompactInRocksdb(db_) - } - } + if (c_ptr->IsNeedCacheDo() && PIKA_CACHE_NONE != g_pika_conf->cache_model() && c_ptr->GetDB()->cache()->CacheStatus() == PIKA_CACHE_STATUS_OK) { diff --git a/src/rsync_client.cc b/src/rsync_client.cc index b17a59c475..f8ee7aeae8 100644 --- a/src/rsync_client.cc +++ b/src/rsync_client.cc @@ -89,10 +89,6 @@ void* RsyncClient::ThreadMain() { } for (int i = 0; i < GetParallelNum(); i++) { - if (g_pika_conf->pika_model() == PIKA_CLOUD) { - //Waiting for interface support - //work_threads_[i] = std::move(std::thread(&RsyncS3Client::Copy, this, file_vec[i], i)); - } work_threads_[i] = std::move(std::thread(&RsyncClient::Copy, this, file_vec[i], i)); } From 3250a96938ba6eaf1f9d1af48a823cb536e027d6 Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 18 Mar 2024 12:01:05 +0800 Subject: [PATCH 027/116] fix bug --- include/pika_command.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/pika_command.h b/include/pika_command.h index 65da368fd1..3d5d535971 100644 --- a/include/pika_command.h +++ b/include/pika_command.h @@ -248,11 +248,6 @@ const std::string kCmdNameXInfo = "xinfo"; const std::string kClusterPrefix = "pkcluster"; -//cloud -//Waiting for interface support -const std::string kCmdNameRocksdbFlush = "rocksdbflush"; -const std::string kCmdNameRocksdbCompact = "rocksdbcompact"; - using PikaCmdArgsType = net::RedisCmdArgsType; static const int RAW_ARGS_LEN = 1024 * 1024; From 10f3126e7693fde24c8a26a4e136013e04f9536b Mon Sep 17 00:00:00 2001 From: baixin Date: Tue, 19 Mar 2024 14:12:10 +0800 Subject: [PATCH 028/116] fix bug --- include/pika_cloud_binlog_transverter.h | 3 -- src/pika_admin.cc | 4 +- src/pika_cloud_binlog_transverter.cc | 3 ++ src/pika_consensus.cc | 10 ----- src/pika_repl_bgworker.cc | 60 ++++++++++++++++++------- src/pika_repl_server_conn.cc | 1 - 6 files changed, 50 insertions(+), 31 deletions(-) diff --git a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h index e422725a5a..90337315df 100644 --- a/include/pika_cloud_binlog_transverter.h +++ b/include/pika_cloud_binlog_transverter.h @@ -14,9 +14,6 @@ #include "pika_cloud_binlog.pb.h" -const int PADDING_BINLOG_PROTOCOL_SIZE = 22; -const int SPACE_STROE_PARAMETER_LENGTH = 5; - class PikaCloudBinlogTransverter { public: PikaCloudBinlogTransverter() = default; diff --git a/src/pika_admin.cc b/src/pika_admin.cc index 85da5858e5..d1481e16f1 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -150,7 +150,7 @@ void SlaveofCmd::Do() { g_pika_server->RemoveMaster(); if (is_none_) { - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_model() == PIKA_CLOUD && g_pika_server->role() == PIKA_ROLE_SLAVE) { std::shared_lock rwl(g_pika_server->dbs_rw_); for (const auto& db_item : g_pika_server->dbs_) { db_item.second->SwitchMaster(false, true); @@ -168,7 +168,7 @@ void SlaveofCmd::Do() { bool sm_ret = g_pika_server->SetMaster(master_ip_, static_cast(master_port_)); if (sm_ret) { - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_model() == PIKA_CLOUD && g_pika_server->role() == PIKA_ROLE_MASTER) { std::shared_lock rwl(g_pika_server->dbs_rw_); for (const auto& db_item : g_pika_server->dbs_) { db_item.second->SwitchMaster(true, false); diff --git a/src/pika_cloud_binlog_transverter.cc 
b/src/pika_cloud_binlog_transverter.cc index eb3e7fe0f8..e2c7fd1049 100644 --- a/src/pika_cloud_binlog_transverter.cc +++ b/src/pika_cloud_binlog_transverter.cc @@ -12,6 +12,9 @@ #include "include/pika_command.h" #include "pstd/include/pstd_coding.h" +const int PADDING_BINLOG_PROTOCOL_SIZE = 22; +const int SPACE_STROE_PARAMETER_LENGTH = 5; + std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, uint32_t filenum, uint64_t offset, const std::string& content) { diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc index f046301d35..9a7debefa5 100644 --- a/src/pika_consensus.cc +++ b/src/pika_consensus.cc @@ -347,16 +347,6 @@ Status ConsensusCoordinator::ProcessLeaderLog(const std::shared_ptr& cmd_pt return Status::OK(); } - if (g_pika_conf->pika_model() == PIKA_CLOUD) { - //Waiting for interface support - //get master binlog drop point - g_pika_rm->GetSyncMasterDBByName(db_name_); - /* - * point =getpoint() - * if point.filenum>binlogitem.filenum || (point.filenum==binlogitem.filenum && point.offset>=binlogitem.offset) - * {return;}*/ - } - Status s = InternalAppendLog(cmd_ptr); InternalApplyFollower(MemLog::LogItem(LogOffset(), cmd_ptr, nullptr, nullptr)); diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 079522e7f5..edaf2e05c8 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -5,13 +5,15 @@ #include -#include "include/pika_repl_bgworker.h" +#include "include/pika_cloud_binlog_transverter.h" #include "include/pika_cmd_table_manager.h" +#include "include/pika_conf.h" +#include "include/pika_repl_bgworker.h" #include "include/pika_rm.h" #include "include/pika_server.h" #include "pstd/include/pstd_defer.h" #include "src/pstd/include/scope_record_lock.h" -#include "include/pika_conf.h" +#include "pika_cloud_binlog.pb.h" extern PikaServer* g_pika_server; extern std::unique_ptr g_pika_rm; @@ -132,20 +134,48 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { if (binlog_res.binlog().empty()) { continue; } - if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog_res.binlog(), &worker->binlog_item_)) { - LOG(WARNING) << "Binlog item decode failed"; - slave_db->SetReplState(ReplState::kTryConnect); - return; + + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + cloud::BinlogCloudItem binlog_item; + if (!PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(binlog_res.binlog(), &binlog_item)) { + LOG(WARNING) << "Cloud Binlog item decode failed"; + slave_db->SetReplState(ReplState::kTryConnect); + return; + } + //Waiting for interface support + //get master binlog drop point + /* + * point =getpoint() + * if point.filenum>binlogitem_.filenum || (point.filenum==binlogitem_.filenum && point.offset>=binlogitem_.offset) + * {continue;}*/ + } else { + if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog_res.binlog(), &worker->binlog_item_)) { + LOG(WARNING) << "Binlog item decode failed"; + slave_db->SetReplState(ReplState::kTryConnect); + return; + } } - const char* redis_parser_start = binlog_res.binlog().data() + BINLOG_ENCODE_LEN; - int redis_parser_len = static_cast(binlog_res.binlog().size()) - BINLOG_ENCODE_LEN; - int processed_len = 0; - net::RedisParserStatus ret = - worker->redis_parser_.ProcessInputBuffer(redis_parser_start, redis_parser_len, &processed_len); - if (ret != net::kRedisParserDone) { - LOG(WARNING) << "Redis parser failed"; - slave_db->SetReplState(ReplState::kTryConnect); - return; + + if 
(g_pika_conf->pika_model() == PIKA_CLOUD) { + //1.write to binlog + std::shared_ptr db = + g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); + if (!db) { + LOG(WARNING) << worker->db_name_ << "Not found."; + } + db->Logger()->Put(binlog_res.binlog()); + //2.Waiting for interface support:write into rocksdb + } else { + const char* redis_parser_start = binlog_res.binlog().data() + BINLOG_ENCODE_LEN; + int redis_parser_len = static_cast(binlog_res.binlog().size()) - BINLOG_ENCODE_LEN; + int processed_len = 0; + net::RedisParserStatus ret = + worker->redis_parser_.ProcessInputBuffer(redis_parser_start, redis_parser_len, &processed_len); + if (ret != net::kRedisParserDone) { + LOG(WARNING) << "Redis parser failed"; + slave_db->SetReplState(ReplState::kTryConnect); + return; + } } } diff --git a/src/pika_repl_server_conn.cc b/src/pika_repl_server_conn.cc index e5c529b721..e5d0236f61 100644 --- a/src/pika_repl_server_conn.cc +++ b/src/pika_repl_server_conn.cc @@ -126,7 +126,6 @@ void PikaReplServerConn::HandleTrySyncRequest(void* arg) { if (g_pika_conf->pika_model() == PIKA_CLOUD) { if (pre_success) { try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kSyncPointBePurged); - TrySyncUpdateSlaveNode(db, try_sync_request, conn, try_sync_response); } } else { if (pre_success && TrySyncOffsetCheck(db, try_sync_request, try_sync_response)) { From 55e6bc678df3edb6972de8c5df47cc4c3eab8109 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Wed, 20 Mar 2024 18:01:09 +0800 Subject: [PATCH 029/116] fix by review comments --- src/storage/src/redis.cc | 4 ++-- src/storage/src/redis.h | 14 +++++++------- src/storage/tests/cloud_test.cc | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 772fb58abd..aa938389bc 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -40,7 +40,7 @@ Redis::Redis(Storage* const s, int32_t index) scan_cursors_store_->SetCapacity(5000); //env_ = rocksdb::Env::Instance(); - listener_ = std::make_shared(index_, this); + log_listener_ = std::make_shared(index_, this); handles_.clear(); } @@ -69,7 +69,7 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& storage_options.cloud_fs_options.resync_manifest_on_open = true; storage_options.cloud_fs_options.skip_dbid_verification = true; if (tmp_storage_options.cloud_fs_options.is_master) { - storage_options.options.replication_log_listener = listener_; + storage_options.options.replication_log_listener = log_listener_; } else { storage_options.options.disable_auto_flush = true; storage_options.options.disable_auto_compactions = true; diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index e6ae3f30d8..1870ec618e 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -37,7 +37,7 @@ namespace storage { using Status = rocksdb::Status; using Slice = rocksdb::Slice; -class Listener; +class LogListener; class Redis { public: Redis(Storage* storage, int32_t index); @@ -394,8 +394,8 @@ class Redis { #ifdef USE_S3 Status SwitchMaster(bool is_old_master, bool is_new_master); - void ResetListener(std::shared_ptr handle) { - listener_ = handle; + void ResetLogListener(std::shared_ptr handle) { + log_listener_ = handle; } #endif @@ -439,7 +439,7 @@ class Redis { std::shared_ptr lock_mgr_; #ifdef USE_S3 rocksdb::DBCloud* db_ = nullptr; - std::shared_ptr listener_; + std::shared_ptr log_listener_; #else rocksdb::DB* db_ = nullptr; #endif @@ -478,9 +478,9 @@ class Redis { }; // 
TODO(wangshaoyi): implement details -class Listener : public rocksdb::ReplicationLogListener { +class LogListener : public rocksdb::ReplicationLogListener { public: - Listener(int rocksdb_id, void* inst) : rocksdb_id_(rocksdb_id), counter_(0), inst_(inst) {} + LogListener(int rocksdb_id, void* inst) : rocksdb_id_(rocksdb_id), counter_(0), inst_(inst) {} std::string OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) override { auto id = counter_.fetch_add(1); return std::to_string(id); @@ -488,7 +488,7 @@ class Listener : public rocksdb::ReplicationLogListener { private: int rocksdb_id_ = 0; std::atomic counter_ = {0}; - void* inst_; + void* inst_ = nullptr; }; } // namespace storage diff --git a/src/storage/tests/cloud_test.cc b/src/storage/tests/cloud_test.cc index 65b43164a6..32f29d4e72 100644 --- a/src/storage/tests/cloud_test.cc +++ b/src/storage/tests/cloud_test.cc @@ -63,7 +63,7 @@ Status OpenMaster(storage::Redis*& inst, StorageOptions storage_options) { inst = new storage::Redis(&str, 0); auto listener = std::make_shared(); - inst->ResetListener(listener); + inst->ResetLogListener(listener); storage_options.cloud_fs_options.is_master = true; auto s = inst->Open(storage_options, "cloud_test"); return s; From 99333ce79f973bdef673ade52d1585bfc7668c28 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 22 Mar 2024 22:01:04 +0800 Subject: [PATCH 030/116] implement rocksdb WAL callback function --- include/pika_binlog.h | 2 +- include/pika_cloud_binlog_transverter.h | 2 +- src/pika_binlog.cc | 2 +- src/pika_cloud_binlog.cc | 4 +- src/pika_cloud_binlog.proto | 1 + src/pika_cloud_binlog_transverter.cc | 3 +- src/pika_db.cc | 5 ++ src/pika_repl_bgworker.cc | 20 +++---- src/pika_server.cc | 12 ++++ src/storage/include/storage/storage.h | 4 +- src/storage/src/redis.cc | 76 +++++++++++++++++++------ src/storage/src/redis.h | 21 ++++--- src/storage/src/storage.cc | 30 ++++------ 13 files changed, 123 insertions(+), 59 deletions(-) diff --git a/include/pika_binlog.h b/include/pika_binlog.h index 029119da55..872fe2a87a 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -53,7 +53,7 @@ class Binlog : public pstd::noncopyable { virtual pstd::Status Put(const std::string& item); - virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id); + virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const std::string& rep_seq); virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* diff --git a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h index 90337315df..1c28b921c5 100644 --- a/include/pika_cloud_binlog_transverter.h +++ b/include/pika_cloud_binlog_transverter.h @@ -18,7 +18,7 @@ class PikaCloudBinlogTransverter { public: PikaCloudBinlogTransverter() = default; static std::string BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, - uint32_t filenum, uint64_t offset, const std::string& content); + uint32_t filenum, uint64_t offset, const std::string& content, const std::string& replication_sequence); static bool BinlogDecode(const std::string& binlog, cloud::BinlogCloudItem* binlog_item); diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index b1fc6d3829..3dddbccea2 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -165,7 +165,7 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32 return Status::OK(); } -Status 
Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { +Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const std::string& rep_seq) { return Status::Error("data err"); } diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index 589d43d219..80c36358ae 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -161,7 +161,7 @@ Status CloudBinlog::Put(const std::string& item) { return Status::Error("data err: db_id and rocksdb_id empty"); } // Note: mutex lock should be held -Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { +Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const std::string& replication_sequence) { if (!opened_.load()) { return Status::Busy("Cloud Binlog is not open yet"); } @@ -176,7 +176,7 @@ Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksd if (!s.ok()) { return s; } - std::string data = PikaCloudBinlogTransverter::BinlogEncode(db_id, rocksdb_id, time(nullptr), term, filenum, offset, item); + std::string data = PikaCloudBinlogTransverter::BinlogEncode(db_id, rocksdb_id, time(nullptr), term, filenum, offset, item, replication_sequence); s = Put(data.c_str(), static_cast(data.size())); if (!s.ok()) { diff --git a/src/pika_cloud_binlog.proto b/src/pika_cloud_binlog.proto index b9ff844bf8..b3e4b00b0f 100644 --- a/src/pika_cloud_binlog.proto +++ b/src/pika_cloud_binlog.proto @@ -13,4 +13,5 @@ message BinlogCloudItem { uint64 file_num = 5; uint64 offset = 6; bytes content = 7; + bytes replication_sequence = 8; } \ No newline at end of file diff --git a/src/pika_cloud_binlog_transverter.cc b/src/pika_cloud_binlog_transverter.cc index e2c7fd1049..6d8472d9f1 100644 --- a/src/pika_cloud_binlog_transverter.cc +++ b/src/pika_cloud_binlog_transverter.cc @@ -17,7 +17,7 @@ const int SPACE_STROE_PARAMETER_LENGTH = 5; std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, uint32_t filenum, uint64_t offset, - const std::string& content) { + const std::string& content, const std::string& replication_sequence) { std::string serialize_binlog; cloud::BinlogCloudItem binlog_item; binlog_item.set_db_id(db_id); @@ -27,6 +27,7 @@ std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t ro binlog_item.set_file_num(filenum); binlog_item.set_offset(offset); binlog_item.set_content(content); + binlog_item.set_replication_sequence(replication_sequence); binlog_item.SerializeToString(&serialize_binlog); return serialize_binlog; } diff --git a/src/pika_db.cc b/src/pika_db.cc index 1269aab988..61f55e3891 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -39,7 +39,12 @@ DB::DB(std::string db_name, const std::string& db_path, log_path_ = DBPath(log_path, "log_" + db_name_); storage_ = std::make_shared(g_pika_conf->db_instance_num(), g_pika_conf->default_slot_num(), g_pika_conf->classic_mode()); +#ifdef USE_S3 + auto wal_writer = g_pika_rm->GetSyncMasterDBByName(db_name)->StableLogger().get(); + rocksdb::Status s = storage_->Open(g_pika_server->storage_options(), db_path_, wal_writer); +#else rocksdb::Status s = storage_->Open(g_pika_server->storage_options(), db_path_); +#endif pstd::CreatePath(db_path_); pstd::CreatePath(log_path_); lock_mgr_ = std::make_shared(1000, 0, std::make_shared()); diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index edaf2e05c8..159c4b7d6b 100644 --- a/src/pika_repl_bgworker.cc +++ 
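To make the new encoding concrete, a hedged round-trip example using the transverter calls shown in this patch (all values are placeholders):

// Encode one cloud binlog entry that carries the rocksdb-cloud replication sequence.
std::string data = PikaCloudBinlogTransverter::BinlogEncode(
    /*db_id=*/0, /*rocksdb_id=*/3, /*exec_time=*/static_cast<uint32_t>(time(nullptr)),
    /*term_id=*/1, /*filenum=*/12, /*offset=*/4096,
    /*content=*/"<raw ReplicationLogRecord contents>", /*replication_sequence=*/"42");

// Decode only the header; by its name, the *WithoutContent* variant is assumed
// to leave the content field unread.
cloud::BinlogCloudItem item;
PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(data, &item);
// item.rocksdb_id() == 3, item.replication_sequence() == "42"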
b/src/pika_repl_bgworker.cc @@ -148,24 +148,22 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { * point =getpoint() * if point.filenum>binlogitem_.filenum || (point.filenum==binlogitem_.filenum && point.offset>=binlogitem_.offset) * {continue;}*/ + std::shared_ptr db = + g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); + if (!db) { + LOG(WARNING) << woker->db_name_ <<" not found"; + } + db->Logger()->Put(binlog_res.binlog()); + auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); + s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.replication_sequence(), binlog_item.type(), binlog_item->content()); + return; } else { if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog_res.binlog(), &worker->binlog_item_)) { LOG(WARNING) << "Binlog item decode failed"; slave_db->SetReplState(ReplState::kTryConnect); return; } - } - if (g_pika_conf->pika_model() == PIKA_CLOUD) { - //1.write to binlog - std::shared_ptr db = - g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); - if (!db) { - LOG(WARNING) << worker->db_name_ << "Not found."; - } - db->Logger()->Put(binlog_res.binlog()); - //2.Waiting for interface support:write into rocksdb - } else { const char* redis_parser_start = binlog_res.binlog().data() + BINLOG_ENCODE_LEN; int redis_parser_len = static_cast(binlog_res.binlog().size()) - BINLOG_ENCODE_LEN; int processed_len = 0; diff --git a/src/pika_server.cc b/src/pika_server.cc index 5a36ffd539..cc1d73333d 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -88,6 +88,9 @@ PikaServer::PikaServer() // init role std::string slaveof = g_pika_conf->slaveof(); +#ifdef USE_S3 + storage_options_.cloud_fs_options.is_master = true; +#endif if (!slaveof.empty()) { auto sep = static_cast(slaveof.find(':')); std::string master_ip = slaveof.substr(0, sep); @@ -96,6 +99,9 @@ PikaServer::PikaServer() LOG(FATAL) << "you will slaveof yourself as the config file, please check"; } else { SetMaster(master_ip, master_port); +#ifdef USE_S3 + storage_options_.cloud_fs_options.is_master = false; +#endif } } @@ -1397,6 +1403,12 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); + + //TODO(wangshoyi): implement upload rocksdb-cloud meta to dashboard + storage_options.cloud_fs_options.upload_meta_func = [](const std::string& a, const std::string& b, const std::string& c) ->bool { + LOG(WARNING) << "args: " << a << " : " << b << " : " << c; + return true; + }; #endif } diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index 95b174cc8e..0b31fcaac8 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -188,7 +188,7 @@ class Storage { Storage(int db_instance_num, int slot_num, bool is_classic_mode); ~Storage(); - Status Open(const StorageOptions& storage_options, const std::string& db_path); + Status Open(const StorageOptions& storage_options, const std::string& db_path, void* wal_writer = nullptr); Status LoadCursorStartKey(const DataType& dtype, int64_t cursor, char* type, std::string* start_key); @@ -198,6 +198,8 @@ class Storage { std::unique_ptr& GetDBInstance(const std::string& key); + Status ApplyWAL(int rocksdb_id, const std::string& repli_seq, int type, const std::string& content); + // 
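The Storage::ApplyWAL entry point declared here is what the slave-side repl worker calls. A hedged usage sketch; record_contents is a placeholder variable, and the enum value name is an assumption about the rocksdb-cloud ReplicationLogRecord::Type:

// Replay one replicated WAL record into rocksdb instance `rocksdb_id`.
// `repli_seq` must be the sequence string the master's LogListener returned
// for this record, so records are applied in the master's order.
storage::Status s = storage->ApplyWAL(
    /*rocksdb_id=*/3,
    /*repli_seq=*/"42",
    /*type=*/static_cast<int>(rocksdb::ReplicationLogRecord::kMemtableWrite),  // assumed enum value name
    /*content=*/record_contents);
if (!s.ok()) {
  LOG(WARNING) << "ApplyWAL failed: " << s.ToString();
}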
Strings Commands // Set key to hold the string value. if key diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index aa938389bc..0db395aeed 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -25,7 +25,7 @@ rocksdb::Comparator* ZSetsScoreKeyComparator() { return &zsets_score_key_compare; } -Redis::Redis(Storage* const s, int32_t index) +Redis::Redis(Storage* const s, int32_t index, void* wal_writer) : storage_(s), index_(index), lock_mgr_(std::make_shared(1000, 0, std::make_shared())), small_compaction_threshold_(5000), @@ -39,12 +39,17 @@ Redis::Redis(Storage* const s, int32_t index) spop_counts_store_->SetCapacity(1000); scan_cursors_store_->SetCapacity(5000); //env_ = rocksdb::Env::Instance(); - - log_listener_ = std::make_shared(index_, this); +#ifdef USE_S3 + log_listener_ = std::make_shared(index_, this, wal_writer); +#endif handles_.clear(); } Redis::~Redis() { + Close(); +} + +void Redis::Close() { rocksdb::CancelAllBackgroundWork(db_, true); std::vector tmp_handles = handles_; handles_.clear(); @@ -57,13 +62,18 @@ Redis::~Redis() { if (default_compact_range_options_.canceled) { delete default_compact_range_options_.canceled; } +#ifdef USE_S3 + log_listener_.reset(); opened_ = false; +#endif } Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& db_path) { StorageOptions storage_options(tmp_storage_options); #ifdef USE_S3 + db_path_ = db_path; + storage_options_ = tmp_storage_options; storage_options.cloud_fs_options.roll_cloud_manifest_on_open = true; storage_options.cloud_fs_options.resync_on_open = true; storage_options.cloud_fs_options.resync_manifest_on_open = true; @@ -517,9 +527,9 @@ Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::stri return s; } -Status Redis::ReOpenRocksDB(const std::unordered_map& db_options, - const std::unordered_map& cfs_options) { - return Status::OK(); +Status Redis::ReOpenRocksDB(const storage::StorageOptions& opt) { + Close(); + Open(opt, db_path_); } Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { @@ -527,29 +537,31 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { // Do nothing return Status::OK(); } - - std::unordered_map db_options, cfs_options; + + storage::StorageOptions storage_options(storage_options_); + std::unordered_map db_options; if (is_old_master && !is_new_master) { + storage_options.cloud_fs_options.is_master = false; db_options["disable_auto_compactions"] = "true"; db_options["disable_auto_flush"] = "true"; for (const auto& cf : handles_) { db_->SetOptions(cf, db_options); } cfs_->SwitchMaster(false); - cfs_options["is_master"] = "false"; - return ReOpenRocksDB(db_options, cfs_options); + return ReOpenRocksDB(storage_options); } // slaveof another pika master, just reopen if (!is_old_master && !is_new_master) { - return ReOpenRocksDB(db_options, cfs_options); + storage_options.cloud_fs_options.is_master = false; + return ReOpenRocksDB(storage_options); } // slave promotes to master if (!is_old_master && is_new_master) { + storage_options.cloud_fs_options.is_master = true; db_options["disable_auto_compactions"] = "false"; db_options["disable_auto_flush"] = "false"; - cfs_options["is_master"] = "true"; // compare manifest_sequence uint64_t local_manifest_sequence = 0; auto s = db_->GetManifestUpdateSequence(&local_manifest_sequence); @@ -560,7 +572,7 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { cfs_->GetMaxManifestSequenceFromCurrentManifest(db_->GetName(), 
&remote_manifest_sequence); // local version behind remote, directly reopen if (local_manifest_sequence < remote_manifest_sequence) { - return ReOpenRocksDB(db_options, cfs_options); + return ReOpenRocksDB(storage_options); } // local's version cannot beyond remote's, just holding extra data in memtables assert(local_manifest_sequence == remote_manifest_sequence); @@ -574,12 +586,44 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { rocksdb::FlushOptions fops; fops.wait = true; db_->Flush(fops, handles_); - //TODO - //cfs_->UploadManifest(); return Status::OK(); } return Status::OK(); } -#endif +Status Redis::ApplyWAL(const std::string& replication_sequence, int type, const std::string& content) { + rocksdb::ReplicationLogRecord::Type rtype = static_cast(type); + rocksdb::ReplicationLogRecord rlr; + rocksdb::DBCloud::ApplyReplicationLogRecordInfo info; + rlr.contents = content; + rlr.type = rtype; + + auto s = db_->ApplyReplicationLogRecord(rlr, repli_seq, nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); + LOG(WARNING) << "applying rocksdb WAL, rocksdb_id: " << index_ + << " replication sequence: " << replication_sequence + << " log record type: " << rtype + << " status: " << s.ToString(); + return s; +} + + +std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) { + Redis* redis_inst = (Redis*)inst_; + //TODO(wangshaoyi): get from storage + int db_id = 0; + if (redis_inst->opened_) { + LOG(WANRING) << "rocksdb not opened yet, skip write binlog"; + return "0"; + } + std::string replication_sequence_str = std::to_string(counter_.fetch_add(1)); + auto s = ((StableLog*)(wal_writer_))->Logger()-> Put(record.contents, db_id, + redis_inst->GetIndex(), replication_sequence_str); + if (!s.ok()) { + LOG(ERROR) << "write binlog failed, db_id: " << db_id + << " rocksdb_id: " << rocksdb_id + << " replication sequence: " << replication_sequence_str; + } + return replication_sequence_str; +} +#endif } // namespace storage diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 1870ec618e..2f0fe033ad 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -40,7 +40,7 @@ using Slice = rocksdb::Slice; class LogListener; class Redis { public: - Redis(Storage* storage, int32_t index); + Redis(Storage* storage, int32_t index, void* wal_logger = nullptr); virtual ~Redis(); #ifdef USE_S3 @@ -112,6 +112,7 @@ class Redis { // Common Commands Status Open(const StorageOptions& storage_options, const std::string& db_path); + void Close(); virtual Status CompactRange(const DataType& option_type, const rocksdb::Slice* begin, const rocksdb::Slice* end, const ColumnFamilyType& type = kMetaAndData); @@ -438,6 +439,7 @@ class Redis { Storage* const storage_; std::shared_ptr lock_mgr_; #ifdef USE_S3 + std::string db_path_ = ""; rocksdb::DBCloud* db_ = nullptr; std::shared_ptr log_listener_; #else @@ -472,23 +474,28 @@ class Redis { Status OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::string& db_path); std::unique_ptr cloud_env_; rocksdb::CloudFileSystem* cfs_; - Status ReOpenRocksDB(const std::unordered_map& db_options, - const std::unordered_map& cfs_options); + Status ReOpenRocksDB(const storage::StorageOptions& opt); + Status ApplyWAL(const std::string& replication_sequence, int type, const std::string& content); #endif }; // TODO(wangshaoyi): implement details class LogListener : public rocksdb::ReplicationLogListener { public: - LogListener(int rocksdb_id, void* inst) : rocksdb_id_(rocksdb_id), counter_(0), 
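// Editorial summary (hedged, step numbering not part of the patch) of how the
// listener and ApplyWAL above fit together:
// 1. Master: rocksdb-cloud calls LogListener::OnReplicationLogRecord(record).
//    The listener assigns a monotonically increasing sequence string and writes
//    (record.contents, db_id, rocksdb_id, seq) to the cloud binlog via wal_writer_->Put().
// 2. That sequence string is returned to rocksdb-cloud and travels to the slave
//    inside BinlogCloudItem::replication_sequence.
// 3. Slave: Storage::ApplyWAL(rocksdb_id, seq, type, contents) forwards to
//    Redis::ApplyWAL, which calls db_->ApplyReplicationLogRecord(rlr, seq, ...)
//    with AR_EVICT_OBSOLETE_FILES, replaying the master's write in order.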
inst_(inst) {} - std::string OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) override { - auto id = counter_.fetch_add(1); - return std::to_string(id); + LogListener(int rocksdb_id, void* inst, void* wal_writer) + : rocksdb_id_(rocksdb_id), counter_(0), + inst_(inst), wal_writer_(wal_writer) {} + std::string OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) override; + + // reset when switch master or process start + void ResetSequence(uint64_t seq) { + counter_.store(seq); } private: int rocksdb_id_ = 0; std::atomic counter_ = {0}; void* inst_ = nullptr; + void* wal_writer_ = nullptr; }; } // namespace storage diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index 7c3e80aa55..fb527d75e7 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -89,12 +89,16 @@ static std::string AppendSubDirectory(const std::string& db_path, int index) { } } -Status Storage::Open(const StorageOptions& storage_options, const std::string& db_path) { +Status Storage::Open(const StorageOptions& storage_options, const std::string& db_path, void* wal_writer) { mkpath(db_path.c_str(), 0755); int inst_count = db_instance_num_; for (int index = 0; index < inst_count; index++) { +#ifdef USE_S3 + insts_.emplace_back(std::make_unique(this, index, wal_writer)); +#else insts_.emplace_back(std::make_unique(this, index)); +#endif Status s = insts_.back()->Open(storage_options, AppendSubDirectory(db_path, index)); if (!s.ok()) { LOG(FATAL) << "open db failed" << s.ToString(); @@ -2315,11 +2319,7 @@ Status Storage::StopScanKeyNum() { return Status::OK(); } -#ifdef USE_S3 -rocksdb::DBCloud* Storage::GetDBByIndex(int index) { -#else rocksdb::DB* Storage::GetDBByIndex(int index) { -#endif if (index < 0 || index >= db_instance_num_) { LOG(WARNING) << "Invalid DB Index: " << index << "total: " << db_instance_num_; @@ -2401,7 +2401,7 @@ Status Storage::EnableAutoCompaction(const OptionType& option_type, void Storage::GetRocksDBInfo(std::string& info) { char temp[12] = {0}; for (const auto& inst : insts_) { - snprintf(temp, sizeof(temp), "instance:%2d", inst->GetIndex()); + snprintf(temp, sizeof(temp), "instance%d_", inst->GetIndex()); inst->GetRocksDBInfo(info, temp); } } @@ -2449,18 +2449,12 @@ void Storage::DisableWal(const bool is_wal_disable) { } #ifdef USE_S3 -Status Storage::SwitchMaster(bool is_old_master, bool is_new_master) { - Status s = Status::OK(); - for (const auto& inst : insts_) { - s = inst->SwitchMaster(is_old_master, is_new_master); - if (!s.ok()) { - LOG(WARNING) << "switch mode failed, when switch from " - << (is_old_master ? "master" : "slave") << " to " - << (is_new_master ? 
"master" : "slave"); - return s; - } - } - return s; + +Status Storage::ApplyWAL(int rocksdb_id, const std::string& repli_seq, + int type, const std::string& content) { + auto& inst = insts_[rocksdb_id]; + return inst->ApplyWAL(replication_sequence, type, content); } #endif + } // namespace storage From 9f2077636c1ba83ae01f52d4584f108daadff95b Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 25 Mar 2024 12:05:02 +0800 Subject: [PATCH 031/116] implement post manifest file to sentinel --- CMakeLists.txt | 6 ++++ include/pika_server.h | 18 ++++++++++++ src/pika_server.cc | 67 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c075c60af1..286526ab5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -845,6 +845,12 @@ target_link_directories(${PROJECT_NAME} PUBLIC ${INSTALL_LIBDIR_64} PUBLIC ${INSTALL_LIBDIR}) +if (USE_S3) +find_package(AWSSDK REQUIRED COMPONENTS ${SERVICE_COMPONENTS}) +target_link_libraries(${PROJECT_NAME} + ${AWSSDK_LINK_LIBRARIES}) +endif() + add_dependencies(${PROJECT_NAME} gflags gtest diff --git a/include/pika_server.h b/include/pika_server.h index a04137dcd7..5619f2ee4a 100644 --- a/include/pika_server.h +++ b/include/pika_server.h @@ -16,6 +16,10 @@ #include #include +#ifdef USE_S3 +#include +#endif + #include "src/cache/include/config.h" #include "net/include/bg_thread.h" #include "net/include/net_pubsub.h" @@ -307,6 +311,12 @@ class PikaServer : public pstd::noncopyable { std::shared_mutex bgsave_protector_; BgSaveInfo bgsave_info_; +#ifdef USE_S3 + bool UploadMetaToSentinel(const std::string& s3_bucket, const std::string& remote_path, + const std::string& content); +#endif + + /* * BGSlotsReload used */ @@ -508,6 +518,14 @@ class PikaServer : public pstd::noncopyable { int port_ = 0; time_t start_time_s_ = 0; +#ifdef USE_S3 + std::string sentinel_addr_; + //TODO(wangshaoyi): make it thread loacal + std::shared_ptr sentinel_client_; + std::string lease_term_id_; + std::string group_id_; +#endif + std::shared_mutex storage_options_rw_; storage::StorageOptions storage_options_; void InitStorageOptions(); diff --git a/src/pika_server.cc b/src/pika_server.cc index cc1d73333d..4592e117a9 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -11,6 +11,18 @@ #include #include #include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "net/include/net_cli.h" #include "net/include/net_interfaces.h" #include "net/include/net_stats.h" @@ -26,6 +38,10 @@ #include "include/pika_rm.h" #include "include/pika_server.h" +using namespace Aws::Http; +using namespace Aws::Utils; +using namespace Aws::Client; + using pstd::Status; extern PikaServer* g_pika_server; extern std::unique_ptr g_pika_rm; @@ -1800,3 +1816,54 @@ void PikaServer::CacheConfigInit(cache::CacheConfig& cache_cfg) { cache_cfg.maxmemory_samples = g_pika_conf->cache_maxmemory_samples(); cache_cfg.lfu_decay_time = g_pika_conf->cache_lfu_decay_time(); } + +#ifdef USE_S3 +bool PikaServer::UploadMetaToSentinel(const std::string& s3_bucket, + const std::string& remote_path, + const std::string& content) { + Aws::String url(sentinel_addr_); + if (sentinel_client_ == nullptr) { + sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); + } + + // construct request body + Json::JsonValue request_doc; + request_doc.WithString("term_id", Aws::String(lease_term_id_)); + request_doc.WithString("group_id", Aws::String(group_id_)); + request_doc.WithString("s3_bucket", 
Aws::String(s3_bucket)); + request_doc.WithString("s3_path", Aws::String(remote_path)); + request_doc.WithString("content", Aws::String(content)); + + std::shared_ptr body = Aws::MakeShared("wsy demo"); + *body << request_doc.View().WriteReadable(); + + auto request = CreateHttpRequest(url, HttpMethod::HTTP_POST, + Aws::Utils::Stream::DefaultResponseStreamFactoryMethod); + request->AddContentBody(body); + body->seekg(0, body->end); + auto streamSize = body->tellg(); + body->seekg(0, body->beg); + Aws::StringStream contentLength; + contentLength << streamSize; + request->SetContentLength(contentLength.str()); + request->SetContentType("application/json"); + + auto response = sentinel_client_->MakeRequest(request); + if (response->HasClientError()) { + exit(1); + } + if (response->GetResponseCode() == HttpResponseCode::OK) { + LOG(ERROR) << "UploadMetaToSentinel success" + << " s3_bucket: " << s3_bucket + << " group_id: " << group_id_ + << " remote path: " << remote_path; + return true; + } + + LOG(ERROR) << "UploadMetaToSentinel failed " + << " s3_bucket: " << s3_bucket + << " group_id: " << group_id_ + << " remote path: " << remote_path; + return false; +} +#endif From 0dea2641ea8cf21518c44f180bef28a9c211ae25 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 25 Mar 2024 15:44:47 +0800 Subject: [PATCH 032/116] fix by review comments --- src/pika_repl_bgworker.cc | 2 ++ src/storage/src/redis.cc | 3 ++- src/storage/src/storage.cc | 20 +++++++++++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 159c4b7d6b..204c4b23a0 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -152,6 +152,8 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); if (!db) { LOG(WARNING) << woker->db_name_ <<" not found"; + slave_db->SetReplState(ReplState::kTryConnect); + return; } db->Logger()->Put(binlog_res.binlog()); auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 0db395aeed..b1fc7d3f2d 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -598,7 +598,8 @@ Status Redis::ApplyWAL(const std::string& replication_sequence, int type, const rlr.contents = content; rlr.type = rtype; - auto s = db_->ApplyReplicationLogRecord(rlr, repli_seq, nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); + auto s = db_->ApplyReplicationLogRecord(rlr, replication_sequence, + nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); LOG(WARNING) << "applying rocksdb WAL, rocksdb_id: " << index_ << " replication sequence: " << replication_sequence << " log record type: " << rtype diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index fb527d75e7..c77f8221ce 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -2319,7 +2319,12 @@ Status Storage::StopScanKeyNum() { return Status::OK(); } + +#ifdef USE_S3 +rocksdb::DBCloud* Storage::GetDBByIndex(int index) { +#else rocksdb::DB* Storage::GetDBByIndex(int index) { +#endif if (index < 0 || index >= db_instance_num_) { LOG(WARNING) << "Invalid DB Index: " << index << "total: " << db_instance_num_; @@ -2401,7 +2406,7 @@ Status Storage::EnableAutoCompaction(const OptionType& option_type, void Storage::GetRocksDBInfo(std::string& info) { char temp[12] = {0}; for (const auto& inst : insts_) { - snprintf(temp, sizeof(temp), "instance%d_", inst->GetIndex()); + 
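For reference, the JSON document assembled by UploadMetaToSentinel above carries these fields (the values below are placeholders, not real identifiers):

{
  "term_id":   "<lease_term_id_>",
  "group_id":  "<group_id_>",
  "s3_bucket": "<bucket holding the manifest>",
  "s3_path":   "<remote manifest path>",
  "content":   "<manifest file contents>"
}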
snprintf(temp, sizeof(temp), "instance%2d", inst->GetIndex()); inst->GetRocksDBInfo(info, temp); } } @@ -2449,6 +2454,19 @@ void Storage::DisableWal(const bool is_wal_disable) { } #ifdef USE_S3 +Status Storage::SwitchMaster(bool is_old_master, bool is_new_master) { + Status s = Status::OK(); + for (const auto& inst : insts_) { + s = inst->SwitchMaster(is_old_master, is_new_master); + if (!s.ok()) { + LOG(WARNING) << "switch mode failed, when switch from " + << (is_old_master ? "master" : "slave") << " to " + << (is_new_master ? "master" : "slave"); + return s; + } + } + return s; +} Status Storage::ApplyWAL(int rocksdb_id, const std::string& repli_seq, int type, const std::string& content) { From de3a17552000bb58ecc1e69a34372977772251e7 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 25 Mar 2024 16:42:02 +0800 Subject: [PATCH 033/116] add abstract class for binlog --- include/pika_binlog.h | 5 +++-- src/pika_db.cc | 2 +- src/pstd/include/pstd_wal.h | 23 +++++++++++++++++++++++ src/storage/include/storage/storage.h | 3 ++- src/storage/src/redis.cc | 11 +++++------ src/storage/src/redis.h | 15 ++++++++++----- src/storage/src/storage.cc | 4 ++-- 7 files changed, 46 insertions(+), 17 deletions(-) create mode 100644 src/pstd/include/pstd_wal.h diff --git a/include/pika_binlog.h b/include/pika_binlog.h index 872fe2a87a..fa36a6aac4 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -12,6 +12,7 @@ #include "pstd/include/pstd_mutex.h" #include "pstd/include/pstd_status.h" #include "pstd/include/noncopyable.h" +#include "pstd/include/pstd_wal.h" #include "include/pika_define.h" std::string NewFileName(const std::string& name, uint32_t current); @@ -43,7 +44,7 @@ class Version final : public pstd::noncopyable { std::shared_ptr save_; }; -class Binlog : public pstd::noncopyable { +class Binlog : public pstd::WalWriter { public: Binlog(std::string Binlog_path, int file_size = 100 * 1024 * 1024); virtual ~Binlog(); @@ -53,7 +54,7 @@ class Binlog : public pstd::noncopyable { virtual pstd::Status Put(const std::string& item); - virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const std::string& rep_seq); + virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const std::string& rep_seq) override; virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* diff --git a/src/pika_db.cc b/src/pika_db.cc index 61f55e3891..86985eda26 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -40,7 +40,7 @@ DB::DB(std::string db_name, const std::string& db_path, storage_ = std::make_shared(g_pika_conf->db_instance_num(), g_pika_conf->default_slot_num(), g_pika_conf->classic_mode()); #ifdef USE_S3 - auto wal_writer = g_pika_rm->GetSyncMasterDBByName(db_name)->StableLogger().get(); + std::shared_ptr wal_writer = g_pika_rm->GetSyncMasterDBByName(db_name)->StableLogger()->Logger(); rocksdb::Status s = storage_->Open(g_pika_server->storage_options(), db_path_, wal_writer); #else rocksdb::Status s = storage_->Open(g_pika_server->storage_options(), db_path_); diff --git a/src/pstd/include/pstd_wal.h b/src/pstd/include/pstd_wal.h new file mode 100644 index 0000000000..514cff427f --- /dev/null +++ b/src/pstd/include/pstd_wal.h @@ -0,0 +1,23 @@ +// Copyright (c) 2024-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#ifndef __PSTD_WAL_H__ +#define __PSTD_WAL_H__ + +#include "pstd/include/pstd_status.h" +#include "pstd/include/noncopyable.h" + +namespace pstd { + +// virutal base class for wal writer +class WalWriter : public noncopyable { +public: + virtual ~WalWriter() {} + virtual Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, + const std::string& rep_seq) = 0; +}; +} // namespace pstd + +#endif // __PSTD_WAL_H__ \ No newline at end of file diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index 0b31fcaac8..c4a17ed9c8 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -24,6 +24,7 @@ #ifdef USE_S3 #include "rocksdb/cloud/db_cloud.h" #include "rocksdb/cloud/cloud_file_system.h" +#include "pstd/include/pstd_wal.h" #endif #include "slot_indexer.h" @@ -188,7 +189,7 @@ class Storage { Storage(int db_instance_num, int slot_num, bool is_classic_mode); ~Storage(); - Status Open(const StorageOptions& storage_options, const std::string& db_path, void* wal_writer = nullptr); + Status Open(const StorageOptions& storage_options, const std::string& db_path, std::shared_ptr wal_writer = nullptr); Status LoadCursorStartKey(const DataType& dtype, int64_t cursor, char* type, std::string* start_key); diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index b1fc7d3f2d..28d1d007f4 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -25,7 +25,7 @@ rocksdb::Comparator* ZSetsScoreKeyComparator() { return &zsets_score_key_compare; } -Redis::Redis(Storage* const s, int32_t index, void* wal_writer) +Redis::Redis(Storage* const s, int32_t index, std::shared_ptr wal_writer) : storage_(s), index_(index), lock_mgr_(std::make_shared(1000, 0, std::make_shared())), small_compaction_threshold_(5000), @@ -598,8 +598,7 @@ Status Redis::ApplyWAL(const std::string& replication_sequence, int type, const rlr.contents = content; rlr.type = rtype; - auto s = db_->ApplyReplicationLogRecord(rlr, replication_sequence, - nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); + auto s = db_->ApplyReplicationLogRecord(rlr, replication_sequence, nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); LOG(WARNING) << "applying rocksdb WAL, rocksdb_id: " << index_ << " replication sequence: " << replication_sequence << " log record type: " << rtype @@ -613,15 +612,15 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re //TODO(wangshaoyi): get from storage int db_id = 0; if (redis_inst->opened_) { - LOG(WANRING) << "rocksdb not opened yet, skip write binlog"; + LOG(WARNING) << "rocksdb not opened yet, skip write binlog"; return "0"; } std::string replication_sequence_str = std::to_string(counter_.fetch_add(1)); - auto s = ((StableLog*)(wal_writer_))->Logger()-> Put(record.contents, db_id, + auto s = wal_writer_->Put(record.contents, db_id, redis_inst->GetIndex(), replication_sequence_str); if (!s.ok()) { LOG(ERROR) << "write binlog failed, db_id: " << db_id - << " rocksdb_id: " << rocksdb_id + << " rocksdb_id: " << redis_inst->GetIndex() << " replication sequence: " << replication_sequence_str; } return replication_sequence_str; diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 2f0fe033ad..9eb25087b8 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -12,6 +12,7 @@ #ifdef USE_S3 #include "rocksdb/cloud/db_cloud.h" +#include 
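The new pstd::WalWriter base class above lets the storage layer write binlog entries without depending on pika's Binlog type (Binlog overrides Put per the pika_binlog.h change earlier in this series). A minimal hedged example of a custom implementation, hypothetical and intended only to illustrate the interface (e.g. for unit tests):

#include <string>
#include <utility>
#include <vector>
#include "pstd/include/pstd_wal.h"

// Hypothetical in-memory WAL: records (sequence, payload) pairs instead of
// appending to a real binlog file.
class InMemoryWal : public pstd::WalWriter {
 public:
  pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id,
                   const std::string& rep_seq) override {
    (void)db_id;
    (void)rocksdb_id;
    records_.emplace_back(rep_seq, item);
    return pstd::Status::OK();
  }

 private:
  std::vector<std::pair<std::string, std::string>> records_;
};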
"pstd/include/pstd_wal.h" #else #include "rocksdb/db.h" #endif @@ -40,7 +41,7 @@ using Slice = rocksdb::Slice; class LogListener; class Redis { public: - Redis(Storage* storage, int32_t index, void* wal_logger = nullptr); + Redis(Storage* storage, int32_t index, std::shared_ptr wal_writer = nullptr); virtual ~Redis(); #ifdef USE_S3 @@ -432,9 +433,13 @@ class Redis { inline Status SetFirstOrLastID(const rocksdb::Slice& key, StreamMetaValue& stream_meta, bool is_set_first, rocksdb::ReadOptions& read_options); +public: + bool opened_ = false; +#ifdef USE_S3 + Status ApplyWAL(const std::string& replication_sequence, int type, const std::string& content); +#endif private: - bool opened_ = false; int32_t index_ = 0; Storage* const storage_; std::shared_ptr lock_mgr_; @@ -442,6 +447,7 @@ class Redis { std::string db_path_ = ""; rocksdb::DBCloud* db_ = nullptr; std::shared_ptr log_listener_; + StorageOptions storage_options_; #else rocksdb::DB* db_ = nullptr; #endif @@ -475,14 +481,13 @@ class Redis { std::unique_ptr cloud_env_; rocksdb::CloudFileSystem* cfs_; Status ReOpenRocksDB(const storage::StorageOptions& opt); - Status ApplyWAL(const std::string& replication_sequence, int type, const std::string& content); #endif }; // TODO(wangshaoyi): implement details class LogListener : public rocksdb::ReplicationLogListener { public: - LogListener(int rocksdb_id, void* inst, void* wal_writer) + LogListener(int rocksdb_id, void* inst, std::shared_ptr wal_writer) : rocksdb_id_(rocksdb_id), counter_(0), inst_(inst), wal_writer_(wal_writer) {} std::string OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) override; @@ -495,7 +500,7 @@ class LogListener : public rocksdb::ReplicationLogListener { int rocksdb_id_ = 0; std::atomic counter_ = {0}; void* inst_ = nullptr; - void* wal_writer_ = nullptr; + std::shared_ptr wal_writer_ = nullptr; }; } // namespace storage diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index c77f8221ce..4f7c3d4895 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -89,7 +89,7 @@ static std::string AppendSubDirectory(const std::string& db_path, int index) { } } -Status Storage::Open(const StorageOptions& storage_options, const std::string& db_path, void* wal_writer) { +Status Storage::Open(const StorageOptions& storage_options, const std::string& db_path, std::shared_ptr wal_writer) { mkpath(db_path.c_str(), 0755); int inst_count = db_instance_num_; @@ -2468,7 +2468,7 @@ Status Storage::SwitchMaster(bool is_old_master, bool is_new_master) { return s; } -Status Storage::ApplyWAL(int rocksdb_id, const std::string& repli_seq, +Status Storage::ApplyWAL(int rocksdb_id, const std::string& replication_sequence, int type, const std::string& content) { auto& inst = insts_[rocksdb_id]; return inst->ApplyWAL(replication_sequence, type, content); From b6c8d445ea744bbd9abd8aea97060139c63a5fb6 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 26 Mar 2024 17:50:41 +0800 Subject: [PATCH 034/116] fix by review comments --- src/pika_server.cc | 1 + src/storage/src/redis.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pika_server.cc b/src/pika_server.cc index 4592e117a9..e95da54a0f 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1867,3 +1867,4 @@ bool PikaServer::UploadMetaToSentinel(const std::string& s3_bucket, return false; } #endif + diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 9eb25087b8..d41e0fed7f 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -444,7 
+444,7 @@ class Redis { Storage* const storage_; std::shared_ptr lock_mgr_; #ifdef USE_S3 - std::string db_path_ = ""; + std::string db_path_; rocksdb::DBCloud* db_ = nullptr; std::shared_ptr log_listener_; StorageOptions storage_options_; From aee839b58a2965625dcc2ce31a7851212bfd4a2f Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Wed, 27 Mar 2024 20:48:42 +0800 Subject: [PATCH 035/116] fix by review comments --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 286526ab5d..20d77bb304 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -846,7 +846,7 @@ target_link_directories(${PROJECT_NAME} PUBLIC ${INSTALL_LIBDIR}) if (USE_S3) -find_package(AWSSDK REQUIRED COMPONENTS ${SERVICE_COMPONENTS}) +find_package(AWSSDK REQUIRED COMPONENTS) target_link_libraries(${PROJECT_NAME} ${AWSSDK_LINK_LIBRARIES}) endif() From f134419fe21c4bf860c82cee3eba87573e769c54 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 26 Mar 2024 17:48:29 +0800 Subject: [PATCH 036/116] fix by review comments --- codis/go.mod | 4 ++++ codis/go.sum | 5 +++++ codis/pkg/proxy/redis/conn.go | 10 +++++++--- codis/pkg/proxy/redis/encoder.go | 27 +++++++++++++++++++++++++++ codis/pkg/utils/bufio2/bufio.go | 8 ++++++++ src/pika_db.cc | 5 +++-- src/pika_repl_bgworker.cc | 11 +++++------ src/storage/include/storage/storage.h | 2 ++ src/storage/src/redis.cc | 8 ++++++++ src/storage/src/redis.h | 5 ++--- src/storage/src/storage.cc | 6 ++++++ 11 files changed, 77 insertions(+), 14 deletions(-) diff --git a/codis/go.mod b/codis/go.mod index e4af7493af..1bd2e7bff6 100644 --- a/codis/go.mod +++ b/codis/go.mod @@ -18,6 +18,7 @@ require ( github.com/martini-contrib/render v0.0.0-20150707142108-ec18f8345a11 github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524 + github.com/stretchr/testify v1.8.0 go.etcd.io/etcd/client/v2 v2.305.7 golang.org/x/net v0.17.0 gopkg.in/alexcesaro/statsd.v2 v2.0.0 @@ -26,10 +27,13 @@ require ( require ( github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 // indirect github.com/coreos/go-semver v0.3.1 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect go.etcd.io/etcd/api/v3 v3.5.7 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.7 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/codis/go.sum b/codis/go.sum index f30f9e17be..9bdb0e40f8 100644 --- a/codis/go.sum +++ b/codis/go.sum @@ -41,8 +41,11 @@ github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414/go.mod h1:gi+0 github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524 h1:U+dpuWn15gFCqZkqhpUd5a85X1Oe1Tb+DeGF3nn6Bvs= github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524/go.mod h1:A/ik9Cf2cSgEVcmTWlvTfCxyFgoL1UP/WbevsdDeguc= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= 
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= go.etcd.io/etcd/api/v3 v3.5.7 h1:sbcmosSVesNrWOJ58ZQFitHMdncusIifYcrBfwrlJSY= go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA= go.etcd.io/etcd/client/pkg/v3 v3.5.7 h1:y3kf5Gbp4e4q7egZdn5T7W9TSHUvkClN6u+Rq9mEOmg= @@ -53,6 +56,8 @@ golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= gopkg.in/alexcesaro/statsd.v2 v2.0.0 h1:FXkZSCZIH17vLCO5sO2UucTHsH9pc+17F6pl3JVCwMc= gopkg.in/alexcesaro/statsd.v2 v2.0.0/go.mod h1:i0ubccKGzBVNBpdGV5MocxyA/XlLUJzA7SLonnE4drU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/codis/pkg/proxy/redis/conn.go b/codis/pkg/proxy/redis/conn.go index 1128c228c2..3cd4b51728 100644 --- a/codis/pkg/proxy/redis/conn.go +++ b/codis/pkg/proxy/redis/conn.go @@ -174,13 +174,17 @@ func (p *FlushEncoder) NeedFlush() bool { func (p *FlushEncoder) Flush(force bool) error { if force || p.NeedFlush() { + hdata := p.Conn.HeadData() if err := p.Conn.Flush(); err != nil { return err +hdata := p.Conn.HeadData() } p.nbuffered = 0 +var hdata string +var hsize int } - return nil -} + return := + := 0 func (p *FlushEncoder) Encode(resp *Resp) error { if err := p.Conn.Encode(resp, false); err != nil { @@ -196,6 +200,6 @@ func (p *FlushEncoder) EncodeMultiBulk(multi []*Resp) error { return err } else { p.nbuffered++ - return nil + , hdata: %s, hsize: %dreturn ni, hdata, hsizel } } diff --git a/codis/pkg/proxy/redis/encoder.go b/codis/pkg/proxy/redis/encoder.go index 5437403c45..425ecfeb26 100644 --- a/codis/pkg/proxy/redis/encoder.go +++ b/codis/pkg/proxy/redis/encoder.go @@ -1,4 +1,23 @@ // Copyright 2016 CodisLabs. All Rights Reserved. +R1 +1/Flush +nnnkk:wq + +R1 +1/Flush +nnnkk:wgit diff +find . -name bufio.go +vim pk ut bufio buf . +/flush +kkko +func (b *Writer) HeadData() string { + dsize := b.wpos + if dsize > 40 { + dsize = 40 + } + return string(b.buf[:dsize]) +} + // Licensed under the MIT (MIT-LICENSE.txt) license. 
package redis @@ -87,6 +106,14 @@ func (e *Encoder) EncodeMultiBulk(multi []*Resp, flush bool) error { return e.Err } +func (e *Encoder) HeadData() (string, int) { + return e.bw.HeadData() +} + +func (e *Encoder) HeadData() string { + return e.bw.HeadData(), b.wpos +} + func (e *Encoder) Flush() error { if e.Err != nil { return errors.Trace(ErrFailedEncoder) diff --git a/codis/pkg/utils/bufio2/bufio.go b/codis/pkg/utils/bufio2/bufio.go index c5dc3e5b79..38f91c29ba 100644 --- a/codis/pkg/utils/bufio2/bufio.go +++ b/codis/pkg/utils/bufio2/bufio.go @@ -202,6 +202,14 @@ func (b *Writer) Flush() error { return b.flush() } +func (b *Writer) HeadData() string { + dsize := b.wpos + if dsize > 20 { + dsize = 20 + } + return string(b.buf[:dsize]) +} + func (b *Writer) flush() error { if b.err != nil { return b.err diff --git a/src/pika_db.cc b/src/pika_db.cc index 86985eda26..ed47b16066 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -39,11 +39,12 @@ DB::DB(std::string db_name, const std::string& db_path, log_path_ = DBPath(log_path, "log_" + db_name_); storage_ = std::make_shared(g_pika_conf->db_instance_num(), g_pika_conf->default_slot_num(), g_pika_conf->classic_mode()); + rocksdb::Status s; #ifdef USE_S3 std::shared_ptr wal_writer = g_pika_rm->GetSyncMasterDBByName(db_name)->StableLogger()->Logger(); - rocksdb::Status s = storage_->Open(g_pika_server->storage_options(), db_path_, wal_writer); + s = storage_->Open(g_pika_server->storage_options(), db_path_, wal_writer); #else - rocksdb::Status s = storage_->Open(g_pika_server->storage_options(), db_path_); + s = storage_->Open(g_pika_server->storage_options(), db_path_); #endif pstd::CreatePath(db_path_); pstd::CreatePath(log_path_); diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 204c4b23a0..f87281a73d 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -142,12 +142,11 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { slave_db->SetReplState(ReplState::kTryConnect); return; } - //Waiting for interface support - //get master binlog drop point - /* - * point =getpoint() - * if point.filenum>binlogitem_.filenum || (point.filenum==binlogitem_.filenum && point.offset>=binlogitem_.offset) - * {continue;}*/ + + if (storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item->content())) { + continue; + } + std::shared_ptr db = g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); if (!db) { diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index c4a17ed9c8..4302f402f2 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -201,6 +201,8 @@ class Storage { Status ApplyWAL(int rocksdb_id, const std::string& repli_seq, int type, const std::string& content); + bool ShouldSkip(int rocksdb_id, const std::string& content); + // Strings Commands // Set key to hold the string value. 
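ShouldSkip() is the guard the replication worker calls before applying a binlog entry: the entry's content is a serialized rocksdb WriteBatch, and that batch's leading sequence number must be exactly one past the instance's latest sequence number, otherwise the entry has already been applied (or arrived out of order) and is dropped. A minimal standalone sketch of the same check, using the WriteBatchInternal helpers this series relies on in redis.cc; ShouldSkipSketch and the raw rocksdb::DB* parameter are an illustrative rephrasing, not the patch's member function:

#include <cstdint>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/write_batch.h"
#include "db/write_batch_internal.h"  // internal RocksDB header, as included by redis.cc

bool ShouldSkipSketch(rocksdb::DB* db, const std::string& content) {
  rocksdb::WriteBatch batch;
  // Rebuild the batch exactly as the master serialized it into the binlog entry.
  rocksdb::Status s = rocksdb::WriteBatchInternal::SetContents(&batch, content);
  if (!s.ok()) {
    return true;  // malformed payload, do not apply it
  }
  uint64_t latest = db->GetLatestSequenceNumber();
  // Apply only the immediate successor of what this instance already holds.
  return rocksdb::WriteBatchInternal::Sequence(&batch) != latest + 1;
}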
if key diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 28d1d007f4..f52063d3d2 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -591,6 +591,14 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { return Status::OK(); } + +bool Redis::ShouldSkip(const std::string content) { + rocksdb::WriteBatch batch; + s = rocksdb::WriteBatchInternal::SetContents(&batch, std::move(record.contents)); + auto sq_number = db_->GetLatestSequenceNumber(); + return WriteBatchInternal::Sequence(&batch) != sq_number + 1; +} + Status Redis::ApplyWAL(const std::string& replication_sequence, int type, const std::string& content) { rocksdb::ReplicationLogRecord::Type rtype = static_cast(type); rocksdb::ReplicationLogRecord rlr; diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index d41e0fed7f..516eaad1fb 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -395,6 +395,8 @@ class Redis { } #ifdef USE_S3 + Status ApplyWAL(const std::string& replication_sequence, int type, const std::string& content); + Status ShouldSkip(const std::string content); Status SwitchMaster(bool is_old_master, bool is_new_master); void ResetLogListener(std::shared_ptr handle) { log_listener_ = handle; @@ -435,9 +437,6 @@ class Redis { public: bool opened_ = false; -#ifdef USE_S3 - Status ApplyWAL(const std::string& replication_sequence, int type, const std::string& content); -#endif private: int32_t index_ = 0; diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index 4f7c3d4895..4a4b001ab2 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -2473,6 +2473,12 @@ Status Storage::ApplyWAL(int rocksdb_id, const std::string& replication_sequence auto& inst = insts_[rocksdb_id]; return inst->ApplyWAL(replication_sequence, type, content); } + + +bool Storage::ShouldSkip(int rocksdb_id, const std::string& content) { + auto& inst = insts_[rocksdb_id]; + return inst->ShouldSkip(content); +} #endif } // namespace storage From c3fb68a41364cbc81fbe3fccd4eb770fbc89235c Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 26 Mar 2024 22:15:01 +0800 Subject: [PATCH 037/116] remove unused code --- codis/pkg/proxy/redis/conn.go | 10 +++------- codis/pkg/proxy/redis/encoder.go | 27 --------------------------- codis/pkg/utils/bufio2/bufio.go | 8 -------- 3 files changed, 3 insertions(+), 42 deletions(-) diff --git a/codis/pkg/proxy/redis/conn.go b/codis/pkg/proxy/redis/conn.go index 3cd4b51728..1128c228c2 100644 --- a/codis/pkg/proxy/redis/conn.go +++ b/codis/pkg/proxy/redis/conn.go @@ -174,17 +174,13 @@ func (p *FlushEncoder) NeedFlush() bool { func (p *FlushEncoder) Flush(force bool) error { if force || p.NeedFlush() { - hdata := p.Conn.HeadData() if err := p.Conn.Flush(); err != nil { return err -hdata := p.Conn.HeadData() } p.nbuffered = 0 -var hdata string -var hsize int } - return := - := 0 + return nil +} func (p *FlushEncoder) Encode(resp *Resp) error { if err := p.Conn.Encode(resp, false); err != nil { @@ -200,6 +196,6 @@ func (p *FlushEncoder) EncodeMultiBulk(multi []*Resp) error { return err } else { p.nbuffered++ - , hdata: %s, hsize: %dreturn ni, hdata, hsizel + return nil } } diff --git a/codis/pkg/proxy/redis/encoder.go b/codis/pkg/proxy/redis/encoder.go index 425ecfeb26..5437403c45 100644 --- a/codis/pkg/proxy/redis/encoder.go +++ b/codis/pkg/proxy/redis/encoder.go @@ -1,23 +1,4 @@ // Copyright 2016 CodisLabs. All Rights Reserved. -R1 -1/Flush -nnnkk:wq - -R1 -1/Flush -nnnkk:wgit diff -find . 
-name bufio.go -vim pk ut bufio buf . -/flush -kkko -func (b *Writer) HeadData() string { - dsize := b.wpos - if dsize > 40 { - dsize = 40 - } - return string(b.buf[:dsize]) -} - // Licensed under the MIT (MIT-LICENSE.txt) license. package redis @@ -106,14 +87,6 @@ func (e *Encoder) EncodeMultiBulk(multi []*Resp, flush bool) error { return e.Err } -func (e *Encoder) HeadData() (string, int) { - return e.bw.HeadData() -} - -func (e *Encoder) HeadData() string { - return e.bw.HeadData(), b.wpos -} - func (e *Encoder) Flush() error { if e.Err != nil { return errors.Trace(ErrFailedEncoder) diff --git a/codis/pkg/utils/bufio2/bufio.go b/codis/pkg/utils/bufio2/bufio.go index 38f91c29ba..c5dc3e5b79 100644 --- a/codis/pkg/utils/bufio2/bufio.go +++ b/codis/pkg/utils/bufio2/bufio.go @@ -202,14 +202,6 @@ func (b *Writer) Flush() error { return b.flush() } -func (b *Writer) HeadData() string { - dsize := b.wpos - if dsize > 20 { - dsize = 20 - } - return string(b.buf[:dsize]) -} - func (b *Writer) flush() error { if b.err != nil { return b.err From fcc46ce6cf9742f8d34ef7f54830475d932a9966 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 26 Mar 2024 22:18:20 +0800 Subject: [PATCH 038/116] remove unused code --- codis/go.mod | 4 ---- codis/go.sum | 5 ----- 2 files changed, 9 deletions(-) diff --git a/codis/go.mod b/codis/go.mod index 1bd2e7bff6..e4af7493af 100644 --- a/codis/go.mod +++ b/codis/go.mod @@ -18,7 +18,6 @@ require ( github.com/martini-contrib/render v0.0.0-20150707142108-ec18f8345a11 github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524 - github.com/stretchr/testify v1.8.0 go.etcd.io/etcd/client/v2 v2.305.7 golang.org/x/net v0.17.0 gopkg.in/alexcesaro/statsd.v2 v2.0.0 @@ -27,13 +26,10 @@ require ( require ( github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 // indirect github.com/coreos/go-semver v0.3.1 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect go.etcd.io/etcd/api/v3 v3.5.7 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.7 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/codis/go.sum b/codis/go.sum index 9bdb0e40f8..f30f9e17be 100644 --- a/codis/go.sum +++ b/codis/go.sum @@ -41,11 +41,8 @@ github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414/go.mod h1:gi+0 github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524 h1:U+dpuWn15gFCqZkqhpUd5a85X1Oe1Tb+DeGF3nn6Bvs= github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524/go.mod h1:A/ik9Cf2cSgEVcmTWlvTfCxyFgoL1UP/WbevsdDeguc= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= go.etcd.io/etcd/api/v3 v3.5.7 h1:sbcmosSVesNrWOJ58ZQFitHMdncusIifYcrBfwrlJSY= go.etcd.io/etcd/api/v3 v3.5.7/go.mod 
h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA= go.etcd.io/etcd/client/pkg/v3 v3.5.7 h1:y3kf5Gbp4e4q7egZdn5T7W9TSHUvkClN6u+Rq9mEOmg= @@ -56,8 +53,6 @@ golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= gopkg.in/alexcesaro/statsd.v2 v2.0.0 h1:FXkZSCZIH17vLCO5sO2UucTHsH9pc+17F6pl3JVCwMc= gopkg.in/alexcesaro/statsd.v2 v2.0.0/go.mod h1:i0ubccKGzBVNBpdGV5MocxyA/XlLUJzA7SLonnE4drU= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 124dd48b6aa7df5d2ee1e77de3ecbda15afc8dac Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 26 Mar 2024 22:50:50 +0800 Subject: [PATCH 039/116] implement rocksdb_cloud's upload_meta_func --- src/pika_server.cc | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/src/pika_server.cc b/src/pika_server.cc index e95da54a0f..cdc16a2b06 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1419,12 +1419,8 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); - - //TODO(wangshoyi): implement upload rocksdb-cloud meta to dashboard - storage_options.cloud_fs_options.upload_meta_func = [](const std::string& a, const std::string& b, const std::string& c) ->bool { - LOG(WARNING) << "args: " << a << " : " << b << " : " << c; - return true; - }; + storage_options.cloud_fs_options.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } @@ -1818,14 +1814,43 @@ void PikaServer::CacheConfigInit(cache::CacheConfig& cache_cfg) { } #ifdef USE_S3 -bool PikaServer::UploadMetaToSentinel(const std::string& s3_bucket, - const std::string& remote_path, - const std::string& content) { +bool PikaServer::UploadMetaToSentinel(const std::string& local_path, + const std::string& s3_bucket, + const std::string& object_path) { Aws::String url(sentinel_addr_); if (sentinel_client_ == nullptr) { sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); } + FILE * fp; + long f_size; + char * buffer; + size_t result; + + fp = fopen(local_path.c_str(), "rb"); + if (fp == nullptr) { + LOG(WANRING) << "read file failed, local_path: " << local_path + << " error msg: " << strerror(errno); + return false; + } + + fseek(fp, 0 , SEEK_END); + f_size = ftell(fp); + rewind(fp); + + DEFER { + fclose(fp); + . 
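upload_meta_func is the hook rocksdb-cloud invokes when a manifest has to be pushed out of band; the patch binds it to PikaServer::UploadMetaToSentinel(local_path, s3_bucket, object_path), which must return true on success. The same wiring written as a lambda, only a sketch: the bool(const std::string&, const std::string&, const std::string&) signature is inferred from the std::bind call above and from the placeholder lambda it replaces.

// Inside PikaServer::InitStorageOptions(), equivalent to the std::bind above.
cloud_fs_opts.upload_meta_func =
    [this](const std::string& local_path, const std::string& s3_bucket,
           const std::string& object_path) -> bool {
      return UploadMetaToSentinel(local_path, s3_bucket, object_path);
    };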
delete [] buffer; + }; + + buffer = new char[f_size]; + result = fread(buffer, 1, f_size, fp); + if (result != f_size) { + LOG(WANRING) << "read file failed, local_path: " << local_path + << " fread size: " << result << "fsize: " << f_size; + } + std::string content(buffer, result); + // construct request body Json::JsonValue request_doc; request_doc.WithString("term_id", Aws::String(lease_term_id_)); From 320cb9f887478ac3e370adf699bdfa2ab9273284 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 26 Mar 2024 22:51:39 +0800 Subject: [PATCH 040/116] implement rocksdb_cloud's upload_meta_func --- src/pika_server.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pika_server.cc b/src/pika_server.cc index cdc16a2b06..1841caa522 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -5,6 +5,7 @@ #include #include +#include #include #include #include From 4d6d1caca1806473c4a4e18e9a7d99838bf8b44b Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 26 Mar 2024 22:52:12 +0800 Subject: [PATCH 041/116] implement rocksdb_cloud's upload_meta_func --- src/pika_server.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pika_server.cc b/src/pika_server.cc index 1841caa522..ebb279ba5e 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -3,6 +3,7 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#include #include #include #include From 2b212d0226eeea501072aeefdc56b7fa9934d1ac Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Wed, 27 Mar 2024 10:10:28 +0800 Subject: [PATCH 042/116] support upload meta to sentinel --- src/pika_server.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/pika_server.cc b/src/pika_server.cc index ebb279ba5e..fcbcd92cea 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1831,8 +1831,9 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, fp = fopen(local_path.c_str(), "rb"); if (fp == nullptr) { - LOG(WANRING) << "read file failed, local_path: " << local_path - << " error msg: " << strerror(errno); + LOG(WANRING) << "read file failed," + << " local_path: " << local_path + << " error: " << strerror(errno); return false; } @@ -1841,8 +1842,8 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, rewind(fp); DEFER { - fclose(fp); . 
delete [] buffer; + fclose(fp); }; buffer = new char[f_size]; @@ -1861,7 +1862,7 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, request_doc.WithString("s3_path", Aws::String(remote_path)); request_doc.WithString("content", Aws::String(content)); - std::shared_ptr body = Aws::MakeShared("wsy demo"); + std::shared_ptr body = Aws::MakeShared(""); *body << request_doc.View().WriteReadable(); auto request = CreateHttpRequest(url, HttpMethod::HTTP_POST, @@ -1877,7 +1878,12 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, auto response = sentinel_client_->MakeRequest(request); if (response->HasClientError()) { - exit(1); + LOG(ERROR) << "UploadMetaToSentinel failed" + << " s3_bucket: " << s3_bucket + << " group_id: " << group_id_ + << " remote path: " << remote_path; + return false; + } if (response->GetResponseCode() == HttpResponseCode::OK) { LOG(ERROR) << "UploadMetaToSentinel success" From caf5e18f4106ffc8182356867bba7f4e0e1e51f4 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Wed, 27 Mar 2024 14:16:49 +0800 Subject: [PATCH 043/116] fix by review comments --- src/pika_server.cc | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/pika_server.cc b/src/pika_server.cc index fcbcd92cea..bad00c6e75 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1823,13 +1823,8 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, if (sentinel_client_ == nullptr) { sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); } - - FILE * fp; - long f_size; - char * buffer; - size_t result; - fp = fopen(local_path.c_str(), "rb"); + FILE* fp = fopen(local_path.c_str(), "rb"); if (fp == nullptr) { LOG(WANRING) << "read file failed," << " local_path: " << local_path @@ -1838,16 +1833,16 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, } fseek(fp, 0 , SEEK_END); - f_size = ftell(fp); + long f_size = ftell(fp); rewind(fp); + char* buffer = new char[f_size]; DEFER { . 
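At this point the callback only needs the manifest file's bytes in a std::string before it builds the POST body for the sentinel, which is what the fopen/fseek/fread code above does. An equivalent standard-library sketch, purely illustrative and not what the patch uses:

#include <fstream>
#include <optional>
#include <sstream>
#include <string>

// Slurp a whole file into memory; std::nullopt if the file cannot be opened.
std::optional<std::string> ReadFileToString(const std::string& path) {
  std::ifstream in(path, std::ios::binary);
  if (!in) {
    return std::nullopt;
  }
  std::ostringstream buf;
  buf << in.rdbuf();
  return buf.str();
}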
delete [] buffer; fclose(fp); }; - buffer = new char[f_size]; - result = fread(buffer, 1, f_size, fp); + size_t result = fread(buffer, 1, f_size, fp); if (result != f_size) { LOG(WANRING) << "read file failed, local_path: " << local_path << " fread size: " << result << "fsize: " << f_size; From 936cf33083725bca1528ece6c6fc6a6c2ca85d29 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 28 Mar 2024 08:20:07 +0800 Subject: [PATCH 044/116] change conf for cloud mode --- CMakeLists.txt | 8 +++++--- conf/pika.conf | 14 +++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 20d77bb304..eb18ea339e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -575,6 +575,7 @@ endif() set(PROTOBUF_INCLUDE_DIR ${INSTALL_INCLUDEDIR}) set(PROTOBUF_LIBRARY ${INSTALL_LIBDIR}/${LIB_PROTOBUF}) set(PROTOBUF_PROTOC ${STAGED_INSTALL_PREFIX}/bin/protoc) +set(USE_S3 1) if (USE_S3) ExternalProject_Add(rocksdb @@ -588,9 +589,10 @@ ExternalProject_Add(rocksdb ${LIBGPERF_NAME} ${LIBJEMALLOC_NAME} URL - https://github.com/longfar-ncy/rocksdb-cloud/archive/refs/heads/pika.zip + #temporary for debug, skip download from github + http://10.224.129.40:8000/rocksdb_cloud.tar.gz URL_HASH - MD5=761d1f7ccd6ea9aa86c1f2ce0e246a26 + MD5=fbcf0e166bfddf367063caa1dc583db8 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND @@ -898,7 +900,7 @@ target_link_libraries(${PROJECT_NAME} if (USE_S3) target_link_libraries(${PROJECT_NAME} - libaws-cpp-sdk-core.so + libaws-cpp-sdk-core.so libaws-cpp-sdk-transfer.so libaws-cpp-sdk-kinesis.so libaws-cpp-sdk-s3.so diff --git a/conf/pika.conf b/conf/pika.conf index 37f652300e..e8792ee220 100644 --- a/conf/pika.conf +++ b/conf/pika.conf @@ -513,30 +513,30 @@ cache-lfu-decay-time: 1 # aclfile : ../conf/users.acl ###################################################################### -# rocksdb-cloud options +# rocksdb-cloud options ####################################################################### # Normally, the AWS SDK will automatically determine the endpoint based on the selected region. # However, in special cases, you can manually specify the URL of the endpoint through this configuration, # such as local development. # Default: "" -# cloud-endpoint-override : +cloud-endpoint-override : 10.224.129.40:9000 # The aws access key id and aws secret key used for authentication when accessing aws s3. -cloud-access-key : -cloud-secret-key : +cloud-access-key : minioadmin +cloud-secret-key : minioadmin # The source bucket name prefix and suffix to use for storage on s3 -# The final bucket name is [prefix][suffix] +# The final bucket name is [prefix][suffix] # Default: "pika." 
# cloud-src-bucket-prefix : # Default: "database" # cloud-src-bucket-suffix : -# The source bucket region +# The source bucket region # cloud-src-bucket-region : # Configuration information of the destination bucket # cloud-dest-bucket-prefix : # cloud-dest-bucket-suffix : -# cloud-dest-bucket-region : \ No newline at end of file +# cloud-dest-bucket-region : From 152bd125e8aa3dbfa47ea512798f04e780fe58d6 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 28 Mar 2024 12:41:00 +0800 Subject: [PATCH 045/116] fix compile error --- src/storage/src/redis.cc | 22 +++++++++++----------- src/storage/src/redis.h | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index f52063d3d2..07df1a18b5 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -6,6 +6,7 @@ #include #include "rocksdb/env.h" +#include "db/write_batch_internal.h" #include "src/redis.h" #include "rocksdb/options.h" @@ -458,7 +459,7 @@ void Redis::SetCompactRangeOptions(const bool is_canceled) { default_compact_range_options_.canceled = new std::atomic(is_canceled); } else { default_compact_range_options_.canceled->store(is_canceled); - } + } } Status Redis::GetProperty(const std::string& property, uint64_t* out) { @@ -510,14 +511,14 @@ void Redis::ScanDatabase() { } #ifdef USE_S3 -Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::string& db_path) { +Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::string& db_path) { std::string s3_path = db_path[0] == '.' ? db_path.substr(1) : db_path; opts.src_bucket.SetObjectPath(s3_path); opts.dest_bucket.SetObjectPath(s3_path); Status s = rocksdb::CloudFileSystem::NewAwsFileSystem( - rocksdb::FileSystem::Default(), - opts, - nullptr, + rocksdb::FileSystem::Default(), + opts, + nullptr, &cfs_ ); if (s.ok()) { @@ -591,12 +592,11 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { return Status::OK(); } - -bool Redis::ShouldSkip(const std::string content) { +bool Redis::ShouldSkip(const std::string& content) { rocksdb::WriteBatch batch; - s = rocksdb::WriteBatchInternal::SetContents(&batch, std::move(record.contents)); + auto s = rocksdb::WriteBatchInternal::SetContents(&batch, content); auto sq_number = db_->GetLatestSequenceNumber(); - return WriteBatchInternal::Sequence(&batch) != sq_number + 1; + return rocksdb::WriteBatchInternal::Sequence(&batch) != sq_number + 1; } Status Redis::ApplyWAL(const std::string& replication_sequence, int type, const std::string& content) { @@ -627,11 +627,11 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re auto s = wal_writer_->Put(record.contents, db_id, redis_inst->GetIndex(), replication_sequence_str); if (!s.ok()) { - LOG(ERROR) << "write binlog failed, db_id: " << db_id + LOG(ERROR) << "write binlog failed, db_id: " << db_id << " rocksdb_id: " << redis_inst->GetIndex() << " replication sequence: " << replication_sequence_str; } - return replication_sequence_str; + return replication_sequence_str; } #endif } // namespace storage diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 516eaad1fb..ff66fd5ff7 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -396,7 +396,7 @@ class Redis { #ifdef USE_S3 Status ApplyWAL(const std::string& replication_sequence, int type, const std::string& content); - Status ShouldSkip(const std::string content); + bool ShouldSkip(const std::string& content); Status SwitchMaster(bool is_old_master, bool 
is_new_master); void ResetLogListener(std::shared_ptr handle) { log_listener_ = handle; @@ -473,7 +473,7 @@ class Redis { Status UpdateSpecificKeyStatistics(const DataType& dtype, const std::string& key, uint64_t count); Status UpdateSpecificKeyDuration(const DataType& dtype, const std::string& key, uint64_t duration); Status AddCompactKeyTaskIfNeeded(const DataType& dtype, const std::string& key, uint64_t count, uint64_t duration); - + #ifdef USE_S3 // rocksdb-cloud Status OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::string& db_path); From 81a31962216506b96e580fd7ef389c1545904afc Mon Sep 17 00:00:00 2001 From: baixin Date: Wed, 20 Mar 2024 18:06:00 +0800 Subject: [PATCH 046/116] =?UTF-8?q?Auto-increment=20TermID=E3=80=81UPLoad?= =?UTF-8?q?=20Manifest=20to=20S3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- codis/pkg/models/group.go | 1 + codis/pkg/topom/topom_api.go | 21 +++++++++++++++++++++ codis/pkg/topom/topom_group.go | 21 +++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/codis/pkg/models/group.go b/codis/pkg/models/group.go index 092ec2f117..88aa8cbc21 100644 --- a/codis/pkg/models/group.go +++ b/codis/pkg/models/group.go @@ -7,6 +7,7 @@ const MaxGroupId = 9999 type Group struct { Id int `json:"id"` + TermId int `json:"term_id"` Servers []*GroupServer `json:"servers"` Promoting struct { diff --git a/codis/pkg/topom/topom_api.go b/codis/pkg/topom/topom_api.go index 6b8d9cc0e3..dc5c0fa4e5 100644 --- a/codis/pkg/topom/topom_api.go +++ b/codis/pkg/topom/topom_api.go @@ -99,6 +99,7 @@ func newApiServer(t *Topom) http.Handler { r.Put("/remove/:xauth/:addr", api.SyncRemoveAction) }) r.Get("/info/:addr", api.InfoServer) + r.Put("/upload-s3/:xauth/:gid/:tid/:manifest", api.UploadManifestToS3) }) r.Group("/slots", func(r martini.Router) { r.Group("/action", func(r martini.Router) { @@ -500,6 +501,26 @@ func (s *apiServer) SyncRemoveAction(params martini.Params) (int, string) { } } +func (s *apiServer) UploadManifestToS3(params martini.Params) (int, string) { + if err := s.verifyXAuth(params); err != nil { + return rpc.ApiResponseError(err) + } + gid, err := s.parseInteger(params, "gid") + if err != nil { + return rpc.ApiResponseError(err) + } + tid, err := s.parseInteger(params, "tid") + if err != nil { + return rpc.ApiResponseError(err) + } + + if err := s.topom.UploadManifestToS3(gid, tid); err != nil { + return rpc.ApiResponseError(err) + } else { + return rpc.ApiResponseJson("OK") + } +} + func (s *apiServer) SlotCreateAction(params martini.Params) (int, string) { if err := s.verifyXAuth(params); err != nil { return rpc.ApiResponseError(err) diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 517fb2da4c..1a6e1178bb 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -517,6 +517,7 @@ func (s *Topom) doSwitchGroupMaster(g *models.Group, newMasterAddr string, newMa g.Servers[newMasterIndex].Role = models.RoleMaster g.Servers[newMasterIndex].Action.State = models.ActionSynced g.Servers[0], g.Servers[newMasterIndex] = g.Servers[newMasterIndex], g.Servers[0] + g.TermId++ defer func() { err = s.storeUpdateGroup(g) // clean cache whether err is nil or not @@ -784,3 +785,23 @@ func (s *Topom) newSyncActionExecutor(addr string) (func() error, error) { } }, nil } + +func (s *Topom) UploadManifestToS3(gid int, tid int) error { + s.mu.Lock() + defer s.mu.Unlock() + ctx, err := s.newContext() + if err != nil { + return err + } + + if gid <= 0 || gid > 
models.MaxGroupId { + return errors.Errorf("invalid group id = %d, out of range", gid) + } + if ctx.group[gid].TermId == tid { + //waiting for upload to s3 + return nil + } else { + return errors.Errorf("group-[%d] term id:[%d] not equal to pika term id:[%d]", + gid, ctx.group[gid].TermId, tid) + } +} From c4df9b1953ceaadc3653ef6eb16959c317a56002 Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 22 Mar 2024 11:54:34 +0800 Subject: [PATCH 047/116] send pkping --- codis/config/dashboard.toml | 1 + codis/example/dashboard.py | 3 +- codis/pkg/topom/config.go | 2 + codis/pkg/topom/topom.go | 2 +- codis/pkg/topom/topom_sentinel.go | 25 ++++++- codis/pkg/utils/redis/client.go | 10 +++ codis/pkg/utils/redis/codis_sentinel.go | 89 ++++++++++++++++++++++++- 7 files changed, 128 insertions(+), 4 deletions(-) diff --git a/codis/config/dashboard.toml b/codis/config/dashboard.toml index 44ef06213a..34dac74dd4 100644 --- a/codis/config/dashboard.toml +++ b/codis/config/dashboard.toml @@ -44,4 +44,5 @@ sentinel_down_after = "30s" sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" +sentinel_pika_local_model = true diff --git a/codis/example/dashboard.py b/codis/example/dashboard.py index 62568cc83a..8889859fbb 100644 --- a/codis/example/dashboard.py +++ b/codis/example/dashboard.py @@ -43,7 +43,8 @@ def _open_config(admin_port, product_name, product_auth=None): f.write('sentinel_failover_timeout = "10m"\n') path = os.getcwd() f.write('sentinel_notification_script = "{}"\n'.format(os.path.join(path, "sentinel_notify.sh"))) - f.write('sentinel_client_reconfig_script = "{}"\n'.format(os.path.join(path, "sentinel_reconfig.sh"))) + f.write('sentinel_client_reconfig_script = "{}"\n'.format(os.path.join(path, "sentinel_reconfig.sh")) + f.write('sentinel_pika_local_model = "true"\n')) return config diff --git a/codis/pkg/topom/config.go b/codis/pkg/topom/config.go index d1e0d44e5f..9ae508d9a3 100644 --- a/codis/pkg/topom/config.go +++ b/codis/pkg/topom/config.go @@ -61,6 +61,7 @@ sentinel_down_after = "30s" sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" +sentinel_pika_local_model = true ` type Config struct { @@ -95,6 +96,7 @@ type Config struct { SentinelFailoverTimeout timesize.Duration `toml:"sentinel_failover_timeout" json:"sentinel_failover_timeout"` SentinelNotificationScript string `toml:"sentinel_notification_script" json:"sentinel_notification_script"` SentinelClientReconfigScript string `toml:"sentinel_client_reconfig_script" json:"sentinel_client_reconfig_script"` + SentinelPikaLocalModel bool `toml:"sentinel_pika_local_model" json:"sentinel_pika_local_model"` } func NewDefaultConfig() *Config { diff --git a/codis/pkg/topom/topom.go b/codis/pkg/topom/topom.go index f2c34f6b58..ca7186494e 100644 --- a/codis/pkg/topom/topom.go +++ b/codis/pkg/topom/topom.go @@ -197,7 +197,7 @@ func (s *Topom) Start(routines bool) error { return nil } - // Check the status of all masters and slaves every 5 seconds + // Check the status of all masters and slaves every 10 seconds gxruntime.GoUnterminated(func() { for !s.IsClosed() { if s.IsOnline() { diff --git a/codis/pkg/topom/topom_sentinel.go b/codis/pkg/topom/topom_sentinel.go index 3ea8b3cd9f..cec28cb809 100644 --- a/codis/pkg/topom/topom_sentinel.go +++ b/codis/pkg/topom/topom_sentinel.go @@ -16,12 +16,21 @@ func (s *Topom) CheckStateAndSwitchSlavesAndMasters(filter func(index int, g *mo return err } + var states []*redis.ReplicationState groupServers := 
filterGroupServer(ctx.getGroupServers(), filter) if len(groupServers) == 0 { return nil } + if s.Config().SentinelPikaLocalModel { + states = checkGroupServersReplicationState(s.Config(), groupServers) + } else { + var groups_info map[int]int + for gid, _ := range groupServers { + groups_info[gid] = ctx.group[gid].TermId + } + states = checkGroupServersPKPingState(s.Config(), groupServers, groups_info) + } - states := checkGroupServersReplicationState(s.Config(), groupServers) var slaveOfflineGroups []*models.Group var masterOfflineGroups []*models.Group var recoveredGroupServersState []*redis.ReplicationState @@ -111,6 +120,20 @@ func checkGroupServersReplicationState(conf *Config, gs map[int][]*models.GroupS return sentinel.RefreshMastersAndSlavesClient(config.ParallelSyncs, gs) } +func checkGroupServersPKPingState(conf *Config, gs map[int][]*models.GroupServer, groups_info map[int]int) []*redis.ReplicationState { + config := &redis.MonitorConfig{ + Quorum: conf.SentinelQuorum, + ParallelSyncs: conf.SentinelParallelSyncs, + DownAfter: conf.SentinelDownAfter.Duration(), + FailoverTimeout: conf.SentinelFailoverTimeout.Duration(), + NotificationScript: conf.SentinelNotificationScript, + ClientReconfigScript: conf.SentinelClientReconfigScript, + } + + sentinel := redis.NewCodisSentinel(conf.ProductName, conf.ProductAuth) + return sentinel.RefreshMastersAndSlavesClientWithPKPing(config.ParallelSyncs, gs, groups_info) +} + func filterGroupServer(groupServers map[int][]*models.GroupServer, filter func(index int, gs *models.GroupServer) bool) map[int][]*models.GroupServer { filteredGroupServers := make(map[int][]*models.GroupServer) diff --git a/codis/pkg/utils/redis/client.go b/codis/pkg/utils/redis/client.go index 5f751321db..3063b781e9 100644 --- a/codis/pkg/utils/redis/client.go +++ b/codis/pkg/utils/redis/client.go @@ -436,6 +436,16 @@ func (c *Client) Role() (string, error) { } } +func (c *Client) PKPing(group_info []byte) (*InfoReplication, error) { + //waiting to complete + text, err := redigo.String(c.Do("PKPing", group_info)) + if err != nil { + return nil, errors.Trace(err) + } + + return parseInfoReplication(text) +} + var ErrClosedPool = errors.New("use of closed redis pool") type Pool struct { diff --git a/codis/pkg/utils/redis/codis_sentinel.go b/codis/pkg/utils/redis/codis_sentinel.go index 4d1ce73bed..505ea4e765 100644 --- a/codis/pkg/utils/redis/codis_sentinel.go +++ b/codis/pkg/utils/redis/codis_sentinel.go @@ -2,6 +2,7 @@ package redis import ( "context" + "encoding/json" "fmt" "time" @@ -102,7 +103,6 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClient(parallel int, groupServers defer func() { <-limit }() - info, err := s.infoReplicationDispatch(server.Addr) state := &ReplicationState{ Index: index, @@ -131,6 +131,80 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClient(parallel int, groupServers return results } +func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, groupServers map[int][]*models.GroupServer, groups_info map[int]int) []*ReplicationState { + if len(groupServers) == 0 { + s.printf("there's no groups") + return nil + } + + parallel = math2.MaxInt(10, parallel) + limit := make(chan struct{}, parallel) + defer close(limit) + + type GroupInfo struct { + GroupId int `json:"group_id"` + TermId int `json:"term_id"` + MastersAddr []string `json:"master_addr"` + SlavesAddr []string `json:"slaves_addr"` + } + + var fut sync2.Future + for gid, servers := range groupServers { + var group_info GroupInfo + group_info.GroupId = gid + 
group_info.TermId = groups_info[gid] + for _, server := range servers { + if server.Role == models.RoleMaster { + group_info.MastersAddr = append(group_info.MastersAddr, server.Addr) + } + + if server.Role == models.RoleSlave { + group_info.SlavesAddr = append(group_info.SlavesAddr, server.Addr) + } + } + + //build pkping parameter + group_inf_json, err := json.Marshal(group_info) + if err != nil { + log.WarnErrorf(err, "json: %s Serialization Failure failed", group_inf_json) + } + for index, server := range servers { + limit <- struct{}{} + fut.Add() + + go func(gid, index int, server *models.GroupServer) { + defer func() { + <-limit + }() + //info, err := s.infoReplicationDispatch(server.Addr) + info, err := s.PkPingDispatch(server.Addr, group_inf_json) + state := &ReplicationState{ + Index: index, + GroupID: gid, + Addr: server.Addr, + Server: server, + Replication: info, + Err: err, + } + fut.Done(fmt.Sprintf("%d_%d", gid, index), state) + }(gid, index, server) + } + } + + results := make([]*ReplicationState, 0) + + for _, v := range fut.Wait() { + switch val := v.(type) { + case *ReplicationState: + if val != nil { + results = append(results, val) + } + } + } + + return results +} + func (s *CodisSentinel) infoReplicationDispatch(addr string) (*InfoReplication, error) { var ( client *Client @@ -143,3 +217,16 @@ func (s *CodisSentinel) infoReplicationDispatch(addr string) (*InfoReplication, defer client.Close() return client.InfoReplication() } + +func (s *CodisSentinel) PkPingDispatch(addr string, group_info []byte) (*InfoReplication, error) { + var ( + client *Client + err error + ) + if client, err = NewClient(addr, s.Auth, time.Second); err != nil { + log.WarnErrorf(err, "create redis client to %s failed", addr) + return nil, err + } + defer client.Close() + return client.PKPing(group_info) +} From ba378560efaa5d5383c43c878fd531dc92cfa59a Mon Sep 17 00:00:00 2001 From: baixin Date: Sun, 24 Mar 2024 20:40:39 +0800 Subject: [PATCH 048/116] manifest upload to s3 --- codis/pkg/topom/config.go | 4 +++ codis/pkg/topom/topom_api.go | 13 +++++--- codis/pkg/topom/topom_group.go | 56 ++++++++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 11 deletions(-) diff --git a/codis/pkg/topom/config.go b/codis/pkg/topom/config.go index 9ae508d9a3..a2bd6ae4bb 100644 --- a/codis/pkg/topom/config.go +++ b/codis/pkg/topom/config.go @@ -97,6 +97,10 @@ type Config struct { SentinelNotificationScript string `toml:"sentinel_notification_script" json:"sentinel_notification_script"` SentinelClientReconfigScript string `toml:"sentinel_client_reconfig_script" json:"sentinel_client_reconfig_script"` SentinelPikaLocalModel bool `toml:"sentinel_pika_local_model" json:"sentinel_pika_local_model"` + CloudAccessKey string `toml:"cloud_access_key" json:"cloud_access_key"` + CloudSecretKey string `toml:"cloud_secret_key" json:"cloud_secret_key"` + CloudEndPointOverride string `toml:"cloud_endpoint_override" json:"cloud_endpoint_override"` + CloudSrcBucketRegion string `toml:"cloud_src_bucket_region" json:"cloud_src_bucket_region"` } func NewDefaultConfig() *Config { diff --git a/codis/pkg/topom/topom_api.go b/codis/pkg/topom/topom_api.go index dc5c0fa4e5..023ac050f4 100644 --- a/codis/pkg/topom/topom_api.go +++ b/codis/pkg/topom/topom_api.go @@ -43,6 +43,7 @@ func newApiServer(t *Topom) http.Handler { break } } + log.Warnf("req [%s]", req.GetBody) log.Warnf("[%p] API call %s from %s [%s]", t, path, remoteAddr, headerAddr) } c.Next() @@ -74,6 +75,8 @@ func newApiServer(t *Topom) http.Handler { 
r.Get("/xping/:xauth", api.XPing) r.Get("/stats/:xauth", api.Stats) r.Get("/slots/:xauth", api.Slots) + r.Post("/upload-s3/:gid/:tid/:bucket/:filename/:manifest", + api.UploadManifestToS3) r.Put("/reload/:xauth", api.Reload) r.Put("/shutdown/:xauth", api.Shutdown) r.Put("/loglevel/:xauth/:value", api.LogLevel) @@ -99,7 +102,6 @@ func newApiServer(t *Topom) http.Handler { r.Put("/remove/:xauth/:addr", api.SyncRemoveAction) }) r.Get("/info/:addr", api.InfoServer) - r.Put("/upload-s3/:xauth/:gid/:tid/:manifest", api.UploadManifestToS3) }) r.Group("/slots", func(r martini.Router) { r.Group("/action", func(r martini.Router) { @@ -502,9 +504,6 @@ func (s *apiServer) SyncRemoveAction(params martini.Params) (int, string) { } func (s *apiServer) UploadManifestToS3(params martini.Params) (int, string) { - if err := s.verifyXAuth(params); err != nil { - return rpc.ApiResponseError(err) - } gid, err := s.parseInteger(params, "gid") if err != nil { return rpc.ApiResponseError(err) @@ -513,8 +512,12 @@ func (s *apiServer) UploadManifestToS3(params martini.Params) (int, string) { if err != nil { return rpc.ApiResponseError(err) } + //:gid/:tid/:bucket/:filename/:manifest", + bucket := params["bucket"] + filename := params["filename"] + manifest := params["manifest"] - if err := s.topom.UploadManifestToS3(gid, tid); err != nil { + if err := s.topom.UploadManifestToS3(gid, tid, bucket, filename, manifest); err != nil { return rpc.ApiResponseError(err) } else { return rpc.ApiResponseJson("OK") diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 1a6e1178bb..719d149db8 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -5,6 +5,9 @@ package topom import ( "encoding/json" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3/s3manager" "time" "pika/codis/v2/pkg/models" @@ -31,6 +34,7 @@ func (s *Topom) CreateGroup(gid int) error { g := &models.Group{ Id: gid, + TermId: 0, Servers: []*models.GroupServer{}, } return s.storeCreateGroup(g) @@ -319,6 +323,7 @@ func (s *Topom) GroupPromoteServer(gid int, addr string) error { g = &models.Group{ Id: g.Id, + TermId: g.TermId, Servers: g.Servers, } return s.storeUpdateGroup(g) @@ -786,7 +791,7 @@ func (s *Topom) newSyncActionExecutor(addr string) (func() error, error) { }, nil } -func (s *Topom) UploadManifestToS3(gid int, tid int) error { +func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename string, manifest string) error { s.mu.Lock() defer s.mu.Unlock() ctx, err := s.newContext() @@ -797,11 +802,50 @@ func (s *Topom) UploadManifestToS3(gid int, tid int) error { if gid <= 0 || gid > models.MaxGroupId { return errors.Errorf("invalid group id = %d, out of range", gid) } - if ctx.group[gid].TermId == tid { - //waiting for upload to s3 - return nil + + group, exists := ctx.group[gid] + //if true { + if exists { + //if true { + if group.TermId == tid { + sess, err := session.NewSession(&aws.Config{ + Credentials: credentials.NewStaticCredentials(s.Config().CloudAccessKey, + s.Config().CloudSecretKey, ""), + Endpoint: aws.String(s.Config().CloudEndPointOverride), + Region: aws.String(s.Config().CloudSrcBucketRegion), + DisableSSL: aws.Bool(true), + S3ForcePathStyle: aws.Bool(true), + DisableEndpointHostPrefix: aws.Bool(true), + }) + + //waiting for pika trans manifest + //bucket := "pika" + //filename := "db/db0/bz.cc" + + file, err := os.Open("/Users/charlieqiao/Desktop/bz.cc") + 
//ioutil.ReadAll(strings.NewReader(params["manifest"])) + if err != nil { + //exitErrorf("Unable to open file %q, %v", err) + println(err) + } + defer file.Close() + //over + + uploader := s3manager.NewUploader(sess) + _, err = uploader.Upload(&s3manager.UploadInput{ + Bucket: aws.String(bucket), + Key: aws.String(filename), + Body: file, + }) + if err != nil { + return errors.Errorf("Unable to upload [%s] to [%s], [%s]", filename, bucket, err) + } + } else { + return errors.Errorf("group-[%d] term id:[%d] not equal to pika term id:[%d]", + gid, ctx.group[gid].TermId, tid) + } } else { - return errors.Errorf("group-[%d] term id:[%d] not equal to pika term id:[%d]", - gid, ctx.group[gid].TermId, tid) + return errors.Errorf("group-[%d] not exists", gid) } + return nil } From 5ae6186e442679b4b0315ff66edfeb32f659a009 Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 25 Mar 2024 10:39:22 +0800 Subject: [PATCH 049/116] clean code --- codis/pkg/topom/topom_api.go | 1 - 1 file changed, 1 deletion(-) diff --git a/codis/pkg/topom/topom_api.go b/codis/pkg/topom/topom_api.go index 023ac050f4..0a433cfd7f 100644 --- a/codis/pkg/topom/topom_api.go +++ b/codis/pkg/topom/topom_api.go @@ -43,7 +43,6 @@ func newApiServer(t *Topom) http.Handler { break } } - log.Warnf("req [%s]", req.GetBody) log.Warnf("[%p] API call %s from %s [%s]", t, path, remoteAddr, headerAddr) } c.Next() From b717276eb42adfd196e98e728d6f0f9b827a385e Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 25 Mar 2024 10:42:55 +0800 Subject: [PATCH 050/116] clean code --- codis/pkg/utils/redis/codis_sentinel.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/codis/pkg/utils/redis/codis_sentinel.go b/codis/pkg/utils/redis/codis_sentinel.go index 505ea4e765..5a52a4f370 100644 --- a/codis/pkg/utils/redis/codis_sentinel.go +++ b/codis/pkg/utils/redis/codis_sentinel.go @@ -149,6 +149,8 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr } var fut sync2.Future + + //build pkping parameter for gid, servers := range groupServers { var group_info GroupInfo group_info.GroupId = gid @@ -163,7 +165,6 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr } } - //build pkping parameter group_inf_json, err := json.Marshal(group_info) if err != nil { log.WarnErrorf(err, "json: %s Serialization Failure failed", group_inf_json) From a927e2f06eebea80192d40d05709fd3c03ea615f Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 25 Mar 2024 11:22:53 +0800 Subject: [PATCH 051/116] clean code --- codis/pkg/topom/topom_group.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 719d149db8..87dc00255b 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -5,9 +5,11 @@ package topom import ( "encoding/json" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3/s3manager" + "os" "time" "pika/codis/v2/pkg/models" From f94cfa46275fd9db3eaa312b500b4d47577ca3e5 Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 25 Mar 2024 14:20:03 +0800 Subject: [PATCH 052/116] add s3 config --- codis/example/dashboard.py | 6 +++++- codis/pkg/topom/config.go | 4 ++++ codis/pkg/topom/topom_group.go | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/codis/example/dashboard.py b/codis/example/dashboard.py index 8889859fbb..3d5fafce5c 100644 --- a/codis/example/dashboard.py +++ 
b/codis/example/dashboard.py @@ -44,7 +44,11 @@ def _open_config(admin_port, product_name, product_auth=None): path = os.getcwd() f.write('sentinel_notification_script = "{}"\n'.format(os.path.join(path, "sentinel_notify.sh"))) f.write('sentinel_client_reconfig_script = "{}"\n'.format(os.path.join(path, "sentinel_reconfig.sh")) - f.write('sentinel_pika_local_model = "true"\n')) + f.write('sentinel_pika_local_model = "true"\n') + f.write('cloud_access_key = "minioadmin"\n') + f.write('cloud_secret_key = "minioadmin"\n') + f.write('cloud_endpoint_override = "http://10.224.129.40:9000"\n') + f.write('cloud_src_bucket_region = "us-east-1"\n')) return config diff --git a/codis/pkg/topom/config.go b/codis/pkg/topom/config.go index a2bd6ae4bb..ff898d0fba 100644 --- a/codis/pkg/topom/config.go +++ b/codis/pkg/topom/config.go @@ -62,6 +62,10 @@ sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" sentinel_pika_local_model = true +cloud_access_key = "minioadmin" +cloud_secret_key = "minioadmin" +cloud_endpoint_override = "http://10.224.129.40:9000" +cloud_src_bucket_region = "us-east-1" ` type Config struct { diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 87dc00255b..2509d2143f 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -821,9 +821,9 @@ func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename str }) //waiting for pika trans manifest + //bucket := "pika" //filename := "db/db0/bz.cc" - file, err := os.Open("/Users/charlieqiao/Desktop/bz.cc") //ioutil.ReadAll(strings.NewReader(params["manifest"])) if err != nil { @@ -831,7 +831,7 @@ func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename str println(err) } defer file.Close() - //over + //waiting for pika trans manifest over uploader := s3manager.NewUploader(sess) _, err = uploader.Upload(&s3manager.UploadInput{ From a6fe12cf27a1370777aa612d8c4ac232fdbd03c9 Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 25 Mar 2024 16:23:25 +0800 Subject: [PATCH 053/116] change parameter name --- codis/pkg/topom/topom_api.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/codis/pkg/topom/topom_api.go b/codis/pkg/topom/topom_api.go index 0a433cfd7f..9771159f2a 100644 --- a/codis/pkg/topom/topom_api.go +++ b/codis/pkg/topom/topom_api.go @@ -74,7 +74,7 @@ func newApiServer(t *Topom) http.Handler { r.Get("/xping/:xauth", api.XPing) r.Get("/stats/:xauth", api.Stats) r.Get("/slots/:xauth", api.Slots) - r.Post("/upload-s3/:gid/:tid/:bucket/:filename/:manifest", + r.Post("/upload-s3/:group_id/:term_id/:s3_bucket/:s3_path/:content", api.UploadManifestToS3) r.Put("/reload/:xauth", api.Reload) r.Put("/shutdown/:xauth", api.Shutdown) @@ -503,18 +503,18 @@ func (s *apiServer) SyncRemoveAction(params martini.Params) (int, string) { } func (s *apiServer) UploadManifestToS3(params martini.Params) (int, string) { - gid, err := s.parseInteger(params, "gid") + gid, err := s.parseInteger(params, "group_id") if err != nil { return rpc.ApiResponseError(err) } - tid, err := s.parseInteger(params, "tid") + tid, err := s.parseInteger(params, "term_id") if err != nil { return rpc.ApiResponseError(err) } //:gid/:tid/:bucket/:filename/:manifest", - bucket := params["bucket"] - filename := params["filename"] - manifest := params["manifest"] + bucket := params["s3_bucket"] + filename := params["s3_path"] + manifest := params["content"] if err := s.topom.UploadManifestToS3(gid, tid, bucket, filename, 
manifest); err != nil { return rpc.ApiResponseError(err) From a680fb3b231760a22d46631ab011c83d9f266fa1 Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 25 Mar 2024 20:53:56 +0800 Subject: [PATCH 054/116] change http post style --- codis/config/dashboard.toml | 4 ++++ codis/pkg/topom/topom_api.go | 28 +++++++++++++++++----------- codis/pkg/topom/topom_group.go | 17 +++++++---------- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/codis/config/dashboard.toml b/codis/config/dashboard.toml index 34dac74dd4..dbc48f5609 100644 --- a/codis/config/dashboard.toml +++ b/codis/config/dashboard.toml @@ -45,4 +45,8 @@ sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" sentinel_pika_local_model = true +cloud_access_key = "minioadmin" +cloud_secret_key = "minioadmin" +cloud_endpoint_override = "http://10.224.129.40:9000" +cloud_src_bucket_region = "us-east-1" diff --git a/codis/pkg/topom/topom_api.go b/codis/pkg/topom/topom_api.go index 9771159f2a..98e98f3a66 100644 --- a/codis/pkg/topom/topom_api.go +++ b/codis/pkg/topom/topom_api.go @@ -4,7 +4,9 @@ package topom import ( + "encoding/json" "fmt" + "io" "net/http" "strconv" "strings" @@ -74,8 +76,7 @@ func newApiServer(t *Topom) http.Handler { r.Get("/xping/:xauth", api.XPing) r.Get("/stats/:xauth", api.Stats) r.Get("/slots/:xauth", api.Slots) - r.Post("/upload-s3/:group_id/:term_id/:s3_bucket/:s3_path/:content", - api.UploadManifestToS3) + r.Post("/upload-s3", api.UploadManifestToS3) r.Put("/reload/:xauth", api.Reload) r.Put("/shutdown/:xauth", api.Shutdown) r.Put("/loglevel/:xauth/:value", api.LogLevel) @@ -502,21 +503,26 @@ func (s *apiServer) SyncRemoveAction(params martini.Params) (int, string) { } } -func (s *apiServer) UploadManifestToS3(params martini.Params) (int, string) { - gid, err := s.parseInteger(params, "group_id") +func (s *apiServer) UploadManifestToS3(req *http.Request) (int, string) { + body, err := io.ReadAll(req.Body) if err != nil { return rpc.ApiResponseError(err) } - tid, err := s.parseInteger(params, "term_id") + + type UploadRequest struct { + GroupId int `json:"group_id"` + TermId int `json:"term_id"` + S3Bucket string `json:"s3_bucket"` + S3Path string `json:"s3_path"` + Content string `json:"content"` + } + var uploadReq UploadRequest + err = json.Unmarshal(body, &uploadReq) if err != nil { return rpc.ApiResponseError(err) } - //:gid/:tid/:bucket/:filename/:manifest", - bucket := params["s3_bucket"] - filename := params["s3_path"] - manifest := params["content"] - - if err := s.topom.UploadManifestToS3(gid, tid, bucket, filename, manifest); err != nil { + if err := s.topom.UploadManifestToS3(uploadReq.GroupId, uploadReq.TermId, uploadReq.S3Bucket, + uploadReq.S3Path, uploadReq.Content); err != nil { return rpc.ApiResponseError(err) } else { return rpc.ApiResponseJson("OK") diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 2509d2143f..0477487f95 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -793,7 +793,7 @@ func (s *Topom) newSyncActionExecutor(addr string) (func() error, error) { }, nil } -func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename string, manifest string) error { +func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename string, content string) error { s.mu.Lock() defer s.mu.Unlock() ctx, err := s.newContext() @@ -820,18 +820,15 @@ func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename str 
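With the route above reduced to a plain POST /upload-s3, the group id, term id, bucket, object path and manifest content now travel in a single JSON body. A minimal client sketch in Go, assuming a dashboard admin address of 127.0.0.1:18080 and the /topom route prefix used by the neighbouring endpoints (both are illustrative, not taken from the patch):

    package main

    import (
        "bytes"
        "encoding/json"
        "log"
        "net/http"
    )

    func main() {
        // field names follow the UploadRequest json tags defined in the handler
        body, err := json.Marshal(map[string]interface{}{
            "group_id":  1,
            "term_id":   3,
            "s3_bucket": "pika",                   // illustrative bucket name
            "s3_path":   "db/db0/MANIFEST-000001", // illustrative object key
            "content":   "...manifest bytes...",
        })
        if err != nil {
            log.Fatal(err)
        }
        resp, err := http.Post("http://127.0.0.1:18080/topom/upload-s3", "application/json", bytes.NewReader(body))
        if err != nil {
            log.Fatal(err)
        }
        defer resp.Body.Close()
        log.Println("dashboard replied:", resp.Status)
    }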
DisableEndpointHostPrefix: aws.Bool(true), }) - //waiting for pika trans manifest - - //bucket := "pika" - //filename := "db/db0/bz.cc" - file, err := os.Open("/Users/charlieqiao/Desktop/bz.cc") - //ioutil.ReadAll(strings.NewReader(params["manifest"])) + file, err := os.Create("./tmp") if err != nil { - //exitErrorf("Unable to open file %q, %v", err) - println(err) + return errors.Errorf("Create manifest file err :[%s]", err) } defer file.Close() - //waiting for pika trans manifest over + _, err = file.WriteString(content) + if err != nil { + return errors.Errorf("Write manifest err :[%s]", err) + } uploader := s3manager.NewUploader(sess) _, err = uploader.Upload(&s3manager.UploadInput{ From e52826d738e54372b04e6c00b1408c0d6abfe902 Mon Sep 17 00:00:00 2001 From: baixin Date: Wed, 27 Mar 2024 20:40:21 +0800 Subject: [PATCH 055/116] add pika pkping command, fix bug --- codis/pkg/topom/topom_group.go | 83 ++++++++++++------------- codis/pkg/utils/redis/codis_sentinel.go | 14 ++--- include/pika_admin.h | 20 ++++++ include/pika_command.h | 2 + src/pika_admin.cc | 50 +++++++++++++++ src/pika_command.cc | 4 ++ 6 files changed, 124 insertions(+), 49 deletions(-) diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 0477487f95..9b68cfbc9d 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -5,12 +5,13 @@ package topom import ( "encoding/json" + "os" + "time" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3/s3manager" - "os" - "time" "pika/codis/v2/pkg/models" "pika/codis/v2/pkg/utils/errors" @@ -795,56 +796,54 @@ func (s *Topom) newSyncActionExecutor(addr string) (func() error, error) { func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename string, content string) error { s.mu.Lock() - defer s.mu.Unlock() ctx, err := s.newContext() if err != nil { return err } - if gid <= 0 || gid > models.MaxGroupId { return errors.Errorf("invalid group id = %d, out of range", gid) } - group, exists := ctx.group[gid] - //if true { - if exists { - //if true { - if group.TermId == tid { - sess, err := session.NewSession(&aws.Config{ - Credentials: credentials.NewStaticCredentials(s.Config().CloudAccessKey, - s.Config().CloudSecretKey, ""), - Endpoint: aws.String(s.Config().CloudEndPointOverride), - Region: aws.String(s.Config().CloudSrcBucketRegion), - DisableSSL: aws.Bool(true), - S3ForcePathStyle: aws.Bool(true), - DisableEndpointHostPrefix: aws.Bool(true), - }) - - file, err := os.Create("./tmp") - if err != nil { - return errors.Errorf("Create manifest file err :[%s]", err) - } - defer file.Close() - _, err = file.WriteString(content) - if err != nil { - return errors.Errorf("Write manifest err :[%s]", err) - } + s.mu.Unlock() - uploader := s3manager.NewUploader(sess) - _, err = uploader.Upload(&s3manager.UploadInput{ - Bucket: aws.String(bucket), - Key: aws.String(filename), - Body: file, - }) - if err != nil { - return errors.Errorf("Unable to upload [%s] to [%s], [%s]", filename, bucket, err) - } - } else { - return errors.Errorf("group-[%d] term id:[%d] not equal to pika term id:[%d]", - gid, ctx.group[gid].TermId, tid) - } - } else { + if !exists { return errors.Errorf("group-[%d] not exists", gid) } + + if group.TermId != tid { + return errors.Errorf("group-[%d] term id:[%d] not equal to pika term id:[%d]", + gid, ctx.group[gid].TermId, tid) + } + + sess, err := session.NewSession(&aws.Config{ + Credentials: 
credentials.NewStaticCredentials(s.Config().CloudAccessKey, + s.Config().CloudSecretKey, ""), + Endpoint: aws.String(s.Config().CloudEndPointOverride), + Region: aws.String(s.Config().CloudSrcBucketRegion), + DisableSSL: aws.Bool(true), + S3ForcePathStyle: aws.Bool(true), + DisableEndpointHostPrefix: aws.Bool(true), + }) + + file, err := os.Create("./tmp") + if err != nil { + return errors.Errorf("Create manifest file err :[%s]", err) + } + defer file.Close() + _, err = file.WriteString(content) + if err != nil { + return errors.Errorf("Write manifest err :[%s]", err) + } + + uploader := s3manager.NewUploader(sess) + _, err = uploader.Upload(&s3manager.UploadInput{ + Bucket: aws.String(bucket), + Key: aws.String(filename), + Body: file, + }) + if err != nil { + return errors.Errorf("Unable to upload [%s] to [%s], [%s]", filename, bucket, err) + } + return nil } diff --git a/codis/pkg/utils/redis/codis_sentinel.go b/codis/pkg/utils/redis/codis_sentinel.go index 5a52a4f370..7b495624c1 100644 --- a/codis/pkg/utils/redis/codis_sentinel.go +++ b/codis/pkg/utils/redis/codis_sentinel.go @@ -131,6 +131,13 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClient(parallel int, groupServers return results } +type GroupInfo struct { + GroupId int `json:"group_id"` + TermId int `json:"term_id"` + MastersAddr []string `json:"master_addr"` + SlavesAddr []string `json:"slaves_addr"` +} + func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, groupServers map[int][]*models.GroupServer, groups_info map[int]int) []*ReplicationState { if len(groupServers) == 0 { s.printf("there's no groups") @@ -141,13 +148,6 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr limit := make(chan struct{}, parallel) defer close(limit) - type GroupInfo struct { - GroupId int `json:"group_id"` - TermId int `json:"term_id"` - MastersAddr []string `json:"master_addr"` - SlavesAddr []string `json:"slaves_addr"` - } - var fut sync2.Future //build pkping parameter diff --git a/include/pika_admin.h b/include/pika_admin.h index 7693f0329d..daf138998d 100644 --- a/include/pika_admin.h +++ b/include/pika_admin.h @@ -260,6 +260,8 @@ class InfoCmd : public Cmd { kInfoCache }; + friend class PKPingCmd; + InfoCmd(const std::string& name, int arity, uint32_t flag) : Cmd(name, arity, flag) {} void Do() override; void Split(const HintKeys& hint_keys) override {}; @@ -595,6 +597,24 @@ class ClearCacheCmd : public Cmd { void DoInitial() override; }; +class PKPingCmd : public Cmd { + public: + PKPingCmd(const std::string& name, int arity, uint32_t flag) : Cmd(name, arity, flag) {} + void Do() override; + void Split(const HintKeys& hint_keys) override {}; + void Merge() override {}; + Cmd* Clone() override { return new PKPingCmd(*this); } + + private: + uint32_t group_id_ = 0; + uint32_t term_id_ = 0; + std::vector masters_addr_; + std::vector slaves_addr_; + + void DoInitial() override; + void Clear() override {} +}; + #ifdef WITH_COMMAND_DOCS class CommandCmd : public Cmd { public: diff --git a/include/pika_command.h b/include/pika_command.h index 3d5d535971..ce6a4474d3 100644 --- a/include/pika_command.h +++ b/include/pika_command.h @@ -248,6 +248,8 @@ const std::string kCmdNameXInfo = "xinfo"; const std::string kClusterPrefix = "pkcluster"; +const std::string kCmdPkPing = "pkping"; + using PikaCmdArgsType = net::RedisCmdArgsType; static const int RAW_ARGS_LEN = 1024 * 1024; diff --git a/src/pika_admin.cc b/src/pika_admin.cc index d1481e16f1..ed0ea4dee2 100644 --- a/src/pika_admin.cc +++ 
b/src/pika_admin.cc @@ -13,6 +13,7 @@ #include #include +#include #include "include/build_version.h" #include "include/pika_cmd_table_manager.h" @@ -22,7 +23,9 @@ #include "include/pika_conf.h" #include "pstd/include/rsync.h" + using pstd::Status; +using namespace Aws::Utils; extern PikaServer* g_pika_server; extern std::unique_ptr g_pika_rm; @@ -3229,6 +3232,53 @@ void ClearCacheCmd::Do() { res_.SetRes(CmdRes::kOk, "Cache is cleared"); } +void PKPingCmd::DoInitial() { + if (!CheckArg(argv_.size())) { + res_.SetRes(CmdRes::kWrongNum, kCmdPkPing); + return; + } + + Json::JsonValue json_str(argv_[1]); + Json::JsonView jw(json_str); + + group_id_ = jw.GetInt64("group_id"); + term_id_ = jw.GetInt64("term_id"); + + + auto jsonArrayView = jw.GetArray("mastersAddr"); + size_t arraySize = jsonArrayView.GetLength(); + for (size_t i = 0; i < arraySize; ++i) { + if (jsonArrayView[i].IsString()) { + masters_addr_.push_back(jsonArrayView[i].AsString()); + } + } + + jsonArrayView = jw.GetArray("slavesAddr"); + arraySize = jsonArrayView.GetLength(); + for (size_t i = 0; i < arraySize; ++i) { + if (jsonArrayView[i].IsString()) { + slaves_addr_.push_back(jsonArrayView[i].AsString()); + } + } + + if (g_pika_server->role() == PIKA_ROLE_MASTER) { + for (auto const& slave : g_pika_server->slaves_) { + if (std::find(masters_addr_.begin(), masters_addr_.end(), slave.ip_port) != masters_addr_.end()) { + //waiting todo :合并代码后 更新groupid 和 term_id + break; + } + } + } + +} + +void PKPingCmd::Do() { + std::string info; + InfoCmd cmd(kCmdNameSlotsInfo, -1, kCmdFlagsRead | kCmdFlagsAdmin | kCmdFlagsSlow); + cmd.InfoReplication(info); + res_.AppendString(info); +} + #ifdef WITH_COMMAND_DOCS bool CommandCmd::CommandFieldCompare::operator()(const std::string& a, const std::string& b) const { diff --git a/src/pika_command.cc b/src/pika_command.cc index 06e165280b..95660c87e0 100644 --- a/src/pika_command.cc +++ b/src/pika_command.cc @@ -820,6 +820,10 @@ void InitCmdTable(CmdTable* cmd_table) { std::unique_ptr xinfoptr = std::make_unique(kCmdNameXInfo, -2, kCmdFlagsRead | kCmdFlagsStream | kCmdFlagsSlow); cmd_table->insert(std::pair>(kCmdNameXInfo, std::move(xinfoptr))); + ////PKPING + std::unique_ptr pkpingptr = + std::make_unique(kCmdPkPing, 2, kCmdFlagsRead | kCmdFlagsAdmin | kCmdFlagsSlow); + cmd_table->insert(std::pair>(kCmdPkPing, std::move(pkpingptr))); } Cmd* GetCmdFromDB(const std::string& opt, const CmdTable& cmd_table) { From 864c512fbd6c1835217c63d73042d890a01a28f7 Mon Sep 17 00:00:00 2001 From: baixin Date: Thu, 28 Mar 2024 15:46:18 +0800 Subject: [PATCH 056/116] fix bug --- include/pika_define.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/include/pika_define.h b/include/pika_define.h index 39869313fe..8b87663411 100644 --- a/include/pika_define.h +++ b/include/pika_define.h @@ -55,18 +55,17 @@ struct DBStruct { cloud_bucket_suffix(std::move(cloud_bucket_suffix)), cloud_bucket_region(std::move(cloud_bucket_region)) {} #endif - bool operator==(const DBStruct& db_struct) const { - return db_name == db_struct.db_name && db_instance_num == db_struct.db_instance_num; - } -#ifdef USE_S3 bool operator==(const DBStruct& db_struct) const { +#ifdef USE_S3 return db_name == db_struct.db_name && db_instance_num == db_struct.db_instance_num && cloud_endpoint_override == db_struct.cloud_endpoint_override && cloud_bucket_prefix == db_struct.cloud_bucket_prefix && cloud_bucket_suffix == db_struct.cloud_bucket_suffix && cloud_bucket_region == db_struct.cloud_bucket_region; - } #endif + return 
db_name == db_struct.db_name && db_instance_num == db_struct.db_instance_num; + } + std::string db_name; int32_t db_instance_num = 0; #ifdef USE_S3 From 7d0ca49bd176ef068c6d020f137464b31001e278 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 28 Mar 2024 16:28:43 +0800 Subject: [PATCH 057/116] fix compile error --- include/pika_binlog.h | 2 +- include/pika_cloud_binlog_transverter.h | 2 +- include/pika_db.h | 2 +- src/pika_binlog.cc | 2 +- src/pika_cloud_binlog.cc | 4 ++-- src/pika_cloud_binlog.proto | 3 +-- src/pika_cloud_binlog_transverter.cc | 3 +-- src/pika_db.cc | 4 ++-- src/pika_repl_bgworker.cc | 14 +++++++++----- src/pika_server.cc | 7 ++++--- src/pstd/include/pstd_wal.h | 5 ++--- src/storage/src/redis.cc | 13 +++++-------- src/storage/src/redis.h | 2 +- src/storage/src/storage.cc | 4 ++-- 14 files changed, 33 insertions(+), 34 deletions(-) diff --git a/include/pika_binlog.h b/include/pika_binlog.h index fa36a6aac4..bf763da003 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -54,7 +54,7 @@ class Binlog : public pstd::WalWriter { virtual pstd::Status Put(const std::string& item); - virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const std::string& rep_seq) override; + virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) override; virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* diff --git a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h index 1c28b921c5..90337315df 100644 --- a/include/pika_cloud_binlog_transverter.h +++ b/include/pika_cloud_binlog_transverter.h @@ -18,7 +18,7 @@ class PikaCloudBinlogTransverter { public: PikaCloudBinlogTransverter() = default; static std::string BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, - uint32_t filenum, uint64_t offset, const std::string& content, const std::string& replication_sequence); + uint32_t filenum, uint64_t offset, const std::string& content); static bool BinlogDecode(const std::string& binlog, cloud::BinlogCloudItem* binlog_item); diff --git a/include/pika_db.h b/include/pika_db.h index a24dc7df88..6d4ed980cc 100644 --- a/include/pika_db.h +++ b/include/pika_db.h @@ -157,7 +157,7 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { /* * Switch Master/Slave role use */ - pstd::Status SwitchMaster(bool is_old_master, bool is_new_master); + rocksdb::Status SwitchMaster(bool is_old_master, bool is_new_master); private: bool opened_ = false; diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index 3dddbccea2..b1fc6d3829 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -165,7 +165,7 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32 return Status::OK(); } -Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const std::string& rep_seq) { +Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { return Status::Error("data err"); } diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index 80c36358ae..589d43d219 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -161,7 +161,7 @@ Status CloudBinlog::Put(const std::string& item) { return Status::Error("data err: db_id and rocksdb_id empty"); } // Note: mutex lock should be held -Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, const 
std::string& replication_sequence) { +Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { if (!opened_.load()) { return Status::Busy("Cloud Binlog is not open yet"); } @@ -176,7 +176,7 @@ Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksd if (!s.ok()) { return s; } - std::string data = PikaCloudBinlogTransverter::BinlogEncode(db_id, rocksdb_id, time(nullptr), term, filenum, offset, item, replication_sequence); + std::string data = PikaCloudBinlogTransverter::BinlogEncode(db_id, rocksdb_id, time(nullptr), term, filenum, offset, item); s = Put(data.c_str(), static_cast(data.size())); if (!s.ok()) { diff --git a/src/pika_cloud_binlog.proto b/src/pika_cloud_binlog.proto index b3e4b00b0f..75eb7b0620 100644 --- a/src/pika_cloud_binlog.proto +++ b/src/pika_cloud_binlog.proto @@ -13,5 +13,4 @@ message BinlogCloudItem { uint64 file_num = 5; uint64 offset = 6; bytes content = 7; - bytes replication_sequence = 8; -} \ No newline at end of file +} diff --git a/src/pika_cloud_binlog_transverter.cc b/src/pika_cloud_binlog_transverter.cc index 6d8472d9f1..e2c7fd1049 100644 --- a/src/pika_cloud_binlog_transverter.cc +++ b/src/pika_cloud_binlog_transverter.cc @@ -17,7 +17,7 @@ const int SPACE_STROE_PARAMETER_LENGTH = 5; std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, uint32_t filenum, uint64_t offset, - const std::string& content, const std::string& replication_sequence) { + const std::string& content) { std::string serialize_binlog; cloud::BinlogCloudItem binlog_item; binlog_item.set_db_id(db_id); @@ -27,7 +27,6 @@ std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t ro binlog_item.set_file_num(filenum); binlog_item.set_offset(offset); binlog_item.set_content(content); - binlog_item.set_replication_sequence(replication_sequence); binlog_item.SerializeToString(&serialize_binlog); return serialize_binlog; } diff --git a/src/pika_db.cc b/src/pika_db.cc index ed47b16066..d2dfd88243 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -645,8 +645,8 @@ bool DB::FlushDB() { return FlushDBWithoutLock(); } -pstd::Status DB::SwitchMaster(bool is_old_master, bool is_new_master) { #ifdef USE_S3 +rocksdb::Status DB::SwitchMaster(bool is_old_master, bool is_new_master) { return storage_.SwitchMaster(is_old_master, is_new_master); #endif -} \ No newline at end of file +} diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index f87281a73d..4e7333d4e7 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -143,20 +143,24 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { return; } - if (storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item->content())) { + auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); + if (storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { continue; } - + std::shared_ptr db = g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); if (!db) { - LOG(WARNING) << woker->db_name_ <<" not found"; + LOG(WARNING) << worker->db_name_ <<" not found"; slave_db->SetReplState(ReplState::kTryConnect); return; } db->Logger()->Put(binlog_res.binlog()); - auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); - s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.replication_sequence(), binlog_item.type(), binlog_item->content()); + auto s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item->content()); + if 
(!s.ok()) { + LOG(WARNING) << "rocksdb apply wal failed, error: " << s.ToString(); + return; + } return; } else { if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog_res.binlog(), &worker->binlog_item_)) { diff --git a/src/pika_server.cc b/src/pika_server.cc index bad00c6e75..a6af8f6bb4 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -31,6 +31,7 @@ #include "net/include/redis_cli.h" #include "pstd/include/env.h" #include "pstd/include/rsync.h" +#include "pstd/include/pstd_defer.h" #include "pstd/include/pika_codis_slot.h" #include "include/pika_cmd_table_manager.h" @@ -1818,7 +1819,7 @@ void PikaServer::CacheConfigInit(cache::CacheConfig& cache_cfg) { #ifdef USE_S3 bool PikaServer::UploadMetaToSentinel(const std::string& local_path, const std::string& s3_bucket, - const std::string& object_path) { + const std::string& remote_path) { Aws::String url(sentinel_addr_); if (sentinel_client_ == nullptr) { sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); @@ -1826,7 +1827,7 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, FILE* fp = fopen(local_path.c_str(), "rb"); if (fp == nullptr) { - LOG(WANRING) << "read file failed," + LOG(WARNING) << "read file failed," << " local_path: " << local_path << " error: " << strerror(errno); return false; @@ -1844,7 +1845,7 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, size_t result = fread(buffer, 1, f_size, fp); if (result != f_size) { - LOG(WANRING) << "read file failed, local_path: " << local_path + LOG(WARNING) << "read file failed, local_path: " << local_path << " fread size: " << result << "fsize: " << f_size; } std::string content(buffer, result); diff --git a/src/pstd/include/pstd_wal.h b/src/pstd/include/pstd_wal.h index 514cff427f..7028a81dcd 100644 --- a/src/pstd/include/pstd_wal.h +++ b/src/pstd/include/pstd_wal.h @@ -15,9 +15,8 @@ namespace pstd { class WalWriter : public noncopyable { public: virtual ~WalWriter() {} - virtual Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, - const std::string& rep_seq) = 0; + virtual Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) = 0; }; } // namespace pstd -#endif // __PSTD_WAL_H__ \ No newline at end of file +#endif // __PSTD_WAL_H__ diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 07df1a18b5..b46450d008 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -599,16 +599,15 @@ bool Redis::ShouldSkip(const std::string& content) { return rocksdb::WriteBatchInternal::Sequence(&batch) != sq_number + 1; } -Status Redis::ApplyWAL(const std::string& replication_sequence, int type, const std::string& content) { +Status Redis::ApplyWAL(int type, const std::string& content) { rocksdb::ReplicationLogRecord::Type rtype = static_cast(type); rocksdb::ReplicationLogRecord rlr; rocksdb::DBCloud::ApplyReplicationLogRecordInfo info; rlr.contents = content; rlr.type = rtype; - auto s = db_->ApplyReplicationLogRecord(rlr, replication_sequence, nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); + auto s = db_->ApplyReplicationLogRecord(rlr, "", nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); LOG(WARNING) << "applying rocksdb WAL, rocksdb_id: " << index_ - << " replication sequence: " << replication_sequence << " log record type: " << rtype << " status: " << s.ToString(); return s; @@ -623,15 +622,13 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re LOG(WARNING) << "rocksdb not 
opened yet, skip write binlog"; return "0"; } - std::string replication_sequence_str = std::to_string(counter_.fetch_add(1)); auto s = wal_writer_->Put(record.contents, db_id, - redis_inst->GetIndex(), replication_sequence_str); + redis_inst->GetIndex()); if (!s.ok()) { LOG(ERROR) << "write binlog failed, db_id: " << db_id - << " rocksdb_id: " << redis_inst->GetIndex() - << " replication sequence: " << replication_sequence_str; + << " rocksdb_id: " << redis_inst->GetIndex(); } - return replication_sequence_str; + return ""; } #endif } // namespace storage diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index ff66fd5ff7..4bbd4f3321 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -395,7 +395,7 @@ class Redis { } #ifdef USE_S3 - Status ApplyWAL(const std::string& replication_sequence, int type, const std::string& content); + Status ApplyWAL(int type, const std::string& content); bool ShouldSkip(const std::string& content); Status SwitchMaster(bool is_old_master, bool is_new_master); void ResetLogListener(std::shared_ptr handle) { diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index 4a4b001ab2..57c9671484 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -2468,10 +2468,10 @@ Status Storage::SwitchMaster(bool is_old_master, bool is_new_master) { return s; } -Status Storage::ApplyWAL(int rocksdb_id, const std::string& replication_sequence, +Status Storage::ApplyWAL(int rocksdb_id, int type, const std::string& content) { auto& inst = insts_[rocksdb_id]; - return inst->ApplyWAL(replication_sequence, type, content); + return inst->ApplyWAL(type, content); } From 43a43cee3436634d276e1b71aee6cb4aa89c9055 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 28 Mar 2024 17:00:01 +0800 Subject: [PATCH 058/116] fix by review comments --- include/pika_binlog.h | 2 +- include/pika_cloud_binlog.h | 2 +- include/pika_cloud_binlog_transverter.h | 2 +- src/pika_binlog.cc | 2 +- src/pika_cloud_binlog.cc | 4 ++-- src/pika_cloud_binlog.proto | 1 + src/pika_cloud_binlog_transverter.cc | 5 +++-- src/pika_server.cc | 8 ++++---- src/pstd/include/pstd_wal.h | 2 +- src/storage/src/redis.cc | 3 ++- tests/gtest/cloud_binlog_test.cc | 6 +++--- 11 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/pika_binlog.h b/include/pika_binlog.h index bf763da003..c31d99a1b6 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -54,7 +54,7 @@ class Binlog : public pstd::WalWriter { virtual pstd::Status Put(const std::string& item); - virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) override; + virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) override; virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h index 2025d3cbf1..21389afafd 100644 --- a/include/pika_cloud_binlog.h +++ b/include/pika_cloud_binlog.h @@ -50,7 +50,7 @@ class CloudBinlog : public Binlog { pstd::Status Put(const std::string& item) override; - pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) override; + pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) override; pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr) override; /* diff --git 
a/include/pika_cloud_binlog_transverter.h b/include/pika_cloud_binlog_transverter.h index 90337315df..8d94a3c73d 100644 --- a/include/pika_cloud_binlog_transverter.h +++ b/include/pika_cloud_binlog_transverter.h @@ -18,7 +18,7 @@ class PikaCloudBinlogTransverter { public: PikaCloudBinlogTransverter() = default; static std::string BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, - uint32_t filenum, uint64_t offset, const std::string& content); + uint32_t filenum, uint64_t offset, const std::string& content, uint32_t type); static bool BinlogDecode(const std::string& binlog, cloud::BinlogCloudItem* binlog_item); diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index b1fc6d3829..7cbb06835d 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -165,7 +165,7 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32 return Status::OK(); } -Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { +Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) { return Status::Error("data err"); } diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index 589d43d219..a6322ab43b 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -161,7 +161,7 @@ Status CloudBinlog::Put(const std::string& item) { return Status::Error("data err: db_id and rocksdb_id empty"); } // Note: mutex lock should be held -Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) { +Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) { if (!opened_.load()) { return Status::Busy("Cloud Binlog is not open yet"); } @@ -176,7 +176,7 @@ Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksd if (!s.ok()) { return s; } - std::string data = PikaCloudBinlogTransverter::BinlogEncode(db_id, rocksdb_id, time(nullptr), term, filenum, offset, item); + std::string data = PikaCloudBinlogTransverter::BinlogEncode(db_id, rocksdb_id, time(nullptr), term, filenum, offset, item, type); s = Put(data.c_str(), static_cast(data.size())); if (!s.ok()) { diff --git a/src/pika_cloud_binlog.proto b/src/pika_cloud_binlog.proto index 75eb7b0620..c54df8f5b9 100644 --- a/src/pika_cloud_binlog.proto +++ b/src/pika_cloud_binlog.proto @@ -13,4 +13,5 @@ message BinlogCloudItem { uint64 file_num = 5; uint64 offset = 6; bytes content = 7; + uint64 type = 8; } diff --git a/src/pika_cloud_binlog_transverter.cc b/src/pika_cloud_binlog_transverter.cc index e2c7fd1049..bb3ac71fb9 100644 --- a/src/pika_cloud_binlog_transverter.cc +++ b/src/pika_cloud_binlog_transverter.cc @@ -17,7 +17,7 @@ const int SPACE_STROE_PARAMETER_LENGTH = 5; std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t rocksdb_id, uint32_t exec_time, uint32_t term_id, uint32_t filenum, uint64_t offset, - const std::string& content) { + const std::string& content, uint32_t type) { std::string serialize_binlog; cloud::BinlogCloudItem binlog_item; binlog_item.set_db_id(db_id); @@ -27,6 +27,7 @@ std::string PikaCloudBinlogTransverter::BinlogEncode(uint32_t db_id, uint32_t ro binlog_item.set_file_num(filenum); binlog_item.set_offset(offset); binlog_item.set_content(content); + binlog_item.set_type(type); binlog_item.SerializeToString(&serialize_binlog); return serialize_binlog; } @@ -67,7 +68,7 @@ std::string PikaCloudBinlogTransverter::ConstructPaddingBinlog(uint32_t paramete content.append(kNewLine); 
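For reference, a round trip through the widened encoder shown above; this is a sketch only, the numeric values are arbitrary, the include paths follow this tree's layout, and type 0 simply mirrors the default the padding binlog passes:

    #include <cassert>
    #include <string>

    #include "include/pika_cloud_binlog_transverter.h"
    #include "pika_cloud_binlog.pb.h"

    void CloudBinlogRoundTripSketch() {
      std::string raw = PikaCloudBinlogTransverter::BinlogEncode(
          /*db_id*/ 0, /*rocksdb_id*/ 1, /*exec_time*/ 1711612800, /*term_id*/ 2,
          /*filenum*/ 10, /*offset*/ 4096, /*content*/ "write-batch-rep", /*type*/ 0);

      cloud::BinlogCloudItem item;
      if (PikaCloudBinlogTransverter::BinlogDecode(raw, &item)) {
        // the new field rides along with the payload, so a consumer can tell
        // plain memtable writes from flush/manifest records before ApplyWAL
        assert(item.rocksdb_id() == 1 && item.offset() == 4096 && item.type() == 0);
      }
    }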
RedisAppendContent(content, std::string(parameter_len, '*')); - BinlogEncode(0, 0, 0, 0, 0, 0, content); + BinlogEncode(0, 0, 0, 0, 0, 0, content, 0); return binlog; } diff --git a/src/pika_server.cc b/src/pika_server.cc index a6af8f6bb4..a61ff5aa52 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1422,7 +1422,7 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); - storage_options.cloud_fs_options.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, + cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } @@ -1824,7 +1824,7 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, if (sentinel_client_ == nullptr) { sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); } - + FILE* fp = fopen(local_path.c_str(), "rb"); if (fp == nullptr) { LOG(WARNING) << "read file failed," @@ -1832,12 +1832,12 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, << " error: " << strerror(errno); return false; } - + fseek(fp, 0 , SEEK_END); long f_size = ftell(fp); rewind(fp); char* buffer = new char[f_size]; - + DEFER { . delete [] buffer; fclose(fp); diff --git a/src/pstd/include/pstd_wal.h b/src/pstd/include/pstd_wal.h index 7028a81dcd..539c9d50ab 100644 --- a/src/pstd/include/pstd_wal.h +++ b/src/pstd/include/pstd_wal.h @@ -15,7 +15,7 @@ namespace pstd { class WalWriter : public noncopyable { public: virtual ~WalWriter() {} - virtual Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id) = 0; + virtual Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) = 0; }; } // namespace pstd diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index b46450d008..b61b451ccb 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -531,6 +531,7 @@ Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::stri Status Redis::ReOpenRocksDB(const storage::StorageOptions& opt) { Close(); Open(opt, db_path_); + return Status::OK(); } Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { @@ -623,7 +624,7 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re return "0"; } auto s = wal_writer_->Put(record.contents, db_id, - redis_inst->GetIndex()); + redis_inst->GetIndex(), uint32_t(record.type)); if (!s.ok()) { LOG(ERROR) << "write binlog failed, db_id: " << db_id << " rocksdb_id: " << redis_inst->GetIndex(); diff --git a/tests/gtest/cloud_binlog_test.cc b/tests/gtest/cloud_binlog_test.cc index d19bac1639..9357d673b8 100644 --- a/tests/gtest/cloud_binlog_test.cc +++ b/tests/gtest/cloud_binlog_test.cc @@ -67,7 +67,7 @@ TEST_F(CloudBinlogTest, GetPutTest) { TEST_F(CloudBinlogTransverterTest, CodeTest) { std::string binlog_item_s = - PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test"); + PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test", 0); cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); PikaCloudBinlogTransverter::BinlogDecode(binlog_item_s, binlog_item); ASSERT_EQ(1, binlog_item->db_id()); @@ -82,7 +82,7 @@ 
TEST_F(CloudBinlogTransverterTest, CodeTest) { TEST_F(CloudBinlogTransverterTest, WithoutContentDecodeTest) { std::string binlog_item_s = - PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test"); + PikaCloudBinlogTransverter::BinlogEncode(1, 1, 1, 1, 4294967294, 18446744073709551615, "test", 0); cloud::BinlogCloudItem* binlog_item = new cloud::BinlogCloudItem(); PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(binlog_item_s, binlog_item); ASSERT_EQ(1, binlog_item->db_id()); @@ -98,4 +98,4 @@ TEST_F(CloudBinlogTransverterTest, WithoutContentDecodeTest) { int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} From d12068ce14d350698b9a2b1e7756c3e249ef83c2 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 28 Mar 2024 17:12:43 +0800 Subject: [PATCH 059/116] fix by review comments --- src/pika_db.cc | 2 +- src/pika_repl_bgworker.cc | 2 +- src/pika_server.cc | 2 +- src/pstd/include/pstd_wal.h | 2 +- src/storage/include/storage/storage.h | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pika_db.cc b/src/pika_db.cc index d2dfd88243..c39f2c710d 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -647,6 +647,6 @@ bool DB::FlushDB() { #ifdef USE_S3 rocksdb::Status DB::SwitchMaster(bool is_old_master, bool is_new_master) { - return storage_.SwitchMaster(is_old_master, is_new_master); + return storage_->SwitchMaster(is_old_master, is_new_master); #endif } diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 4e7333d4e7..6a36e148b0 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -156,7 +156,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { return; } db->Logger()->Put(binlog_res.binlog()); - auto s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item->content()); + auto s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); if (!s.ok()) { LOG(WARNING) << "rocksdb apply wal failed, error: " << s.ToString(); return; diff --git a/src/pika_server.cc b/src/pika_server.cc index a61ff5aa52..57f8454d34 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1839,7 +1839,7 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, char* buffer = new char[f_size]; DEFER { - . 
delete [] buffer; + delete [] buffer; fclose(fp); }; diff --git a/src/pstd/include/pstd_wal.h b/src/pstd/include/pstd_wal.h index 539c9d50ab..9ece9ffa7d 100644 --- a/src/pstd/include/pstd_wal.h +++ b/src/pstd/include/pstd_wal.h @@ -15,7 +15,7 @@ namespace pstd { class WalWriter : public noncopyable { public: virtual ~WalWriter() {} - virtual Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) = 0; + virtual Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) = 0; }; } // namespace pstd diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index 4302f402f2..0ef91d9526 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -199,7 +199,7 @@ class Storage { std::unique_ptr& GetDBInstance(const std::string& key); - Status ApplyWAL(int rocksdb_id, const std::string& repli_seq, int type, const std::string& content); + Status ApplyWAL(int rocksdb_id, int type, const std::string& content); bool ShouldSkip(int rocksdb_id, const std::string& content); @@ -1120,7 +1120,7 @@ class Storage { Status SetOptions(const OptionType& option_type, const std::string& db_type, const std::unordered_map& options); void SetCompactRangeOptions(const bool is_canceled); - Status EnableDymayticOptions(const OptionType& option_type, + Status EnableDymayticOptions(const OptionType& option_type, const std::string& db_type, const std::unordered_map& options); Status EnableAutoCompaction(const OptionType& option_type, const std::string& db_type, const std::unordered_map& options); From 697c75cb416604aa63ed39a8c4207a9ac5040134 Mon Sep 17 00:00:00 2001 From: baixin Date: Thu, 28 Mar 2024 17:13:16 +0800 Subject: [PATCH 060/116] fix bug --- codis/pkg/topom/topom_group.go | 3 +-- codis/pkg/utils/redis/codis_sentinel.go | 11 ++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 9b68cfbc9d..612547c491 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -795,7 +795,6 @@ func (s *Topom) newSyncActionExecutor(addr string) (func() error, error) { } func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename string, content string) error { - s.mu.Lock() ctx, err := s.newContext() if err != nil { return err @@ -803,8 +802,8 @@ func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename str if gid <= 0 || gid > models.MaxGroupId { return errors.Errorf("invalid group id = %d, out of range", gid) } + group, exists := ctx.group[gid] - s.mu.Unlock() if !exists { return errors.Errorf("group-[%d] not exists", gid) diff --git a/codis/pkg/utils/redis/codis_sentinel.go b/codis/pkg/utils/redis/codis_sentinel.go index 7b495624c1..58fab5ead8 100644 --- a/codis/pkg/utils/redis/codis_sentinel.go +++ b/codis/pkg/utils/redis/codis_sentinel.go @@ -100,11 +100,13 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClient(parallel int, groupServers fut.Add() go func(gid, index int, server *models.GroupServer) { + var state *ReplicationState defer func() { + fut.Done(fmt.Sprintf("%d_%d", gid, index), state) <-limit }() info, err := s.infoReplicationDispatch(server.Addr) - state := &ReplicationState{ + state = &ReplicationState{ Index: index, GroupID: gid, Addr: server.Addr, @@ -112,7 +114,6 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClient(parallel int, groupServers Replication: info, Err: err, } - fut.Done(fmt.Sprintf("%d_%d", gid, index), state) 
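The rewritten goroutine above relies on a defer subtlety: because the deferred call is a closure, fut.Done reads whatever value state holds when the goroutine returns, not the nil it held when the defer statement ran. A standalone sketch of that distinction (names here are illustrative only):

    package main

    import "fmt"

    func collect() {
    	var state string
    	// deferring a closure: state is read at return time, after the assignment below
    	defer func() { fmt.Println("done:", state) }()
    	// defer fmt.Println("done:", state) // would capture the empty value evaluated here
    	state = "replication info collected"
    }

    func main() { collect() }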
}(gid, index, server) } } @@ -174,12 +175,13 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr fut.Add() go func(gid, index int, server *models.GroupServer) { + var state *ReplicationState defer func() { + fut.Done(fmt.Sprintf("%d_%d", gid, index), state) <-limit }() - //info, err := s.infoReplicationDispatch(server.Addr) info, err := s.PkPingDispatch(server.Addr, group_inf_json) - state := &ReplicationState{ + state = &ReplicationState{ Index: index, GroupID: gid, Addr: server.Addr, @@ -187,7 +189,6 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr Replication: info, Err: err, } - fut.Done(fmt.Sprintf("%d_%d", gid, index), state) }(gid, index, server) } } From d0de696f73ad3aafeacd61b53a62a70772df09fc Mon Sep 17 00:00:00 2001 From: baixin Date: Thu, 28 Mar 2024 17:51:14 +0800 Subject: [PATCH 061/116] fix bug --- codis/pkg/topom/topom_api.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/codis/pkg/topom/topom_api.go b/codis/pkg/topom/topom_api.go index 98e98f3a66..35fd1df445 100644 --- a/codis/pkg/topom/topom_api.go +++ b/codis/pkg/topom/topom_api.go @@ -503,19 +503,20 @@ func (s *apiServer) SyncRemoveAction(params martini.Params) (int, string) { } } +type UploadRequest struct { + GroupId int `json:"group_id"` + TermId int `json:"term_id"` + S3Bucket string `json:"s3_bucket"` + S3Path string `json:"s3_path"` + Content string `json:"content"` +} + func (s *apiServer) UploadManifestToS3(req *http.Request) (int, string) { body, err := io.ReadAll(req.Body) if err != nil { return rpc.ApiResponseError(err) } - type UploadRequest struct { - GroupId int `json:"group_id"` - TermId int `json:"term_id"` - S3Bucket string `json:"s3_bucket"` - S3Path string `json:"s3_path"` - Content string `json:"content"` - } var uploadReq UploadRequest err = json.Unmarshal(body, &uploadReq) if err != nil { From e87194dd3a28944df6234023d6c93f3bf5845912 Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 29 Mar 2024 10:54:50 +0800 Subject: [PATCH 062/116] fix bug --- src/pika_inner_message.proto | 5 +++++ src/pika_server.cc | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/pika_inner_message.proto b/src/pika_inner_message.proto index c037cd9d51..db7a87e463 100644 --- a/src/pika_inner_message.proto +++ b/src/pika_inner_message.proto @@ -119,6 +119,11 @@ message InnerResponse { required string db_name = 1; required int32 slot_num = 2; required int32 db_instance_num = 3; + //s3 info + optional string cloud_endpoint_override = 4; + optional string cloud_bucket_prefix = 5; + optional string cloud_bucket_suffix = 6; + optional string cloud_bucket_region = 7; } required bool classic_mode = 1; repeated DBInfo dbs_info = 2; diff --git a/src/pika_server.cc b/src/pika_server.cc index 57f8454d34..9f4d31ff21 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -335,16 +335,16 @@ void PikaServer::InitDBStruct() { std::string log_path = g_pika_conf->log_path(); std::vector db_structs = g_pika_conf->db_structs(); std::lock_guard rwl(dbs_rw_); - for (const auto& db : db_structs) { + for (auto& db : db_structs) { std::string name = db.db_name; std::shared_ptr db_ptr = std::make_shared(name, db_path, log_path); db_ptr->Init(); dbs_.emplace(name, db_ptr); #ifdef USE_S3 db.cloud_endpoint_override = g_pika_conf->cloud_endpoint_override(); - db.cloud_bucket_prefix = g_pika_conf->cloud_bucket_prefix(); - db.cloud_bucket_suffix = g_pika_conf->cloud_bucket_suffix(); - db.cloud_bucket_region = 
g_pika_conf->cloud_bucket_region(); + db.cloud_bucket_prefix = g_pika_conf->cloud_src_bucket_prefix(); + db.cloud_bucket_suffix = g_pika_conf->cloud_src_bucket_prefix(); + db.cloud_bucket_region = g_pika_conf->cloud_src_bucket_region(); #endif } } From 06642f7a1851b234bae53b1a7677516fa4bd6713 Mon Sep 17 00:00:00 2001 From: baixin Date: Sun, 31 Mar 2024 20:32:43 +0800 Subject: [PATCH 063/116] fix sync bug --- src/pika_cloud_binlog.cc | 13 +++++++++++- src/pika_repl_server_conn.cc | 6 +++++- src/pika_rm.cc | 22 ++++++++++++++++---- src/pika_server.cc | 5 +++-- src/pika_stable_log.cc | 39 ++++++++++++++++++++++++++---------- 5 files changed, 66 insertions(+), 19 deletions(-) diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index a6322ab43b..006386d5dc 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -158,7 +158,18 @@ Status CloudBinlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, u } Status CloudBinlog::Put(const std::string& item) { - return Status::Error("data err: db_id and rocksdb_id empty"); + if (!opened_.load()) { + return Status::Busy("Cloud Binlog is not open yet"); + } + + Lock(); + DEFER { Unlock(); }; + + Status s = Put(item.c_str(), static_cast(item.size())); + if (!s.ok()) { + binlog_io_error_.store(true); + } + return s; } // Note: mutex lock should be held Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) { diff --git a/src/pika_repl_server_conn.cc b/src/pika_repl_server_conn.cc index e5d0236f61..7e5132f94c 100644 --- a/src/pika_repl_server_conn.cc +++ b/src/pika_repl_server_conn.cc @@ -125,7 +125,11 @@ void PikaReplServerConn::HandleTrySyncRequest(void* arg) { //In cloud mode, only full synchronization is possible. if (g_pika_conf->pika_model() == PIKA_CLOUD) { if (pre_success) { - try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kSyncPointBePurged); + if (!db->CheckSlaveNodeExist(node.ip(), node.port())) { + try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kSyncPointBePurged); + } else if (TrySyncOffsetCheck(db, try_sync_request, try_sync_response)){ + TrySyncUpdateSlaveNode(db, try_sync_request, conn, try_sync_response); + } } } else { if (pre_success && TrySyncOffsetCheck(db, try_sync_request, try_sync_response)) { diff --git a/src/pika_rm.cc b/src/pika_rm.cc index e1deb6d8c8..e881725762 100644 --- a/src/pika_rm.cc +++ b/src/pika_rm.cc @@ -18,6 +18,7 @@ #include "include/pika_server.h" #include "include/pika_admin.h" +#include "include/pika_cloud_binlog_transverter.h" #include "include/pika_command.h" using pstd::Status; @@ -164,12 +165,25 @@ Status SyncMasterDB::ReadBinlogFileToWq(const std::shared_ptr& slave_ return s; } BinlogItem item; - if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, msg, &item)) { - LOG(WARNING) << "Binlog item decode failed"; - return Status::Corruption("Binlog item decode failed"); + cloud::BinlogCloudItem cloud_item; + if (g_pika_conf->pika_model() == PIKA_CLOUD){ + if (!PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(msg, &cloud_item)) { + return Status::Corruption("Binlog item decode failed"); + } + } else { + if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, msg, &item)) { + LOG(WARNING) << "Binlog item decode failed"; + return Status::Corruption("Binlog item decode failed"); + } } + BinlogOffset sent_b_offset = BinlogOffset(filenum, offset); - LogicOffset sent_l_offset = LogicOffset(item.term_id(), item.logic_id()); + LogicOffset 
sent_l_offset; + if (g_pika_conf->pika_model() == PIKA_CLOUD){ + sent_l_offset = LogicOffset(cloud_item.term_id(), 0); + } else { + sent_l_offset = LogicOffset(item.term_id(), item.logic_id()); + } LogOffset sent_offset(sent_b_offset, sent_l_offset); slave_ptr->sync_win.Push(SyncWinItem(sent_offset, msg.size())); diff --git a/src/pika_server.cc b/src/pika_server.cc index 9f4d31ff21..144853ec2b 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1422,8 +1422,9 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); - cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + //for test + //cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, + // std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } diff --git a/src/pika_stable_log.cc b/src/pika_stable_log.cc index e965ee4494..b9cf747ebe 100644 --- a/src/pika_stable_log.cc +++ b/src/pika_stable_log.cc @@ -8,12 +8,13 @@ #include +#include "include/pika_cloud_binlog.h" +#include "include/pika_cloud_binlog_transverter.h" +#include "include/pika_conf.h" #include "include/pika_rm.h" #include "include/pika_server.h" #include "include/pika_stable_log.h" #include "pstd/include/env.h" -#include "include/pika_conf.h" -#include "include/pika_cloud_binlog.h" using pstd::Status; @@ -176,6 +177,7 @@ void StableLog::UpdateFirstOffset(uint32_t filenum) { BinlogItem item; BinlogOffset offset; + cloud::BinlogCloudItem cloud_item; while (true) { std::string binlog; Status s = binlog_reader.Get(&binlog, &(offset.filenum), &(offset.offset)); @@ -186,20 +188,35 @@ void StableLog::UpdateFirstOffset(uint32_t filenum) { LOG(WARNING) << "Binlog reader get failed"; return; } - if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog, &item)) { - LOG(WARNING) << "Binlog item decode failed"; - return; - } - // exec_time == 0, could be padding binlog - if (item.exec_time() != 0) { - break; + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (!PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(binlog, &cloud_item)) { + LOG(WARNING) << "Cloud Binlog item decode failed"; + return; + } + // exec_time == 0, could be padding cloudbinlog + if (cloud_item.exec_time() != 0) { + break; + } + } else { + if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog, &item)) { + LOG(WARNING) << "Binlog item decode failed"; + return; + } + // exec_time == 0, could be padding binlog + if (item.exec_time() != 0) { + break; + } } } std::lock_guard l(offset_rwlock_); first_offset_.b_offset = offset; - first_offset_.l_offset.term = item.term_id(); - first_offset_.l_offset.index = item.logic_id(); + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + first_offset_.l_offset.term = cloud_item.term_id(); + } else { + first_offset_.l_offset.term = item.term_id(); + first_offset_.l_offset.index = item.logic_id(); + } } Status StableLog::PurgeFileAfter(uint32_t filenum) { From e0098a0109ce94fb1db31a8153d9e2f981584457 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 1 Apr 2024 16:00:30 +0800 Subject: [PATCH 064/116] fix binlog sync --- include/pika_binlog.h | 2 ++ include/pika_cloud_binlog.h | 6 +++++ src/pika_binlog.cc | 4 +++ 
src/pika_cloud_binlog.cc | 37 ++++++++++++++++++++++++++++ src/pika_cloud_binlog_transverter.cc | 1 - src/pika_db.cc | 6 ++++- src/pika_repl_bgworker.cc | 12 ++++----- src/pika_rm.cc | 9 +++++++ src/storage/src/redis.cc | 9 +++++-- 9 files changed, 75 insertions(+), 11 deletions(-) diff --git a/include/pika_binlog.h b/include/pika_binlog.h index c31d99a1b6..d107873f06 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -57,6 +57,8 @@ class Binlog : public pstd::WalWriter { virtual pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) override; virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); + + virtual pstd::Status GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* * Set Producer pro_num and pro_offset with lock */ diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h index 21389afafd..2a60ba08a3 100644 --- a/include/pika_cloud_binlog.h +++ b/include/pika_cloud_binlog.h @@ -30,6 +30,8 @@ class CloudVersion final : public pstd::noncopyable { uint32_t pro_num_ = 0; uint64_t pro_offset_ = 0; uint32_t term_ = 0; + uint32_t keep_filenum_ = 0; + uint64_t keep_offset_ = 0; std::shared_mutex rwlock_; @@ -53,6 +55,8 @@ class CloudBinlog : public Binlog { pstd::Status Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) override; pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr) override; + + pstd::Status GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr) override; /* * Set Producer pro_num and pro_offset with lock */ @@ -107,6 +111,8 @@ class CloudBinlog : public Binlog { std::string filename_; std::atomic binlog_io_error_; + + std::unordered_map> binlog_to_keep_; }; #endif diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index 7cbb06835d..7049d88251 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -165,6 +165,10 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32 return Status::OK(); } +Status Binlog::GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term, uint64_t* logic_id) { + return Status::NotSupported("not supported in cloud mode"); +} + Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) { return Status::Error("data err"); } diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index 006386d5dc..bbd12e16c5 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -37,6 +37,10 @@ Status CloudVersion::StableSave() { memcpy(p, &pro_offset_, sizeof(uint64_t)); p += 8; memcpy(p, &term_, sizeof(uint32_t)); + p += 4; + memcpy(p, &keep_filenum_, sizeof(uint32_t)); + p += 4; + memcpy(p, &keep_offset_, sizeof(uint64_t)); return Status::OK(); } @@ -46,6 +50,8 @@ Status CloudVersion::Init() { memcpy(reinterpret_cast(&pro_num_), save_->GetData(), sizeof(uint32_t)); memcpy(reinterpret_cast(&pro_offset_), save_->GetData() + 4, sizeof(uint64_t)); memcpy(reinterpret_cast(&term_), save_->GetData() + 12, sizeof(uint32_t)); + memcpy(reinterpret_cast(&keep_filenum_), save_->GetData() + 16, sizeof(uint32_t)); + memcpy(reinterpret_cast(&pro_offset_), save_->GetData() + 20, sizeof(uint64_t)); return Status::OK(); } else { return Status::Corruption("version init error"); @@ -157,6 
+163,21 @@ Status CloudBinlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, u return Status::OK(); } +Status CloudBinlog::GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term, uint64_t* logic_id) { + if (!opened_.load()) { + return Status::Busy("Cloud Binlog is not open yet"); + } + + std::shared_lock l(version_->rwlock_); + *filenum = version_->keep_filenum_; + *pro_offset = version_->keep_offset_; + if (term) { + *term = version_->term_; + } + LOG(WARNING) << "GetOldestBinlogToKeep keep_filenum: " << *filenum << " keep_offset: " << *pro_offset; + return Status::OK(); +} + Status CloudBinlog::Put(const std::string& item) { if (!opened_.load()) { return Status::Busy("Cloud Binlog is not open yet"); @@ -193,6 +214,22 @@ Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksd if (!s.ok()) { binlog_io_error_.store(true); } + if (type != 0 || binlog_to_keep_.find(rocksdb_id) == binlog_to_keep_.end()) { + binlog_to_keep_[rocksdb_id] = std::make_pair(filenum, offset); + } + + uint32_t keep_filenum = filenum; + uint64_t keep_offset = keep_offset; + for (const auto& offset : binlog_to_keep_) { + if (keep_filenum >= offset.second.first && + keep_offset >= offset.second.second) { + keep_filenum = offset.second.first; + keep_offset = offset.second.second; + } + } + version_->keep_filenum_ = keep_filenum; + version_->keep_offset_ = keep_offset; + LOG(WARNING) << "keep_filenum: " << keep_filenum << " keep_offset: " << keep_offset; return s; } diff --git a/src/pika_cloud_binlog_transverter.cc b/src/pika_cloud_binlog_transverter.cc index bb3ac71fb9..498106fcd9 100644 --- a/src/pika_cloud_binlog_transverter.cc +++ b/src/pika_cloud_binlog_transverter.cc @@ -79,6 +79,5 @@ bool PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(const std::strin LOG(ERROR) << "Failed to deserialize cloud binlog item"; return false; } - binlog_item->set_content(""); return true; } diff --git a/src/pika_db.cc b/src/pika_db.cc index c39f2c710d..73cf21a7b2 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -303,6 +303,7 @@ bool DB::RunBgsaveEngine() { LOG(INFO) << db_name_ << " bgsave_info: path=" << info.path << ", filenum=" << info.offset.b_offset.filenum << ", offset=" << info.offset.b_offset.offset; +#ifndef USE_S3 // Backup to tmp dir rocksdb::Status s = bgsave_engine_->CreateNewBackup(info.path); @@ -310,6 +311,7 @@ bool DB::RunBgsaveEngine() { LOG(WARNING) << db_name_ << " create new backup failed :" << s.ToString(); return false; } +#endif LOG(INFO) << db_name_ << " create new backup finished."; return true; @@ -369,7 +371,7 @@ bool DB::InitBgsaveEngine() { std::lock_guard lock(db_rwlock_); LogOffset bgsave_offset; // term, index are 0 - db->Logger()->GetProducerStatus(&(bgsave_offset.b_offset.filenum), &(bgsave_offset.b_offset.offset)); + db->Logger()->GetOldestBinlogToKeep(&(bgsave_offset.b_offset.filenum), &(bgsave_offset.b_offset.offset)); { std::lock_guard l(bgsave_protector_); bgsave_info_.offset = bgsave_offset; @@ -510,11 +512,13 @@ bool DB::TryUpdateMasterOffset() { << ", offset: " << offset << ", term: " << term << ", index: " << index; pstd::DeleteFile(info_path); +/* if (!ChangeDb(dbsync_path_)) { LOG(WARNING) << "DB: " << db_name_ << ", Failed to change db"; slave_db->SetReplState(ReplState::kError); return false; } +*/ // Update master offset std::shared_ptr master_db = diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 6a36e148b0..d309b01320 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ 
-143,11 +143,6 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { return; } - auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); - if (storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { - continue; - } - std::shared_ptr db = g_pika_rm->GetSyncMasterDBByName(DBInfo(worker->db_name_)); if (!db) { @@ -155,13 +150,16 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { slave_db->SetReplState(ReplState::kTryConnect); return; } - db->Logger()->Put(binlog_res.binlog()); + db->Logger()->Put(binlog_item.content(), binlog_item.db_id(), binlog_item.rocksdb_id(), binlog_item.type()); + auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); + if (storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { + continue; + } auto s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); if (!s.ok()) { LOG(WARNING) << "rocksdb apply wal failed, error: " << s.ToString(); return; } - return; } else { if (!PikaBinlogTransverter::BinlogItemWithoutContentDecode(TypeFirst, binlog_res.binlog(), &worker->binlog_item_)) { LOG(WARNING) << "Binlog item decode failed"; diff --git a/src/pika_rm.cc b/src/pika_rm.cc index e881725762..9b01ac064b 100644 --- a/src/pika_rm.cc +++ b/src/pika_rm.cc @@ -293,6 +293,15 @@ Status SyncMasterDB::GetSafetyPurgeBinlog(std::string* safety_purge) { break; } } +#ifdef USE_S3 + BinlogOffset old_offset; + s = Logger()->GetOldestBinlogToKeep(&old_offset.filenum, &old_offset.offset); + if (!s.ok()) { + LOG(ERROR) << "get oldest binlog to keep failed"; + } + LOG(WARNING) << "GetSafetyPurgeBinlog, origin filenum: " << purge_max << " oldest log to keep: " << old_offset.filenum; + purge_max = std::min(purge_max, old_offset.filenum - 2); +#endif } *safety_purge = (success ? 
kBinlogPrefix + std::to_string(static_cast(purge_max)) : "none"); return Status::OK(); diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index b61b451ccb..f99de0b431 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -215,7 +215,12 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& return s; } db_ops.env = cloud_env_.get(); - return rocksdb::DBCloud::Open(db_ops, db_path, column_families, "", 0, &handles_, &db_); + s = rocksdb::DBCloud::Open(db_ops, db_path, column_families, "", 0, &handles_, &db_); + if (s.ok()) { + opened_ = true; + } + return s; + #else auto s = rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); opened_ = true; @@ -619,7 +624,7 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re Redis* redis_inst = (Redis*)inst_; //TODO(wangshaoyi): get from storage int db_id = 0; - if (redis_inst->opened_) { + if (!redis_inst->opened_) { LOG(WARNING) << "rocksdb not opened yet, skip write binlog"; return "0"; } From 45c85cee367e4df230a8c9bc517573023d7d1e20 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 1 Apr 2024 16:17:13 +0800 Subject: [PATCH 065/116] fix compile error --- include/pika_cloud_binlog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h index 2a60ba08a3..7de2306436 100644 --- a/include/pika_cloud_binlog.h +++ b/include/pika_cloud_binlog.h @@ -112,7 +112,7 @@ class CloudBinlog : public Binlog { std::atomic binlog_io_error_; - std::unordered_map> binlog_to_keep_; + std::unordered_map> binlog_to_keep_; }; #endif From 914d67bbf4f87311baaa9e2e807165ce6fa67bb6 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 2 Apr 2024 19:32:22 +0800 Subject: [PATCH 066/116] fix switchmaster bug --- src/pika_admin.cc | 12 +++++++----- src/pika_db.cc | 2 ++ src/pika_rm.cc | 2 ++ src/pika_server.cc | 7 +++++++ src/storage/src/redis.cc | 2 ++ 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/pika_admin.cc b/src/pika_admin.cc index ed0ea4dee2..56d307160b 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -150,13 +150,16 @@ void SlaveofCmd::Do() { return; } + bool is_old_master = !(g_pika_server->role() == PIKA_ROLE_SLAVE); + LOG(WARNING) << "slaveofcmd, currently: is_master: " << is_old_master << " role: " << g_pika_server->role(); + g_pika_server->RemoveMaster(); if (is_none_) { - if (g_pika_conf->pika_model() == PIKA_CLOUD && g_pika_server->role() == PIKA_ROLE_SLAVE) { + if (g_pika_conf->pika_model() == PIKA_CLOUD) { std::shared_lock rwl(g_pika_server->dbs_rw_); for (const auto& db_item : g_pika_server->dbs_) { - db_item.second->SwitchMaster(false, true); + db_item.second->SwitchMaster(is_old_master, true); } } res_.SetRes(CmdRes::kOk); @@ -169,12 +172,11 @@ void SlaveofCmd::Do() { * slaveof executor to slave */ bool sm_ret = g_pika_server->SetMaster(master_ip_, static_cast(master_port_)); - if (sm_ret) { - if (g_pika_conf->pika_model() == PIKA_CLOUD && g_pika_server->role() == PIKA_ROLE_MASTER) { + if (g_pika_conf->pika_model() == PIKA_CLOUD) { std::shared_lock rwl(g_pika_server->dbs_rw_); for (const auto& db_item : g_pika_server->dbs_) { - db_item.second->SwitchMaster(true, false); + db_item.second->SwitchMaster(is_old_master, false); } } res_.SetRes(CmdRes::kOk); diff --git a/src/pika_db.cc b/src/pika_db.cc index 73cf21a7b2..d30a49b98b 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -376,11 +376,13 @@ bool DB::InitBgsaveEngine() { std::lock_guard 
l(bgsave_protector_); bgsave_info_.offset = bgsave_offset; } + /* s = bgsave_engine_->SetBackupContent(); if (!s.ok()) { LOG(WARNING) << db_name_ << " set backup content failed " << s.ToString(); return false; } + */ } return true; } diff --git a/src/pika_rm.cc b/src/pika_rm.cc index 9b01ac064b..9f8fd6632f 100644 --- a/src/pika_rm.cc +++ b/src/pika_rm.cc @@ -793,8 +793,10 @@ Status PikaReplicaManager::CheckDBRole(const std::string& db, int* role) { (sync_master_dbs_[p_info]->GetNumberOfSlaveNode() == 0 && sync_slave_dbs_[p_info]->State() == kNoConnect)) { *role |= PIKA_ROLE_MASTER; + LOG(WARNING) << "role change to PIKA_ROLE_MASTER"; } if (sync_slave_dbs_[p_info]->State() != ReplState::kNoConnect) { + LOG(WARNING) << "role change to PIKA_ROLE_SLAVE"; *role |= PIKA_ROLE_SLAVE; } // if role is not master or slave, the rest situations are all single diff --git a/src/pika_server.cc b/src/pika_server.cc index 144853ec2b..759e5ccb83 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -547,7 +547,9 @@ Status PikaServer::DoSameThingEveryDB(const TaskType& type) { void PikaServer::BecomeMaster() { std::lock_guard l(state_protector_); + int tmp_role = role_; role_ |= PIKA_ROLE_MASTER; + LOG(WARNING) << "role change from " << tmp_role << " to " << role_; } void PikaServer::DeleteSlave(int fd) { @@ -680,10 +682,13 @@ void PikaServer::SyncError() { void PikaServer::RemoveMaster() { { + int tmp_role = role_; std::lock_guard l(state_protector_); repl_state_ = PIKA_REPL_NO_CONNECT; role_ &= ~PIKA_ROLE_SLAVE; + LOG(WARNING) << "removemaster role change from " << tmp_role << " to " << role_; + if (!master_ip_.empty() && master_port_ != -1) { g_pika_rm->CloseReplClientConn(master_ip_, master_port_ + kPortShiftReplServer); g_pika_rm->LostConnection(master_ip_, master_port_); @@ -701,12 +706,14 @@ bool PikaServer::SetMaster(std::string& master_ip, int master_port) { if (master_ip == "127.0.0.1") { master_ip = host_; } + int tmp_role = role_; std::lock_guard l(state_protector_); if (((role_ ^ PIKA_ROLE_SLAVE) != 0) && repl_state_ == PIKA_REPL_NO_CONNECT) { master_ip_ = master_ip; master_port_ = master_port; role_ |= PIKA_ROLE_SLAVE; repl_state_ = PIKA_REPL_SHOULD_META_SYNC; + LOG(WARNING) << "setmaster role change from " << tmp_role << " to " << role_; return true; } return false; diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index f99de0b431..00814f15d9 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -540,6 +540,7 @@ Status Redis::ReOpenRocksDB(const storage::StorageOptions& opt) { } Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { + LOG(WARNING) << "is_old_master: " << is_old_master << " is_new_master: " << is_new_master; if (is_old_master && is_new_master) { // Do nothing return Status::OK(); @@ -577,6 +578,7 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { } uint64_t remote_manifest_sequence = 0; cfs_->GetMaxManifestSequenceFromCurrentManifest(db_->GetName(), &remote_manifest_sequence); + LOG(WARNING) << "switchmaster, remote_manifest_sequence: " << remote_manifest_sequence << " local_manifest_sequence: " << local_manifest_sequence; // local version behind remote, directly reopen if (local_manifest_sequence < remote_manifest_sequence) { return ReOpenRocksDB(storage_options); From bcd9d5cf1a7ecaf025eb8459ed79fbb7a0284084 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 2 Apr 2024 19:32:22 +0800 Subject: [PATCH 067/116] fix replication error && add debug log --- include/pika_binlog.h | 2 +- 
include/pika_binlog_reader.h | 6 ++++++ include/pika_cloud_binlog.h | 4 ++-- src/pika_admin.cc | 21 ++++++++++++--------- src/pika_binlog.cc | 4 ++-- src/pika_cloud_binlog.cc | 24 ++++++++---------------- src/pika_db.cc | 14 +++++++++++--- src/pika_repl_bgworker.cc | 6 +++++- src/pika_rm.cc | 10 ++++++---- src/pika_server.cc | 7 +++++++ src/storage/src/redis.cc | 30 +++++++++++++++++++++++++++--- src/storage/src/redis.h | 2 ++ 12 files changed, 89 insertions(+), 41 deletions(-) diff --git a/include/pika_binlog.h b/include/pika_binlog.h index d107873f06..980a668bbb 100644 --- a/include/pika_binlog.h +++ b/include/pika_binlog.h @@ -58,7 +58,7 @@ class Binlog : public pstd::WalWriter { virtual pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); - virtual pstd::Status GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); + virtual pstd::Status GetOldestBinlogToKeep(uint32_t* filenum, uint32_t* term = nullptr, uint64_t* logic_id = nullptr); /* * Set Producer pro_num and pro_offset with lock */ diff --git a/include/pika_binlog_reader.h b/include/pika_binlog_reader.h index 1d604b02f7..c23591f962 100644 --- a/include/pika_binlog_reader.h +++ b/include/pika_binlog_reader.h @@ -27,6 +27,12 @@ class PikaBinlogReader { bool ReadToTheEnd(); void GetReaderStatus(uint32_t* cur_filenum, uint64_t* cur_offset); + static void GetFirstOffset(const std::shared_ptr& logger, uint32_t filenum, uint64_t* offset) { + PikaBinlogReader reader; + reader.Seek(logger, filenum, 0); + reader.GetReaderStatus(&filenum, offset); + } + private: bool GetNext(uint64_t* size); unsigned int ReadPhysicalRecord(pstd::Slice* result, uint32_t* filenum, uint64_t* offset); diff --git a/include/pika_cloud_binlog.h b/include/pika_cloud_binlog.h index 7de2306436..869587ddbc 100644 --- a/include/pika_cloud_binlog.h +++ b/include/pika_cloud_binlog.h @@ -56,7 +56,7 @@ class CloudBinlog : public Binlog { pstd::Status GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr) override; - pstd::Status GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term = nullptr, uint64_t* logic_id = nullptr) override; + pstd::Status GetOldestBinlogToKeep(uint32_t* filenum, uint32_t* term = nullptr, uint64_t* logic_id = nullptr) override; /* * Set Producer pro_num and pro_offset with lock */ @@ -112,7 +112,7 @@ class CloudBinlog : public Binlog { std::atomic binlog_io_error_; - std::unordered_map> binlog_to_keep_; + std::unordered_map binlog_to_keep_; }; #endif diff --git a/src/pika_admin.cc b/src/pika_admin.cc index ed0ea4dee2..609ab3e00c 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -150,13 +150,16 @@ void SlaveofCmd::Do() { return; } + bool is_old_master = !(g_pika_server->role() == PIKA_ROLE_SLAVE); + LOG(WARNING) << "slaveofcmd, currently role: " << g_pika_server->role(); + g_pika_server->RemoveMaster(); if (is_none_) { - if (g_pika_conf->pika_model() == PIKA_CLOUD && g_pika_server->role() == PIKA_ROLE_SLAVE) { + if (g_pika_conf->pika_model() == PIKA_CLOUD) { std::shared_lock rwl(g_pika_server->dbs_rw_); for (const auto& db_item : g_pika_server->dbs_) { - db_item.second->SwitchMaster(false, true); + db_item.second->SwitchMaster(is_old_master, true); } } res_.SetRes(CmdRes::kOk); @@ -168,15 +171,15 @@ void SlaveofCmd::Do() { * the data synchronization was successful, but only changes the status of the * slaveof executor to 
slave */ - bool sm_ret = g_pika_server->SetMaster(master_ip_, static_cast(master_port_)); + if (g_pika_conf->pika_model() == PIKA_CLOUD) { + std::shared_lock rwl(g_pika_server->dbs_rw_); + for (const auto& db_item : g_pika_server->dbs_) { + db_item.second->SwitchMaster(is_old_master, false); + } + } + bool sm_ret = g_pika_server->SetMaster(master_ip_, static_cast(master_port_)); if (sm_ret) { - if (g_pika_conf->pika_model() == PIKA_CLOUD && g_pika_server->role() == PIKA_ROLE_MASTER) { - std::shared_lock rwl(g_pika_server->dbs_rw_); - for (const auto& db_item : g_pika_server->dbs_) { - db_item.second->SwitchMaster(true, false); - } - } res_.SetRes(CmdRes::kOk); g_pika_server->ClearCacheDbAsync(db_); g_pika_conf->SetSlaveof(master_ip_ + ":" + std::to_string(master_port_)); diff --git a/src/pika_binlog.cc b/src/pika_binlog.cc index 7049d88251..7972a2ae6b 100644 --- a/src/pika_binlog.cc +++ b/src/pika_binlog.cc @@ -165,8 +165,8 @@ Status Binlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, uint32 return Status::OK(); } -Status Binlog::GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term, uint64_t* logic_id) { - return Status::NotSupported("not supported in cloud mode"); +Status Binlog::GetOldestBinlogToKeep(uint32_t* filenum, uint32_t* term, uint64_t* logic_id) { + return Status::NotSupported("not supported in local mode"); } Status Binlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksdb_id, uint32_t type) { diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index bbd12e16c5..f1f2f22823 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -39,8 +39,6 @@ Status CloudVersion::StableSave() { memcpy(p, &term_, sizeof(uint32_t)); p += 4; memcpy(p, &keep_filenum_, sizeof(uint32_t)); - p += 4; - memcpy(p, &keep_offset_, sizeof(uint64_t)); return Status::OK(); } @@ -51,7 +49,6 @@ Status CloudVersion::Init() { memcpy(reinterpret_cast(&pro_offset_), save_->GetData() + 4, sizeof(uint64_t)); memcpy(reinterpret_cast(&term_), save_->GetData() + 12, sizeof(uint32_t)); memcpy(reinterpret_cast(&keep_filenum_), save_->GetData() + 16, sizeof(uint32_t)); - memcpy(reinterpret_cast(&pro_offset_), save_->GetData() + 20, sizeof(uint64_t)); return Status::OK(); } else { return Status::Corruption("version init error"); @@ -163,18 +160,17 @@ Status CloudBinlog::GetProducerStatus(uint32_t* filenum, uint64_t* pro_offset, u return Status::OK(); } -Status CloudBinlog::GetOldestBinlogToKeep(uint32_t* filenum, uint64_t* pro_offset, uint32_t* term, uint64_t* logic_id) { +Status CloudBinlog::GetOldestBinlogToKeep(uint32_t* filenum, uint32_t* term, uint64_t* logic_id) { if (!opened_.load()) { return Status::Busy("Cloud Binlog is not open yet"); } std::shared_lock l(version_->rwlock_); *filenum = version_->keep_filenum_; - *pro_offset = version_->keep_offset_; if (term) { *term = version_->term_; } - LOG(WARNING) << "GetOldestBinlogToKeep keep_filenum: " << *filenum << " keep_offset: " << *pro_offset; + LOG(WARNING) << "oldest binlog filenum to keep is: " << *filenum; return Status::OK(); } @@ -214,22 +210,18 @@ Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksd if (!s.ok()) { binlog_io_error_.store(true); } + // record first binlog item and manifest update binlog item if (type != 0 || binlog_to_keep_.find(rocksdb_id) == binlog_to_keep_.end()) { - binlog_to_keep_[rocksdb_id] = std::make_pair(filenum, offset); + binlog_to_keep_[rocksdb_id] = filenum; } - uint32_t keep_filenum = filenum; - uint64_t keep_offset = 
keep_offset; + uint32_t keep_filenum = binlog_to_keep_.begin()->second; for (const auto& offset : binlog_to_keep_) { - if (keep_filenum >= offset.second.first && - keep_offset >= offset.second.second) { - keep_filenum = offset.second.first; - keep_offset = offset.second.second; - } + keep_filenum = std::min(keep_filenum, offset.second); } + version_->keep_filenum_ = keep_filenum; - version_->keep_offset_ = keep_offset; - LOG(WARNING) << "keep_filenum: " << keep_filenum << " keep_offset: " << keep_offset; + LOG(WARNING) << "rocksdb_id: " << rocksdb_id << "type: " << type << " oldest filenum to keep: " << keep_filenum; return s; } diff --git a/src/pika_db.cc b/src/pika_db.cc index 73cf21a7b2..b684ec7aa7 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -371,16 +371,24 @@ bool DB::InitBgsaveEngine() { std::lock_guard lock(db_rwlock_); LogOffset bgsave_offset; // term, index are 0 - db->Logger()->GetOldestBinlogToKeep(&(bgsave_offset.b_offset.filenum), &(bgsave_offset.b_offset.offset)); +#ifdef USE_S3 + db->Logger()->GetOldestBinlogToKeep(&(bgsave_offset.b_offset.filenum)); + PikaBinlogReader::GetFirstOffset(db->Logger(), bgsave_offset.b_offset.filenum, &bgsave_offset.b_offset.offset); + LOG(WARNING) << "bgsave info binlog filenum: " << bgsave_offset.b_offset.filenum << " offset: " << bgsave_offset.b_offset.offset; +#else + db->Logger()->GetProducerStatus(&(bgsave_offset.b_offset.filenum), &(bgsave_offset.b_offset.offset)); +#endif { std::lock_guard l(bgsave_protector_); bgsave_info_.offset = bgsave_offset; } +#ifndef USE_S3 s = bgsave_engine_->SetBackupContent(); if (!s.ok()) { LOG(WARNING) << db_name_ << " set backup content failed " << s.ToString(); return false; } +#endif } return true; } @@ -512,13 +520,13 @@ bool DB::TryUpdateMasterOffset() { << ", offset: " << offset << ", term: " << term << ", index: " << index; pstd::DeleteFile(info_path); -/* +#ifndef USE_S3 if (!ChangeDb(dbsync_path_)) { LOG(WARNING) << "DB: " << db_name_ << ", Failed to change db"; slave_db->SetReplState(ReplState::kError); return false; } -*/ +#endif // Update master offset std::shared_ptr master_db = diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index d309b01320..ddb58709c4 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -83,6 +83,8 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { } } + LOG(WARNING) << "slave receive binlogsync, begin offset: "<< pb_begin.ToString() << " end offset: " << pb_end.ToString(); + if (pb_begin == LogOffset()) { only_keepalive = true; } @@ -132,6 +134,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { // empty binlog treated as keepalive packet if (binlog_res.binlog().empty()) { + LOG(WARNING) << "slave receive empty binlog item"; continue; } @@ -152,7 +155,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { } db->Logger()->Put(binlog_item.content(), binlog_item.db_id(), binlog_item.rocksdb_id(), binlog_item.type()); auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); - if (storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { + if (binlog_item.type() == 0 && storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { continue; } auto s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); @@ -192,6 +195,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { ack_end = productor_status; ack_end.l_offset.term = pb_end.l_offset.term; } + LOG(WARNING) << "slave Reply to master, ack_start: "<< 
ack_start.ToString() << " ack_end: " << ack_end.ToString() << "pb_end: " << pb_end.ToString(); g_pika_rm->SendBinlogSyncAckRequest(db_name, ack_start, ack_end); } diff --git a/src/pika_rm.cc b/src/pika_rm.cc index 9b01ac064b..7d871e61e7 100644 --- a/src/pika_rm.cc +++ b/src/pika_rm.cc @@ -294,13 +294,13 @@ Status SyncMasterDB::GetSafetyPurgeBinlog(std::string* safety_purge) { } } #ifdef USE_S3 - BinlogOffset old_offset; - s = Logger()->GetOldestBinlogToKeep(&old_offset.filenum, &old_offset.offset); + uint32_t oldest_filenum; + s = Logger()->GetOldestBinlogToKeep(&oldest_filenum); if (!s.ok()) { LOG(ERROR) << "get oldest binlog to keep failed"; } - LOG(WARNING) << "GetSafetyPurgeBinlog, origin filenum: " << purge_max << " oldest log to keep: " << old_offset.filenum; - purge_max = std::min(purge_max, old_offset.filenum - 2); + oldest_filenum = oldest_filenum > 0 ? oldest_filenum - 1 : 0; + purge_max = std::min(purge_max, oldest_filenum); #endif } *safety_purge = (success ? kBinlogPrefix + std::to_string(static_cast(purge_max)) : "none"); @@ -793,8 +793,10 @@ Status PikaReplicaManager::CheckDBRole(const std::string& db, int* role) { (sync_master_dbs_[p_info]->GetNumberOfSlaveNode() == 0 && sync_slave_dbs_[p_info]->State() == kNoConnect)) { *role |= PIKA_ROLE_MASTER; + LOG(WARNING) << "role change to PIKA_ROLE_MASTER"; } if (sync_slave_dbs_[p_info]->State() != ReplState::kNoConnect) { + LOG(WARNING) << "role change to PIKA_ROLE_SLAVE"; *role |= PIKA_ROLE_SLAVE; } // if role is not master or slave, the rest situations are all single diff --git a/src/pika_server.cc b/src/pika_server.cc index 144853ec2b..759e5ccb83 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -547,7 +547,9 @@ Status PikaServer::DoSameThingEveryDB(const TaskType& type) { void PikaServer::BecomeMaster() { std::lock_guard l(state_protector_); + int tmp_role = role_; role_ |= PIKA_ROLE_MASTER; + LOG(WARNING) << "role change from " << tmp_role << " to " << role_; } void PikaServer::DeleteSlave(int fd) { @@ -680,10 +682,13 @@ void PikaServer::SyncError() { void PikaServer::RemoveMaster() { { + int tmp_role = role_; std::lock_guard l(state_protector_); repl_state_ = PIKA_REPL_NO_CONNECT; role_ &= ~PIKA_ROLE_SLAVE; + LOG(WARNING) << "removemaster role change from " << tmp_role << " to " << role_; + if (!master_ip_.empty() && master_port_ != -1) { g_pika_rm->CloseReplClientConn(master_ip_, master_port_ + kPortShiftReplServer); g_pika_rm->LostConnection(master_ip_, master_port_); @@ -701,12 +706,14 @@ bool PikaServer::SetMaster(std::string& master_ip, int master_port) { if (master_ip == "127.0.0.1") { master_ip = host_; } + int tmp_role = role_; std::lock_guard l(state_protector_); if (((role_ ^ PIKA_ROLE_SLAVE) != 0) && repl_state_ == PIKA_REPL_NO_CONNECT) { master_ip_ = master_ip; master_port_ = master_port; role_ |= PIKA_ROLE_SLAVE; repl_state_ = PIKA_REPL_SHOULD_META_SYNC; + LOG(WARNING) << "setmaster role change from " << tmp_role << " to " << role_; return true; } return false; diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index f99de0b431..f9504d8b52 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -15,6 +15,8 @@ #include "src/base_filter.h" #include "src/zsets_filter.h" +#include "pstd/include/pstd_defer.h" + namespace storage { const rocksdb::Comparator* ListsDataKeyComparator() { static ListsDataKeyComparatorImpl ldkc; @@ -79,9 +81,9 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& storage_options.cloud_fs_options.resync_on_open = true; 
storage_options.cloud_fs_options.resync_manifest_on_open = true; storage_options.cloud_fs_options.skip_dbid_verification = true; - if (tmp_storage_options.cloud_fs_options.is_master) { - storage_options.options.replication_log_listener = log_listener_; - } else { + storage_options.options.replication_log_listener = log_listener_; + is_master_.store(tmp_storage_options.cloud_fs_options.is_master); + if (!tmp_storage_options.cloud_fs_options.is_master) { storage_options.options.disable_auto_flush = true; storage_options.options.disable_auto_compactions = true; } @@ -534,12 +536,18 @@ Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::stri } Status Redis::ReOpenRocksDB(const storage::StorageOptions& opt) { + LOG(WARNING) << "ReOpenRocksDB, closing old rocksdb"; Close(); + LOG(WARNING) << "ReOpenRocksDB, opening new rocksdb"; Open(opt, db_path_); return Status::OK(); } Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { + DEFER { + LOG(WARNING) << "is_old_master: " << is_old_master << " is_new_master: " << is_new_master << " done"; + }; + LOG(WARNING) << "is_old_master: " << is_old_master << " is_new_master: " << is_new_master; if (is_old_master && is_new_master) { // Do nothing return Status::OK(); @@ -548,7 +556,9 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { storage::StorageOptions storage_options(storage_options_); std::unordered_map db_options; if (is_old_master && !is_new_master) { + cfs_->SwitchMaster(false); storage_options.cloud_fs_options.is_master = false; + is_master_.store(false); db_options["disable_auto_compactions"] = "true"; db_options["disable_auto_flush"] = "true"; for (const auto& cf : handles_) { @@ -561,6 +571,7 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { // slaveof another pika master, just reopen if (!is_old_master && !is_new_master) { storage_options.cloud_fs_options.is_master = false; + is_master_.store(false); return ReOpenRocksDB(storage_options); } @@ -577,22 +588,27 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { } uint64_t remote_manifest_sequence = 0; cfs_->GetMaxManifestSequenceFromCurrentManifest(db_->GetName(), &remote_manifest_sequence); + LOG(WARNING) << "switchmaster, remote_manifest_sequence: " << remote_manifest_sequence << " local_manifest_sequence: " << local_manifest_sequence; // local version behind remote, directly reopen if (local_manifest_sequence < remote_manifest_sequence) { return ReOpenRocksDB(storage_options); } // local's version cannot beyond remote's, just holding extra data in memtables assert(local_manifest_sequence == remote_manifest_sequence); + storage_options_.cloud_fs_options.is_master = true; + is_master_.store(true); db_->NewManifestOnNextUpdate(); cfs_->SwitchMaster(true); for (const auto& cf : handles_) { db_->SetOptions(cf, db_options); } + LOG(WARNING) << "flush memtables ..."; rocksdb::FlushOptions fops; fops.wait = true; db_->Flush(fops, handles_); + LOG(WARNING) << "flush memtables done"; return Status::OK(); } return Status::OK(); @@ -628,6 +644,14 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re LOG(WARNING) << "rocksdb not opened yet, skip write binlog"; return "0"; } + + if (!redis_inst->IsMaster()) { + LOG(WARNING) << "rocksdb not master, skip write binlog"; + return "0"; + } + + LOG(WARNING) << "write binlogitem " << " db_id: " << db_id << " type: " << record.type; + auto s = wal_writer_->Put(record.contents, db_id, redis_inst->GetIndex(), uint32_t(record.type)); if 
(!s.ok()) { diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 4bbd4f3321..15a006e88e 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -46,6 +46,7 @@ class Redis { #ifdef USE_S3 rocksdb::DBCloud* GetDB() { return db_; } + bool IsMaster() const { return is_master_.load(); } #else rocksdb::DB* GetDB() { return db_; } #endif @@ -447,6 +448,7 @@ class Redis { rocksdb::DBCloud* db_ = nullptr; std::shared_ptr log_listener_; StorageOptions storage_options_; + std::atomic is_master_ = {true}; #else rocksdb::DB* db_ = nullptr; #endif From 61180004f8365883e4e683de6640f5dccce2091c Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Sun, 7 Apr 2024 12:08:43 +0800 Subject: [PATCH 068/116] fix switchmaster bug --- src/storage/src/redis.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index f9504d8b52..d1bf3ad96e 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -66,7 +66,6 @@ void Redis::Close() { delete default_compact_range_options_.canceled; } #ifdef USE_S3 - log_listener_.reset(); opened_ = false; #endif } @@ -88,6 +87,7 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& storage_options.options.disable_auto_compactions = true; } storage_options.options.atomic_flush = true; + storage_options.options.avoid_flush_during_shutdown = true; #endif statistics_store_->SetCapacity(storage_options.statistics_max_size); @@ -559,12 +559,6 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { cfs_->SwitchMaster(false); storage_options.cloud_fs_options.is_master = false; is_master_.store(false); - db_options["disable_auto_compactions"] = "true"; - db_options["disable_auto_flush"] = "true"; - for (const auto& cf : handles_) { - db_->SetOptions(cf, db_options); - } - cfs_->SwitchMaster(false); return ReOpenRocksDB(storage_options); } From 82fa0cc8001a968e3546c4cf75dfc74ad5cd0a86 Mon Sep 17 00:00:00 2001 From: baixin Date: Sun, 7 Apr 2024 12:54:21 +0800 Subject: [PATCH 069/116] fix sentinel bug --- codis/config/dashboard.toml | 2 +- codis/pkg/topom/config.go | 2 +- codis/pkg/topom/topom_sentinel.go | 8 ++++-- codis/pkg/utils/redis/codis_sentinel.go | 14 ++++++---- include/pika_server.h | 10 +++++++ src/pika_admin.cc | 35 ++++++++++++++----------- 6 files changed, 46 insertions(+), 25 deletions(-) diff --git a/codis/config/dashboard.toml b/codis/config/dashboard.toml index dbc48f5609..14b7d16210 100644 --- a/codis/config/dashboard.toml +++ b/codis/config/dashboard.toml @@ -44,7 +44,7 @@ sentinel_down_after = "30s" sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" -sentinel_pika_local_model = true +sentinel_pika_local_model = false cloud_access_key = "minioadmin" cloud_secret_key = "minioadmin" cloud_endpoint_override = "http://10.224.129.40:9000" diff --git a/codis/pkg/topom/config.go b/codis/pkg/topom/config.go index ff898d0fba..a37819d31c 100644 --- a/codis/pkg/topom/config.go +++ b/codis/pkg/topom/config.go @@ -61,7 +61,7 @@ sentinel_down_after = "30s" sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" -sentinel_pika_local_model = true +sentinel_pika_local_model = false cloud_access_key = "minioadmin" cloud_secret_key = "minioadmin" cloud_endpoint_override = "http://10.224.129.40:9000" diff --git a/codis/pkg/topom/topom_sentinel.go b/codis/pkg/topom/topom_sentinel.go index cec28cb809..e661310eb0 100644 --- 
a/codis/pkg/topom/topom_sentinel.go +++ b/codis/pkg/topom/topom_sentinel.go @@ -24,9 +24,13 @@ func (s *Topom) CheckStateAndSwitchSlavesAndMasters(filter func(index int, g *mo if s.Config().SentinelPikaLocalModel { states = checkGroupServersReplicationState(s.Config(), groupServers) } else { - var groups_info map[int]int + groups_info := make(map[int]int) for gid, _ := range groupServers { - groups_info[gid] = ctx.group[gid].TermId + group, err := ctx.getGroup(gid) + if err != nil { + continue + } + groups_info[gid] = group.TermId } states = checkGroupServersPKPingState(s.Config(), groupServers, groups_info) } diff --git a/codis/pkg/utils/redis/codis_sentinel.go b/codis/pkg/utils/redis/codis_sentinel.go index 58fab5ead8..f8f9b89da1 100644 --- a/codis/pkg/utils/redis/codis_sentinel.go +++ b/codis/pkg/utils/redis/codis_sentinel.go @@ -135,7 +135,7 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClient(parallel int, groupServers type GroupInfo struct { GroupId int `json:"group_id"` TermId int `json:"term_id"` - MastersAddr []string `json:"master_addr"` + MastersAddr []string `json:"masters_addr"` SlavesAddr []string `json:"slaves_addr"` } @@ -152,8 +152,9 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr var fut sync2.Future //build pkping parameter + groups_parameter := make(map[int]GroupInfo) for gid, servers := range groupServers { - var group_info GroupInfo + group_info := groups_parameter[gid] group_info.GroupId = gid group_info.TermId = groups_info[gid] for _, server := range servers { @@ -165,10 +166,13 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr group_info.SlavesAddr = append(group_info.SlavesAddr, server.Addr) } } + groups_parameter[gid] = group_info + } - group_inf_json, err := json.Marshal(group_info) + for gid, servers := range groupServers { + group_info_json, err := json.Marshal(groups_parameter[gid]) if err != nil { - log.WarnErrorf(err, "json: %s Serialization Failure failed", group_inf_json) + log.WarnErrorf(err, "json: %s Serialization Failure failed", group_info_json) } for index, server := range servers { limit <- struct{}{} @@ -180,7 +184,7 @@ func (s *CodisSentinel) RefreshMastersAndSlavesClientWithPKPing(parallel int, gr fut.Done(fmt.Sprintf("%d_%d", gid, index), state) <-limit }() - info, err := s.PkPingDispatch(server.Addr, group_inf_json) + info, err := s.PkPingDispatch(server.Addr, group_info_json) state = &ReplicationState{ Index: index, GroupID: gid, diff --git a/include/pika_server.h b/include/pika_server.h index 5619f2ee4a..902cc8f6b2 100644 --- a/include/pika_server.h +++ b/include/pika_server.h @@ -502,6 +502,16 @@ class PikaServer : public pstd::noncopyable { */ int64_t GetLastSave() const {return lastsave_;} void UpdateLastSave(int64_t lastsave) {lastsave_ = lastsave;} + + /*term_id used*/ +#ifdef USE_S3 + void set_lease_term_id(const std::string& lease_term_id) {lease_term_id_ = lease_term_id;} + void set_group_id(const std::string& group_id) {group_id_ = group_id;} + + std::string lease_term_id() const {return lease_term_id_;} + std::string group_id() const {return group_id_;} +#endif + private: /* * TimingTask use diff --git a/src/pika_admin.cc b/src/pika_admin.cc index ed0ea4dee2..2a408825ec 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -3243,33 +3243,36 @@ void PKPingCmd::DoInitial() { group_id_ = jw.GetInt64("group_id"); term_id_ = jw.GetInt64("term_id"); - - - auto jsonArrayView = jw.GetArray("mastersAddr"); - size_t arraySize = jsonArrayView.GetLength(); - for 
(size_t i = 0; i < arraySize; ++i) { - if (jsonArrayView[i].IsString()) { - masters_addr_.push_back(jsonArrayView[i].AsString()); + if (jw.ValueExists("masters_addr")) { + auto jsonArrayView = jw.GetArray("masters_addr"); + size_t arraySize = jsonArrayView.GetLength(); + for (size_t i = 0; i < arraySize; ++i) { + if (jsonArrayView[i].IsString()) { + masters_addr_.push_back(jsonArrayView[i].AsString()); + } + } } - } - jsonArrayView = jw.GetArray("slavesAddr"); - arraySize = jsonArrayView.GetLength(); - for (size_t i = 0; i < arraySize; ++i) { - if (jsonArrayView[i].IsString()) { - slaves_addr_.push_back(jsonArrayView[i].AsString()); + if (jw.ValueExists("slaves_addr")) { + auto jsonArrayView = jw.GetArray("slaves_addr"); + size_t arraySize = jsonArrayView.GetLength(); + for (size_t i = 0; i < arraySize; ++i) { + if (jsonArrayView[i].IsString()) { + slaves_addr_.push_back(jsonArrayView[i].AsString()); + } } } +#ifdef USE_S3 if (g_pika_server->role() == PIKA_ROLE_MASTER) { for (auto const& slave : g_pika_server->slaves_) { if (std::find(masters_addr_.begin(), masters_addr_.end(), slave.ip_port) != masters_addr_.end()) { - //waiting todo :合并代码后 更新groupid 和 term_id - break; + g_pika_server->set_group_id(std::to_string(group_id_)); + g_pika_server->set_lease_term_id(std::to_string(term_id_)); } } } - +#endif } void PKPingCmd::Do() { From 3adb893f8e3c45c0d4e8bcec7b934093a4cc4b1f Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Sun, 7 Apr 2024 19:53:08 +0800 Subject: [PATCH 070/116] change data type of lease_term_id and group_id --- include/pika_server.h | 4 ++-- src/pika_server.cc | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/pika_server.h b/include/pika_server.h index 5619f2ee4a..feb1386f25 100644 --- a/include/pika_server.h +++ b/include/pika_server.h @@ -522,8 +522,8 @@ class PikaServer : public pstd::noncopyable { std::string sentinel_addr_; //TODO(wangshaoyi): make it thread loacal std::shared_ptr sentinel_client_; - std::string lease_term_id_; - std::string group_id_; + int lease_term_id_; + int group_id_; #endif std::shared_mutex storage_options_rw_; diff --git a/src/pika_server.cc b/src/pika_server.cc index 759e5ccb83..9c456ff308 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1431,7 +1431,7 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); //for test //cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, - // std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + // std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } @@ -1860,8 +1860,8 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, // construct request body Json::JsonValue request_doc; - request_doc.WithString("term_id", Aws::String(lease_term_id_)); - request_doc.WithString("group_id", Aws::String(group_id_)); + request_doc.WithInteger("term_id", lease_term_id_); + request_doc.WithInteger("group_id", group_id_); request_doc.WithString("s3_bucket", Aws::String(s3_bucket)); request_doc.WithString("s3_path", Aws::String(remote_path)); request_doc.WithString("content", Aws::String(content)); From c5b59e1bd60d1d8dde4192942e51ab93c82ecd6e Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 8 Apr 2024 17:40:38 +0800 Subject: [PATCH 071/116] fix code conflict --- include/pika_server.h | 7 ++----- src/pika_admin.cc | 4 ++-- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/include/pika_server.h b/include/pika_server.h 
index 76a4616756..ca958dfa68 100644 --- a/include/pika_server.h +++ b/include/pika_server.h @@ -505,11 +505,8 @@ class PikaServer : public pstd::noncopyable { /*term_id used*/ #ifdef USE_S3 - void set_lease_term_id(const std::string& lease_term_id) {lease_term_id_ = lease_term_id;} - void set_group_id(const std::string& group_id) {group_id_ = group_id;} - - std::string lease_term_id() const {return lease_term_id_;} - std::string group_id() const {return group_id_;} + void set_lease_term_id(const int lease_term_id) {lease_term_id_ = lease_term_id;} + void set_group_id(const int group_id) {group_id_ = group_id;} #endif private: diff --git a/src/pika_admin.cc b/src/pika_admin.cc index 991a8bd548..5abceed1dd 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -3270,8 +3270,8 @@ void PKPingCmd::DoInitial() { if (g_pika_server->role() == PIKA_ROLE_MASTER) { for (auto const& slave : g_pika_server->slaves_) { if (std::find(masters_addr_.begin(), masters_addr_.end(), slave.ip_port) != masters_addr_.end()) { - g_pika_server->set_group_id(std::to_string(group_id_)); - g_pika_server->set_lease_term_id(std::to_string(term_id_)); + g_pika_server->set_group_id(group_id_); + g_pika_server->set_lease_term_id(term_id_); } } } From 8bd11f7df6e745c55f43ce522ae6cd69909f6d5c Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 8 Apr 2024 17:51:19 +0800 Subject: [PATCH 072/116] clean code --- src/pika_server.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pika_server.cc b/src/pika_server.cc index 9c456ff308..b9007df3c2 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1429,9 +1429,8 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); - //for test - //cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, - // std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } From 2674fa834fd506875b532df6b45ead7765215c3b Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 8 Apr 2024 19:47:02 +0800 Subject: [PATCH 073/116] fix binlog sync error --- src/pika_repl_bgworker.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index ddb58709c4..7d4f3e98b7 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -153,7 +153,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { slave_db->SetReplState(ReplState::kTryConnect); return; } - db->Logger()->Put(binlog_item.content(), binlog_item.db_id(), binlog_item.rocksdb_id(), binlog_item.type()); + db->Logger()->Put(binlog_res.binlog()); auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); if (binlog_item.type() == 0 && storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { continue; From eee0e10d5d16bb1e0a0a4a89ceeb8c5fc699438a Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 8 Apr 2024 19:54:13 +0800 Subject: [PATCH 074/116] change pika_model to pika_mode --- include/pika_conf.h | 6 +++--- src/pika_admin.cc | 6 +++--- src/pika_command.cc | 2 +- src/pika_conf.cc | 4 ++-- src/pika_consensus.cc | 4 ++-- src/pika_repl_bgworker.cc | 2 +- 
src/pika_repl_server_conn.cc | 2 +- src/pika_rm.cc | 4 ++-- src/pika_stable_log.cc | 8 ++++---- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/pika_conf.h b/include/pika_conf.h index faa0e376c2..1ef749e869 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -314,7 +314,7 @@ class PikaConf : public pstd::BaseConf { return network_interface_; } int cache_model() { return cache_model_; } - int pika_model() { return pika_model_; } + int pika_mode() { return pika_mode_; } int sync_window_size() { return sync_window_size_.load(); } int max_conn_rbuf_size() { return max_conn_rbuf_size_.load(); } int consensus_level() { return consensus_level_.load(); } @@ -351,7 +351,7 @@ class PikaConf : public pstd::BaseConf { void SetCacheMaxmemoryPolicy(const int value) { cache_maxmemory_policy_ = value; } void SetCacheMaxmemorySamples(const int value) { cache_maxmemory_samples_ = value; } void SetCacheLFUDecayTime(const int value) { cache_lfu_decay_time_ = value; } - void SetPikaModel(const int value) { pika_model_ = value; } + void SetPikaModel(const int value) { pika_mode_ = value; } void UnsetCacheDisableFlag() { tmp_cache_disable_flag_ = false; } bool enable_blob_files() { return enable_blob_files_; } int64_t min_blob_size() { return min_blob_size_; } @@ -814,7 +814,7 @@ class PikaConf : public pstd::BaseConf { std::atomic_int cache_lfu_decay_time_; //pika model - int32_t pika_model_; + int32_t pika_mode_; // rocksdb blob bool enable_blob_files_ = false; diff --git a/src/pika_admin.cc b/src/pika_admin.cc index 5abceed1dd..ced5eecb4e 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -156,7 +156,7 @@ void SlaveofCmd::Do() { g_pika_server->RemoveMaster(); if (is_none_) { - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { std::shared_lock rwl(g_pika_server->dbs_rw_); for (const auto& db_item : g_pika_server->dbs_) { db_item.second->SwitchMaster(is_old_master, true); @@ -171,7 +171,7 @@ void SlaveofCmd::Do() { * the data synchronization was successful, but only changes the status of the * slaveof executor to slave */ - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { std::shared_lock rwl(g_pika_server->dbs_rw_); for (const auto& db_item : g_pika_server->dbs_) { db_item.second->SwitchMaster(is_old_master, false); @@ -2918,7 +2918,7 @@ void PaddingCmd::DoInitial() { void PaddingCmd::Do() { res_.SetRes(CmdRes::kOk); } std::string PaddingCmd::ToRedisProtocol() { - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { return PikaBinlogTransverter::ConstructPaddingBinlog(BinlogType::TypeFirst, argv_[1].size()); } return PikaBinlogTransverter::ConstructPaddingBinlog( diff --git a/src/pika_command.cc b/src/pika_command.cc index 95660c87e0..f7b9820f70 100644 --- a/src/pika_command.cc +++ b/src/pika_command.cc @@ -884,7 +884,7 @@ void Cmd::InternalProcessCommand(const HintKeys& hint_keys) { do_duration_ += pstd::NowMicros() - start_us; } - if (g_pika_conf->pika_model() == PIKA_LOCAL) { + if (g_pika_conf->pika_mode() == PIKA_LOCAL) { DoBinlog(); } diff --git a/src/pika_conf.cc b/src/pika_conf.cc index 2b49f991cb..8698cfdd33 100644 --- a/src/pika_conf.cc +++ b/src/pika_conf.cc @@ -486,8 +486,8 @@ int PikaConf::Load() { int cache_num = 16; GetConfInt("cache-num", &cache_num); cache_num_ = (0 >= cache_num || 48 < cache_num) ? 
16 : cache_num; - //bx pika init pika_model - pika_model_ = PIKA_CLOUD; + //todo: pika init pika_mode from conf + pika_mode_ = PIKA_CLOUD; int cache_model = 0; GetConfInt("cache-model", &cache_model); diff --git a/src/pika_consensus.cc b/src/pika_consensus.cc index 9a7debefa5..83247ed1f9 100644 --- a/src/pika_consensus.cc +++ b/src/pika_consensus.cc @@ -32,7 +32,7 @@ Status Context::StableSave() { memcpy(p, &(applied_index_.b_offset.offset), sizeof(uint64_t)); p += 8; memcpy(p, &(applied_index_.l_offset.term), sizeof(uint32_t)); - if (g_pika_conf->pika_model() == PIKA_LOCAL) { + if (g_pika_conf->pika_mode() == PIKA_LOCAL) { p += 4; memcpy(p, &(applied_index_.l_offset.index), sizeof(uint64_t)); } @@ -58,7 +58,7 @@ Status Context::Init() { memcpy(reinterpret_cast(&(applied_index_.b_offset.filenum)), save_->GetData(), sizeof(uint32_t)); memcpy(reinterpret_cast(&(applied_index_.b_offset.offset)), save_->GetData() + 4, sizeof(uint64_t)); memcpy(reinterpret_cast(&(applied_index_.l_offset.term)), save_->GetData() + 12, sizeof(uint32_t)); - if (g_pika_conf->pika_model() == PIKA_LOCAL) { + if (g_pika_conf->pika_mode() == PIKA_LOCAL) { memcpy(reinterpret_cast(&(applied_index_.l_offset.index)), save_->GetData() + 16, sizeof(uint64_t)); } return Status::OK(); diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index ddb58709c4..d2b921b011 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -138,7 +138,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { continue; } - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { cloud::BinlogCloudItem binlog_item; if (!PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(binlog_res.binlog(), &binlog_item)) { LOG(WARNING) << "Cloud Binlog item decode failed"; diff --git a/src/pika_repl_server_conn.cc b/src/pika_repl_server_conn.cc index 7e5132f94c..a210d711d1 100644 --- a/src/pika_repl_server_conn.cc +++ b/src/pika_repl_server_conn.cc @@ -123,7 +123,7 @@ void PikaReplServerConn::HandleTrySyncRequest(void* arg) { response.set_code(InnerMessage::kOk); } //In cloud mode, only full synchronization is possible. 
- if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { if (pre_success) { if (!db->CheckSlaveNodeExist(node.ip(), node.port())) { try_sync_response->set_reply_code(InnerMessage::InnerResponse::TrySync::kSyncPointBePurged); diff --git a/src/pika_rm.cc b/src/pika_rm.cc index 7d871e61e7..8ebabe32ee 100644 --- a/src/pika_rm.cc +++ b/src/pika_rm.cc @@ -166,7 +166,7 @@ Status SyncMasterDB::ReadBinlogFileToWq(const std::shared_ptr& slave_ } BinlogItem item; cloud::BinlogCloudItem cloud_item; - if (g_pika_conf->pika_model() == PIKA_CLOUD){ + if (g_pika_conf->pika_mode() == PIKA_CLOUD){ if (!PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(msg, &cloud_item)) { return Status::Corruption("Binlog item decode failed"); } @@ -179,7 +179,7 @@ Status SyncMasterDB::ReadBinlogFileToWq(const std::shared_ptr& slave_ BinlogOffset sent_b_offset = BinlogOffset(filenum, offset); LogicOffset sent_l_offset; - if (g_pika_conf->pika_model() == PIKA_CLOUD){ + if (g_pika_conf->pika_mode() == PIKA_CLOUD){ sent_l_offset = LogicOffset(cloud_item.term_id(), 0); } else { sent_l_offset = LogicOffset(item.term_id(), item.logic_id()); diff --git a/src/pika_stable_log.cc b/src/pika_stable_log.cc index b9cf747ebe..6be340643a 100644 --- a/src/pika_stable_log.cc +++ b/src/pika_stable_log.cc @@ -23,9 +23,9 @@ extern std::unique_ptr g_pika_rm; StableLog::StableLog(std::string db_name, std::string log_path) : purging_(false), db_name_(std::move(db_name)), log_path_(std::move(log_path)) { - if (g_pika_conf->pika_model() == PIKA_LOCAL) { + if (g_pika_conf->pika_mode() == PIKA_LOCAL) { stable_logger_ = std::make_shared(log_path_, g_pika_conf->binlog_file_size()); - } else if (g_pika_conf->pika_model() == PIKA_CLOUD) { + } else if (g_pika_conf->pika_mode() == PIKA_CLOUD) { stable_logger_ = std::make_shared(log_path_, g_pika_conf->binlog_file_size()); } std::map binlogs; @@ -188,7 +188,7 @@ void StableLog::UpdateFirstOffset(uint32_t filenum) { LOG(WARNING) << "Binlog reader get failed"; return; } - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { if (!PikaCloudBinlogTransverter::BinlogItemWithoutContentDecode(binlog, &cloud_item)) { LOG(WARNING) << "Cloud Binlog item decode failed"; return; @@ -211,7 +211,7 @@ void StableLog::UpdateFirstOffset(uint32_t filenum) { std::lock_guard l(offset_rwlock_); first_offset_.b_offset = offset; - if (g_pika_conf->pika_model() == PIKA_CLOUD) { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { first_offset_.l_offset.term = cloud_item.term_id(); } else { first_offset_.l_offset.term = item.term_id(); From 88afec2f8be419f6d9e9ce3e0ca85a5a79766744 Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 8 Apr 2024 20:52:12 +0800 Subject: [PATCH 075/116] change model to mode --- codis/pkg/topom/config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codis/pkg/topom/config.go b/codis/pkg/topom/config.go index a37819d31c..6e496ed827 100644 --- a/codis/pkg/topom/config.go +++ b/codis/pkg/topom/config.go @@ -61,7 +61,7 @@ sentinel_down_after = "30s" sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" -sentinel_pika_local_model = false +sentinel_pika_local_mode = false cloud_access_key = "minioadmin" cloud_secret_key = "minioadmin" cloud_endpoint_override = "http://10.224.129.40:9000" @@ -100,7 +100,7 @@ type Config struct { SentinelFailoverTimeout timesize.Duration `toml:"sentinel_failover_timeout" json:"sentinel_failover_timeout"` 
SentinelNotificationScript string `toml:"sentinel_notification_script" json:"sentinel_notification_script"` SentinelClientReconfigScript string `toml:"sentinel_client_reconfig_script" json:"sentinel_client_reconfig_script"` - SentinelPikaLocalModel bool `toml:"sentinel_pika_local_model" json:"sentinel_pika_local_model"` + SentinelPikaLocalMode bool `toml:"sentinel_pika_local_mode" json:"sentinel_pika_local_mode"` CloudAccessKey string `toml:"cloud_access_key" json:"cloud_access_key"` CloudSecretKey string `toml:"cloud_secret_key" json:"cloud_secret_key"` CloudEndPointOverride string `toml:"cloud_endpoint_override" json:"cloud_endpoint_override"` From 988ccef58013e3a510bebdb3adc336a12a901800 Mon Sep 17 00:00:00 2001 From: baixin Date: Tue, 9 Apr 2024 10:37:44 +0800 Subject: [PATCH 076/116] change model to mode --- codis/pkg/topom/topom_sentinel.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codis/pkg/topom/topom_sentinel.go b/codis/pkg/topom/topom_sentinel.go index e661310eb0..a06f073e85 100644 --- a/codis/pkg/topom/topom_sentinel.go +++ b/codis/pkg/topom/topom_sentinel.go @@ -21,7 +21,7 @@ func (s *Topom) CheckStateAndSwitchSlavesAndMasters(filter func(index int, g *mo if len(groupServers) == 0 { return nil } - if s.Config().SentinelPikaLocalModel { + if s.Config().SentinelPikaLocalMode { states = checkGroupServersReplicationState(s.Config(), groupServers) } else { groups_info := make(map[int]int) From 312c49536b1983762c733a379b5ffec62f157f3d Mon Sep 17 00:00:00 2001 From: baixin Date: Tue, 9 Apr 2024 16:10:17 +0800 Subject: [PATCH 077/116] del XXX.so --- CMakeLists.txt | 15 +++------------ src/storage/tests/CMakeLists.txt | 9 +++------ 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb18ea339e..092a62d4c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -848,9 +848,9 @@ target_link_directories(${PROJECT_NAME} PUBLIC ${INSTALL_LIBDIR}) if (USE_S3) -find_package(AWSSDK REQUIRED COMPONENTS) -target_link_libraries(${PROJECT_NAME} - ${AWSSDK_LINK_LIBRARIES}) + find_package(AWSSDK REQUIRED COMPONENTS s3 transfer kinesis) + include_directories(${AWS_INCLUDE_DIR}) + target_link_libraries(${PROJECT_NAME} ${AWSSDK_LINK_LIBRARIES}) endif() add_dependencies(${PROJECT_NAME} @@ -898,15 +898,6 @@ target_link_libraries(${PROJECT_NAME} ${LIBUNWIND_LIBRARY} ${JEMALLOC_LIBRARY}) -if (USE_S3) - target_link_libraries(${PROJECT_NAME} - libaws-cpp-sdk-core.so - libaws-cpp-sdk-transfer.so - libaws-cpp-sdk-kinesis.so - libaws-cpp-sdk-s3.so - ) -endif() - option(USE_SSL "Enable SSL support" OFF) add_custom_target( clang-tidy diff --git a/src/storage/tests/CMakeLists.txt b/src/storage/tests/CMakeLists.txt index 3f7ede4439..063f9b8794 100644 --- a/src/storage/tests/CMakeLists.txt +++ b/src/storage/tests/CMakeLists.txt @@ -34,13 +34,10 @@ foreach(blackwindow_test_source ${BLACKWINDOW_TEST_SOURCE}) PUBLIC ${LIBUNWIND_LIBRARY} ) if (USE_S3) - target_link_libraries(${blackwindow_test_name} - PUBLIC libaws-cpp-sdk-core.so - PUBLIC libaws-cpp-sdk-transfer.so - PUBLIC libaws-cpp-sdk-kinesis.so - PUBLIC libaws-cpp-sdk-s3.so - ) + find_package(AWSSDK REQUIRED COMPONENTS s3 transfer kinesis) + target_link_libraries(${blackwindow_test_name} PUBLIC ${AWSSDK_LINK_LIBRARIES}) endif() + add_test(NAME ${blackwindow_test_name} COMMAND ${blackwindow_test_name} WORKING_DIRECTORY .) 
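The AWSSDK linkage above (find_package(AWSSDK REQUIRED COMPONENTS s3 transfer kinesis) plus AWSSDK_LINK_LIBRARIES) is what the cloud-endpoint-override / cloud-access-key / cloud-secret-key options in this series ultimately exercise. A minimal, self-contained sketch of reaching an S3-compatible endpoint such as the MinIO instance from the dashboard example follows; the bucket name, the ListObjectsV2 probe, and the exact S3Client constructor used here are illustrative assumptions, not code from these patches.

#include <aws/core/Aws.h>
#include <aws/core/auth/AWSCredentials.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/ListObjectsV2Request.h>

int main() {
  Aws::SDKOptions options;
  Aws::InitAPI(options);
  {
    Aws::Client::ClientConfiguration cfg;
    cfg.endpointOverride = "10.224.129.40:9000";  // MinIO endpoint from the example config; replace as needed
    cfg.scheme = Aws::Http::Scheme::HTTP;
    Aws::Auth::AWSCredentials creds("minioadmin", "minioadmin");  // demo credentials from the example config
    // Path-style addressing (useVirtualAddressing = false) is the usual choice for MinIO-style endpoints.
    Aws::S3::S3Client s3(creds, cfg,
                         Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
                         /*useVirtualAddressing=*/false);
    Aws::S3::Model::ListObjectsV2Request req;
    req.SetBucket("pika.database");  // assumed bucket name; in practice derive it from the configured prefix + suffix
    auto outcome = s3.ListObjectsV2(req);
    // outcome.IsSuccess() tells whether the endpoint, credentials and bucket are usable.
  }
  Aws::ShutdownAPI(options);
  return 0;
}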
From c0c34fc8d29283621a853c5f38c740c2353e28ee Mon Sep 17 00:00:00 2001 From: baixin Date: Wed, 10 Apr 2024 13:20:38 +0800 Subject: [PATCH 078/116] change model --- codis/example/dashboard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codis/example/dashboard.py b/codis/example/dashboard.py index 3d5fafce5c..845aceab6d 100644 --- a/codis/example/dashboard.py +++ b/codis/example/dashboard.py @@ -44,7 +44,7 @@ def _open_config(admin_port, product_name, product_auth=None): path = os.getcwd() f.write('sentinel_notification_script = "{}"\n'.format(os.path.join(path, "sentinel_notify.sh"))) f.write('sentinel_client_reconfig_script = "{}"\n'.format(os.path.join(path, "sentinel_reconfig.sh")) - f.write('sentinel_pika_local_model = "true"\n') + f.write('sentinel_pika_local_mode = "true"\n') f.write('cloud_access_key = "minioadmin"\n') f.write('cloud_secret_key = "minioadmin"\n') f.write('cloud_endpoint_override = "http://10.224.129.40:9000"\n') From 820149006d5bd87c999f4af14d5238cd2fd2d540 Mon Sep 17 00:00:00 2001 From: baixin Date: Wed, 10 Apr 2024 13:31:03 +0800 Subject: [PATCH 079/116] change modelF --- codis/config/dashboard.toml | 2 +- include/pika_conf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/codis/config/dashboard.toml b/codis/config/dashboard.toml index 14b7d16210..4cb924ed3e 100644 --- a/codis/config/dashboard.toml +++ b/codis/config/dashboard.toml @@ -44,7 +44,7 @@ sentinel_down_after = "30s" sentinel_failover_timeout = "5m" sentinel_notification_script = "" sentinel_client_reconfig_script = "" -sentinel_pika_local_model = false +sentinel_pika_local_mode = false cloud_access_key = "minioadmin" cloud_secret_key = "minioadmin" cloud_endpoint_override = "http://10.224.129.40:9000" diff --git a/include/pika_conf.h b/include/pika_conf.h index 1ef749e869..3a64e36d0d 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -351,7 +351,7 @@ class PikaConf : public pstd::BaseConf { void SetCacheMaxmemoryPolicy(const int value) { cache_maxmemory_policy_ = value; } void SetCacheMaxmemorySamples(const int value) { cache_maxmemory_samples_ = value; } void SetCacheLFUDecayTime(const int value) { cache_lfu_decay_time_ = value; } - void SetPikaModel(const int value) { pika_mode_ = value; } + void SetPikaMode(const int value) { pika_mode_ = value; } void UnsetCacheDisableFlag() { tmp_cache_disable_flag_ = false; } bool enable_blob_files() { return enable_blob_files_; } int64_t min_blob_size() { return min_blob_size_; } From 537ff7094bc6d6602682f289473b3f5fd8bdf001 Mon Sep 17 00:00:00 2001 From: baixin Date: Wed, 10 Apr 2024 13:35:02 +0800 Subject: [PATCH 080/116] change model --- include/pika_conf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pika_conf.h b/include/pika_conf.h index 3a64e36d0d..4b57e429f9 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -813,7 +813,7 @@ class PikaConf : public pstd::BaseConf { std::atomic_int cache_maxmemory_samples_; std::atomic_int cache_lfu_decay_time_; - //pika model + //pika mode int32_t pika_mode_; // rocksdb blob From 5e6d68e18e77a10b27db780caca08be2be9c6f6b Mon Sep 17 00:00:00 2001 From: baixin Date: Wed, 10 Apr 2024 19:08:59 +0800 Subject: [PATCH 081/116] delete def use_s3 --- include/pika_conf.h | 4 ---- include/pika_define.h | 9 +-------- src/pika_admin.cc | 17 +++++++++-------- src/pika_conf.cc | 2 -- src/pika_db.cc | 2 -- src/pika_repl_client_conn.cc | 14 +++++++------- src/pika_repl_server_conn.cc | 12 ++++++------ src/pika_server.cc | 17 +++++++++-------- 8 
files changed, 32 insertions(+), 45 deletions(-) diff --git a/include/pika_conf.h b/include/pika_conf.h index 4b57e429f9..2b1f23e00b 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -398,7 +398,6 @@ class PikaConf : public pstd::BaseConf { uint32_t acl_pubsub_default() { return acl_pubsub_default_.load(); } uint32_t acl_log_max_len() { return acl_Log_max_len_.load(); } -#ifdef USE_S3 // rocksdb-cloud options std::string cloud_endpoint_override() { return cloud_endpoint_override_; } std::string cloud_access_key() { return cloud_access_key_; } @@ -409,7 +408,6 @@ class PikaConf : public pstd::BaseConf { std::string cloud_dest_bucket_prefix() { return cloud_dest_bucket_prefix_; } std::string cloud_dest_bucket_suffix() { return cloud_dest_bucket_suffix_; } std::string cloud_dest_bucket_region() { return cloud_dest_bucket_region_; } -#endif // Setter void SetPort(const int value) { @@ -827,7 +825,6 @@ class PikaConf : public pstd::BaseConf { int64_t blob_file_size_ = 256 * 1024 * 1024; // 256M std::string blob_compression_type_ = "none"; -#ifdef USE_S3 // rocksdb-cloud options std::string cloud_endpoint_override_; std::string cloud_access_key_; @@ -840,7 +837,6 @@ class PikaConf : public pstd::BaseConf { std::string cloud_dest_bucket_prefix_ = "pika."; std::string cloud_dest_bucket_suffix_ = "database"; std::string cloud_dest_bucket_region_; -#endif std::shared_mutex rwlock_; diff --git a/include/pika_define.h b/include/pika_define.h index 8b87663411..cf2062b63f 100644 --- a/include/pika_define.h +++ b/include/pika_define.h @@ -45,7 +45,7 @@ const int kMaxRsyncParallelNum = 4; struct DBStruct { DBStruct(std::string tn, int32_t inst_num) : db_name(std::move(tn)), db_instance_num(inst_num) {} -#ifdef USE_S3 + DBStruct(std::string tn, int32_t inst_num, std::string cloud_endpoint_override, std::string cloud_bucket_prefix, std::string cloud_bucket_suffix, std::string cloud_bucket_region) : db_name(std::move(tn)), @@ -54,27 +54,20 @@ struct DBStruct { cloud_bucket_prefix(std::move(cloud_bucket_prefix)), cloud_bucket_suffix(std::move(cloud_bucket_suffix)), cloud_bucket_region(std::move(cloud_bucket_region)) {} -#endif bool operator==(const DBStruct& db_struct) const { -#ifdef USE_S3 return db_name == db_struct.db_name && db_instance_num == db_struct.db_instance_num && cloud_endpoint_override == db_struct.cloud_endpoint_override && cloud_bucket_prefix == db_struct.cloud_bucket_prefix && cloud_bucket_suffix == db_struct.cloud_bucket_suffix && cloud_bucket_region == db_struct.cloud_bucket_region; -#endif - return db_name == db_struct.db_name && db_instance_num == db_struct.db_instance_num; } std::string db_name; int32_t db_instance_num = 0; -#ifdef USE_S3 - // s3 meta std::string cloud_endpoint_override; std::string cloud_bucket_prefix; std::string cloud_bucket_suffix; std::string cloud_bucket_region; -#endif }; struct SlaveItem { diff --git a/src/pika_admin.cc b/src/pika_admin.cc index ced5eecb4e..b60f74ac5d 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -3266,16 +3266,17 @@ void PKPingCmd::DoInitial() { } } -#ifdef USE_S3 - if (g_pika_server->role() == PIKA_ROLE_MASTER) { - for (auto const& slave : g_pika_server->slaves_) { - if (std::find(masters_addr_.begin(), masters_addr_.end(), slave.ip_port) != masters_addr_.end()) { - g_pika_server->set_group_id(group_id_); - g_pika_server->set_lease_term_id(term_id_); - } + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { + if (g_pika_server->role() == PIKA_ROLE_MASTER) { + for (auto const& slave : g_pika_server->slaves_) { + if 
(std::find(masters_addr_.begin(), masters_addr_.end(), slave.ip_port) != masters_addr_.end()) { + g_pika_server->set_group_id(group_id_); + g_pika_server->set_lease_term_id(term_id_); + } + } } } -#endif + } void PKPingCmd::Do() { diff --git a/src/pika_conf.cc b/src/pika_conf.cc index 8698cfdd33..47b6f9bf59 100644 --- a/src/pika_conf.cc +++ b/src/pika_conf.cc @@ -580,7 +580,6 @@ int PikaConf::Load() { max_rsync_parallel_num_ = 4; } -#ifdef USE_S3 // rocksdb-cloud options GetConfStr("cloud-endpoint-override", &cloud_endpoint_override_); GetConfStr("cloud-access-key", &cloud_access_key_); @@ -591,7 +590,6 @@ int PikaConf::Load() { GetConfStr("cloud-dest-bucket-prefix", &cloud_dest_bucket_prefix_); GetConfStr("cloud-dest-bucket-suffix", &cloud_dest_bucket_suffix_); GetConfStr("cloud-dest-bucket-region", &cloud_dest_bucket_region_); -#endif return ret; } diff --git a/src/pika_db.cc b/src/pika_db.cc index b684ec7aa7..e662030eea 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -657,8 +657,6 @@ bool DB::FlushDB() { return FlushDBWithoutLock(); } -#ifdef USE_S3 rocksdb::Status DB::SwitchMaster(bool is_old_master, bool is_new_master) { return storage_->SwitchMaster(is_old_master, is_new_master); -#endif } diff --git a/src/pika_repl_client_conn.cc b/src/pika_repl_client_conn.cc index cd9482e811..8db9383c47 100644 --- a/src/pika_repl_client_conn.cc +++ b/src/pika_repl_client_conn.cc @@ -110,13 +110,13 @@ void PikaReplClientConn::HandleMetaSyncResponse(void* arg) { std::vector master_db_structs; for (int idx = 0; idx < meta_sync.dbs_info_size(); ++idx) { const InnerMessage::InnerResponse_MetaSync_DBInfo& db_info = meta_sync.dbs_info(idx); -#ifdef USE_S3 - master_db_structs.push_back({db_info.db_name(), db_info.db_instance_num(), - db_info.cloud_endpoint_override(), db_info.cloud_bucket_prefix(), - db_info.cloud_bucket_suffix(), db_info.cloud_bucket_region()}); -#else - master_db_structs.push_back({db_info.db_name(), db_info.db_instance_num()}); -#endif + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { + master_db_structs.push_back({db_info.db_name(), db_info.db_instance_num(), + db_info.cloud_endpoint_override(), db_info.cloud_bucket_prefix(), + db_info.cloud_bucket_suffix(), db_info.cloud_bucket_region()}); + } else { + master_db_structs.push_back({db_info.db_name(), db_info.db_instance_num()}); + } } std::vector self_db_structs = g_pika_conf->db_structs(); diff --git a/src/pika_repl_server_conn.cc b/src/pika_repl_server_conn.cc index a210d711d1..ed337e9109 100644 --- a/src/pika_repl_server_conn.cc +++ b/src/pika_repl_server_conn.cc @@ -65,12 +65,12 @@ void PikaReplServerConn::HandleMetaSyncRequest(void* arg) { */ db_info->set_slot_num(1); db_info->set_db_instance_num(db_struct.db_instance_num); -#ifdef USE_S3 - db_info->set_cloud_endpoint_override(db_struct.cloud_endpoint_override); - db_info->set_cloud_bucket_prefix(db_struct.cloud_bucket_prefix); - db_info->set_cloud_bucket_suffix(db_struct.cloud_bucket_suffix); - db_info->set_cloud_bucket_region(db_struct.cloud_bucket_region); -#endif + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { + db_info->set_cloud_endpoint_override(db_struct.cloud_endpoint_override); + db_info->set_cloud_bucket_prefix(db_struct.cloud_bucket_prefix); + db_info->set_cloud_bucket_suffix(db_struct.cloud_bucket_suffix); + db_info->set_cloud_bucket_region(db_struct.cloud_bucket_region); + } } } } diff --git a/src/pika_server.cc b/src/pika_server.cc index b9007df3c2..b81813242b 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -340,12 +340,12 @@ void 
PikaServer::InitDBStruct() { std::shared_ptr db_ptr = std::make_shared(name, db_path, log_path); db_ptr->Init(); dbs_.emplace(name, db_ptr); -#ifdef USE_S3 - db.cloud_endpoint_override = g_pika_conf->cloud_endpoint_override(); - db.cloud_bucket_prefix = g_pika_conf->cloud_src_bucket_prefix(); - db.cloud_bucket_suffix = g_pika_conf->cloud_src_bucket_prefix(); - db.cloud_bucket_region = g_pika_conf->cloud_src_bucket_region(); -#endif + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { + db.cloud_endpoint_override = g_pika_conf->cloud_endpoint_override(); + db.cloud_bucket_prefix = g_pika_conf->cloud_src_bucket_prefix(); + db.cloud_bucket_suffix = g_pika_conf->cloud_src_bucket_suffix(); + db.cloud_bucket_region = g_pika_conf->cloud_src_bucket_region(); + } } } @@ -1429,8 +1429,8 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); - cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + //cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, + // std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } @@ -1827,6 +1827,7 @@ void PikaServer::CacheConfigInit(cache::CacheConfig& cache_cfg) { bool PikaServer::UploadMetaToSentinel(const std::string& local_path, const std::string& s3_bucket, const std::string& remote_path) { + sentinel_addr_ = "http://127.0.0.1:18080/api/topom/upload-s3"; Aws::String url(sentinel_addr_); if (sentinel_client_ == nullptr) { sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); From db2aae53509ec7c9438f11f0c963b87f95a3473a Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 12 Apr 2024 14:51:16 +0800 Subject: [PATCH 082/116] pika encode manifest content before upload to sentinel --- CMakeLists.txt | 4 +++- src/pika_server.cc | 59 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 092a62d4c1..b4ddee3e37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -896,7 +896,9 @@ target_link_libraries(${PROJECT_NAME} libz.a librediscache.a ${LIBUNWIND_LIBRARY} - ${JEMALLOC_LIBRARY}) + ${JEMALLOC_LIBRARY} + ssl + crypto) option(USE_SSL "Enable SSL support" OFF) add_custom_target( diff --git a/src/pika_server.cc b/src/pika_server.cc index b81813242b..48738d6a6c 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -24,6 +24,9 @@ #include #include #include +#include +#include +#include #include "net/include/net_cli.h" #include "net/include/net_interfaces.h" @@ -53,6 +56,51 @@ extern std::unique_ptr g_network_statistic; // QUEUE_SIZE_THRESHOLD_PERCENTAGE is used to represent a percentage value and should be within the range of 0 to 100.
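The hunk that follows adds file-local OpenSSL BIO wrappers (base64Encode/base64Decode) so the raw manifest bytes can be embedded in the JSON body that UploadMetaToSentinel posts. As an illustrative aside, not part of the patch, a minimal round trip through those helpers might look like the sketch below; the helper names and the newLine flag are taken from the hunk, everything else (function name, call site) is assumed.

#include <cassert>
#include <cstdlib>
#include <string>

// Sketch only: assumes the base64Encode/base64Decode helpers added below are
// visible at the call site (in the patch they live in an unnamed namespace in
// pika_server.cc).
void Base64RoundTripExample(const std::string& raw) {
  // newLine=false selects BIO_FLAGS_BASE64_NO_NL, producing single-line output
  // that can be dropped straight into the JSON request body.
  char* enc = base64Encode(raw.data(), static_cast<int>(raw.size()), false);
  std::string payload(enc);
  free(enc);

  // base64Decode returns a malloc'ed buffer but not the decoded length, so the
  // caller has to know how many raw bytes to expect.
  std::string mutable_payload = payload;
  char* dec = base64Decode(&mutable_payload[0], static_cast<int>(mutable_payload.size()), false);
  std::string roundtrip(dec, raw.size());
  free(dec);
  assert(roundtrip == raw);
}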
const size_t QUEUE_SIZE_THRESHOLD_PERCENTAGE = 75; +namespace { +char * base64Encode(const char *buffer, int length, bool newLine) +{ + BIO *bmem = NULL; + BIO *b64 = NULL; + BUF_MEM *bptr; + + b64 = BIO_new(BIO_f_base64()); + if (!newLine) { + BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL); + } + bmem = BIO_new(BIO_s_mem()); + b64 = BIO_push(b64, bmem); + BIO_write(b64, buffer, length); + BIO_flush(b64); + BIO_get_mem_ptr(b64, &bptr); + BIO_set_close(b64, BIO_NOCLOSE); + + char *buff = (char *)malloc(bptr->length + 1); + memcpy(buff, bptr->data, bptr->length); + buff[bptr->length] = 0; + BIO_free_all(b64); + + return buff; +} + +char * base64Decode(char *input, int length, bool newLine) +{ + BIO *b64 = NULL; + BIO *bmem = NULL; + char *buffer = (char *)malloc(length); + memset(buffer, 0, length); + b64 = BIO_new(BIO_f_base64()); + if (!newLine) { + BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL); + } + bmem = BIO_new_mem_buf(input, length); + bmem = BIO_push(b64, bmem); + BIO_read(bmem, buffer, length); + BIO_free_all(bmem); + + return buffer; +} +} + void DoPurgeDir(void* arg) { std::unique_ptr path(static_cast(arg)); LOG(INFO) << "Delete dir: " << *path << " start"; @@ -1429,8 +1477,8 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); - //cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, - // std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } @@ -1827,7 +1875,7 @@ void PikaServer::CacheConfigInit(cache::CacheConfig& cache_cfg) { bool PikaServer::UploadMetaToSentinel(const std::string& local_path, const std::string& s3_bucket, const std::string& remote_path) { - sentinel_addr_ = "http://127.0.0.1:18080/api/topom/upload-s3"; + sentinel_addr_ = "http://127.0.0.1:9876/api/topom/upload-s3"; Aws::String url(sentinel_addr_); if (sentinel_client_ == nullptr) { sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); @@ -1856,7 +1904,10 @@ bool PikaServer::UploadMetaToSentinel(const std::string& local_path, LOG(WARNING) << "read file failed, local_path: " << local_path << " fread size: " << result << "fsize: " << f_size; } - std::string content(buffer, result); + char* base64_enc = base64Encode(buffer, result, false); + std::string content(base64_enc, strlen(base64_enc)); + LOG(WARNING) << "raw data size: " << result << " encode size: " << strlen(base64_enc) << " enc str: " << base64_enc; + free(base64_enc); // construct request body Json::JsonValue request_doc; From f1f75331c7998913c7dfb22d0d1c8e796f31b55c Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 25 Apr 2024 18:53:57 +0800 Subject: [PATCH 083/116] support flushdb command --- CMakeLists.txt | 4 +- src/pika_admin.cc | 4 ++ src/pika_command.cc | 4 +- src/pika_db.cc | 6 +++ src/pika_repl_bgworker.cc | 13 +++++- src/storage/include/storage/storage.h | 3 ++ src/storage/include/storage/storage_define.h | 12 +++++ src/storage/src/redis.cc | 48 +++++++++++++++++++- src/storage/src/redis.h | 3 ++ src/storage/src/storage.cc | 15 ++++++ 10 files changed, 105 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
092a62d4c1..d0d587c6ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -590,9 +590,9 @@ ExternalProject_Add(rocksdb ${LIBJEMALLOC_NAME} URL #temporary for debug, skip download from github - http://10.224.129.40:8000/rocksdb_cloud.tar.gz + http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz URL_HASH - MD5=fbcf0e166bfddf367063caa1dc583db8 + MD5=5f4b946417d6585431138a54e35cff29 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/pika_admin.cc b/src/pika_admin.cc index b60f74ac5d..a73c2644e8 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -599,7 +599,9 @@ void FlushallCmd::FlushAllWithoutLock() { return; } DoWithoutLock(db); +#ifndef USE_S3 DoBinlog(g_pika_rm->GetSyncMasterDBs()[p_info]); +#endif } if (res_.ok()) { res_.SetRes(CmdRes::kOk); @@ -686,7 +688,9 @@ void FlushdbCmd::FlushAllDBsWithoutLock() { return; } DoWithoutLock(); +#ifndef USE_S3 DoBinlog(); +#endif } void FlushdbCmd::DoWithoutLock() { diff --git a/src/pika_command.cc b/src/pika_command.cc index f7b9820f70..6a6265f211 100644 --- a/src/pika_command.cc +++ b/src/pika_command.cc @@ -54,10 +54,10 @@ void InitCmdTable(CmdTable* cmd_table) { cmd_table->insert(std::pair>(kCmdNameBgsave, std::move(bgsaveptr))); std::unique_ptr compactptr = - std::make_unique(kCmdNameCompact, -1, kCmdFlagsRead | kCmdFlagsAdmin | kCmdFlagsSlow); + std::make_unique(kCmdNameCompact, -1, kCmdFlagsRead | kCmdFlagsAdmin | kCmdFlagsSlow | kCmdFlagsSuspend); cmd_table->insert(std::pair>(kCmdNameCompact, std::move(compactptr))); - std::unique_ptr compactrangeptr = std::make_unique(kCmdNameCompactRange, 5, kCmdFlagsRead | kCmdFlagsAdmin); + std::unique_ptr compactrangeptr = std::make_unique(kCmdNameCompactRange, 5, kCmdFlagsRead | kCmdFlagsAdmin | kCmdFlagsSuspend); cmd_table->insert(std::pair>(kCmdNameCompactRange, std::move(compactrangeptr))); std::unique_ptr purgelogsto = std::make_unique(kCmdNamePurgelogsto, -2, kCmdFlagsRead | kCmdFlagsAdmin); diff --git a/src/pika_db.cc b/src/pika_db.cc index e662030eea..e04ff1cb8c 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -211,6 +211,12 @@ DisplayCacheInfo DB::GetCacheInfo() { } bool DB::FlushDBWithoutLock() { +#ifdef USE_S3 + LOG(INFO) << db_name_ << " flushing db..."; + auto st = storage_->FlushDB(); + LOG(INFO) << db_name_ << " flushing db done, status: " << st.ToString(); + return st.ok(); +#endif if (bgsave_info_.bgsaving) { return false; } diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 792cf0538a..414c8b887c 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -14,6 +14,7 @@ #include "pstd/include/pstd_defer.h" #include "src/pstd/include/scope_record_lock.h" #include "pika_cloud_binlog.pb.h" +#include "storage/storage_define.h" extern PikaServer* g_pika_server; extern std::unique_ptr g_pika_rm; @@ -155,7 +156,17 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { } db->Logger()->Put(binlog_res.binlog()); auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); - if (binlog_item.type() == 0 && storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { + if (binlog_item.type() == storage::RocksDBRecordType::kMemtableWrite && + storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { + continue; + } + if (binlog_item.type() == storage::RocksDBRecordType::kFlushDB) { + auto s = storage->FlushDBAtSlave(binlog_item.rocksdb_id()); + if (!s.ok()) { + slave_db->SetReplState(ReplState::kTryConnect); + LOG(WARNING) << "flushdb at slave node failed, error: " << s.ToString(); + return; + } continue; } 
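Later in this patch, Redis::FlushDB() on the master appends a kFlushDB record through wal_writer_->Put() after purging the cloud state, and the binlog worker above turns that record into a local wipe-and-reopen instead of a WAL replay. A condensed sketch of that slave-side dispatch, illustrative only, assuming a USE_S3 build and reusing the Storage methods declared in this patch (the free function name is made up, and the real worker also resets the repl state to kTryConnect when FlushDBAtSlave fails):

#include <string>
#include "storage/storage.h"
#include "storage/storage_define.h"

// Sketch of how a replicated rocksdb-cloud binlog record is applied on a slave.
void ReplayCloudBinlogRecord(storage::Storage* storage, int rocksdb_id,
                             uint32_t type, const std::string& content) {
  using storage::RocksDBRecordType;
  if (type == RocksDBRecordType::kFlushDB) {
    // Emitted by the master's flushdb: drop the local dir and reopen from the
    // (now emptied) cloud bucket rather than replaying a WAL entry.
    storage->FlushDBAtSlave(rocksdb_id);
    return;
  }
  if (type == RocksDBRecordType::kMemtableWrite &&
      storage->ShouldSkip(rocksdb_id, content)) {
    return;  // duplicate memtable write that the local rocksdb already has
  }
  storage->ApplyWAL(rocksdb_id, static_cast<int>(type), content);
}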
auto s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index 0ef91d9526..e640629f96 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -191,6 +191,8 @@ class Storage { Status Open(const StorageOptions& storage_options, const std::string& db_path, std::shared_ptr wal_writer = nullptr); + Status FlushDB(); + Status LoadCursorStartKey(const DataType& dtype, int64_t cursor, char* type, std::string* start_key); Status StoreCursorStartKey(const DataType& dtype, int64_t cursor, char type, const std::string& next_key); @@ -202,6 +204,7 @@ class Storage { Status ApplyWAL(int rocksdb_id, int type, const std::string& content); bool ShouldSkip(int rocksdb_id, const std::string& content); + Status FlushDBAtSlave(int rocksdb_id); // Strings Commands diff --git a/src/storage/include/storage/storage_define.h b/src/storage/include/storage/storage_define.h index 7dbd614169..a52b158baa 100644 --- a/src/storage/include/storage/storage_define.h +++ b/src/storage/include/storage/storage_define.h @@ -129,5 +129,17 @@ inline const char* SeekUserkeyDelim(const char* ptr, int length) { return ptr; } +#ifdef USE_S3 + // this enum is an extension of ReplicationLogRecord's Type: + // it reserves kMemtableWrite, kMemtableSwitch and kManifestWrite, + // and adds kFlushDB, which marks a pika flushdb call. + enum RocksDBRecordType : uint32_t { + kMemtableWrite, + kMemtableSwitch, + kManifestWrite, + kFlushDB = 10, // newly added value; identifies pika's flushdb operation + }; +#endif + } // end namespace storage #endif diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index d1bf3ad96e..984f112cdd 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -7,6 +7,8 @@ #include "rocksdb/env.h" #include "db/write_batch_internal.h" +#include "file/filename.h" +#include "cloud/filename.h" #include "src/redis.h" #include "rocksdb/options.h" @@ -32,7 +34,8 @@ Redis::Redis(Storage* const s, int32_t index, std::shared_ptr w : storage_(s), index_(index), lock_mgr_(std::make_shared(1000, 0, std::make_shared())), small_compaction_threshold_(5000), - small_compaction_duration_threshold_(10000) { + small_compaction_duration_threshold_(10000), + wal_writer_(wal_writer) { statistics_store_ = std::make_unique>(); scan_cursors_store_ = std::make_unique>(); spop_counts_store_ = std::make_unique>(); @@ -70,12 +73,53 @@ void Redis::Close() { #endif } +Status Redis::FlushDBAtSlave() { + Close(); + pstd::DeleteDir(db_path_); + auto s = Open(storage_options_, db_path_); + return s; +} + +Status Redis::FlushDB() { + rocksdb::CancelAllBackgroundWork(db_, true); + std::string s3_bucket = storage_options_.cloud_fs_options.dest_bucket.GetBucketName(); + std::string local_dbid; + auto s = ReadFileToString(cfs_->GetBaseFileSystem().get(), rocksdb::IdentityFileName(db_path_), &local_dbid); + LOG(INFO) << "local_dbid: " << local_dbid << " status: " << s.ToString(); + if (!s.ok()) { + return s; + } + s = cfs_->DeleteDbid(s3_bucket, local_dbid); + LOG(INFO) << " deletedbid status: " << s.ToString(); + if (!s.ok()) { + return s; + } + s = cfs_->DeleteCloudObject(s3_bucket, MakeCloudManifestFile(db_path_, "")); + LOG(INFO) << "deletecloudmanifestfromdest status: " << s.ToString(); + if (!s.ok()) { + return s; + } + s = cfs_->DeleteCloudObject(s3_bucket, rocksdb::IdentityFileName(db_path_)); + LOG(INFO) << "deleteidentityfile status: " << s.ToString(); + if (!s.ok()) {
+ return s; + } + cfs_->SwitchMaster(false); + Close(); + pstd::DeleteDir(db_path_); + wal_writer_->Put("flushdb", 0/*db_id*/, index_, kFlushDB); + Open(storage_options_, db_path_); + return s; +} + Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& db_path) { StorageOptions storage_options(tmp_storage_options); #ifdef USE_S3 db_path_ = db_path; storage_options_ = tmp_storage_options; + storage_options_.cloud_fs_options.dest_bucket.SetObjectPath(db_path_); + storage_options_.cloud_fs_options.src_bucket.SetObjectPath(db_path_); storage_options.cloud_fs_options.roll_cloud_manifest_on_open = true; storage_options.cloud_fs_options.resync_on_open = true; storage_options.cloud_fs_options.resync_manifest_on_open = true; @@ -647,7 +691,7 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re LOG(WARNING) << "write binlogitem " << " db_id: " << db_id << " type: " << record.type; auto s = wal_writer_->Put(record.contents, db_id, - redis_inst->GetIndex(), uint32_t(record.type)); + redis_inst->GetIndex(), RocksDBRecordType(record.type)); if (!s.ok()) { LOG(ERROR) << "write binlog failed, db_id: " << db_id << " rocksdb_id: " << redis_inst->GetIndex(); diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 15a006e88e..16ff551e66 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -115,6 +115,7 @@ class Redis { // Common Commands Status Open(const StorageOptions& storage_options, const std::string& db_path); void Close(); + Status FlushDB(); virtual Status CompactRange(const DataType& option_type, const rocksdb::Slice* begin, const rocksdb::Slice* end, const ColumnFamilyType& type = kMetaAndData); @@ -398,6 +399,7 @@ class Redis { #ifdef USE_S3 Status ApplyWAL(int type, const std::string& content); bool ShouldSkip(const std::string& content); + Status FlushDBAtSlave(); Status SwitchMaster(bool is_old_master, bool is_new_master); void ResetLogListener(std::shared_ptr handle) { log_listener_ = handle; @@ -447,6 +449,7 @@ class Redis { std::string db_path_; rocksdb::DBCloud* db_ = nullptr; std::shared_ptr log_listener_; + std::shared_ptr wal_writer_; StorageOptions storage_options_; std::atomic is_master_ = {true}; #else diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index 57c9671484..56a2859e2f 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -109,6 +109,16 @@ Status Storage::Open(const StorageOptions& storage_options, const std::string& d return Status::OK(); } +Status Storage::FlushDB() { + for (int index = 0; index < db_instance_num_; index++) { + auto s = insts_[index]->FlushDB(); + if (!s.ok()) { + return s; + } + } + return Status::OK(); +} + Status Storage::LoadCursorStartKey(const DataType& dtype, int64_t cursor, char* type, std::string* start_key) { std::string index_key = DataTypeTag[dtype] + std::to_string(cursor); std::string index_value; @@ -2479,6 +2489,11 @@ bool Storage::ShouldSkip(int rocksdb_id, const std::string& content) { auto& inst = insts_[rocksdb_id]; return inst->ShouldSkip(content); } + +Status Storage::FlushDBAtSlave(int rocksdb_id) { + auto& inst = insts_[rocksdb_id]; + return inst->FlushDBAtSlave(); +} #endif } // namespace storage From ad1f07a74765608132d39d55b7a39ef78af1c5fa Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 26 Apr 2024 12:14:30 +0800 Subject: [PATCH 084/116] support flushdb command --- src/storage/src/redis.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storage/src/redis.cc 
b/src/storage/src/redis.cc index 984f112cdd..d9f12e1d3f 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -107,8 +107,8 @@ Status Redis::FlushDB() { cfs_->SwitchMaster(false); Close(); pstd::DeleteDir(db_path_); - wal_writer_->Put("flushdb", 0/*db_id*/, index_, kFlushDB); Open(storage_options_, db_path_); + wal_writer_->Put("flushdb", 0/*db_id*/, index_, kFlushDB); return s; } From 662cbc4e653ffb0b842295d124545028bf95388a Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 26 Apr 2024 14:22:45 +0800 Subject: [PATCH 085/116] support bgsave --- include/pika_db.h | 4 + src/pika_admin.cc | 4 + src/pika_db.cc | 34 ++ src/storage/tests/cloud_clone_test.cc | 442 ++++++++++++++++++++++++++ 4 files changed, 484 insertions(+) create mode 100644 src/storage/tests/cloud_clone_test.cc diff --git a/include/pika_db.h b/include/pika_db.h index 6d4ed980cc..396b211f7d 100644 --- a/include/pika_db.h +++ b/include/pika_db.h @@ -94,6 +94,7 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { std::shared_ptr storage() const; void GetBgSaveMetaData(std::vector* fileNames, std::string* snapshot_uuid); void BgSaveDB(); + void BgSaveCloudDB(); void SetBinlogIoError(); void SetBinlogIoErrorrelieve(); bool IsBinlogIoError(); @@ -191,12 +192,15 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { * BgSave use */ static void DoBgSave(void* arg); + static void DoBgCloudSave(void* arg); bool RunBgsaveEngine(); + bool RunBgsaveCloudEngine(); bool InitBgsaveEnv(); bool InitBgsaveEngine(); void ClearBgsave(); void FinishBgsave(); + void FinishBgsaveCloud(); BgSaveInfo bgsave_info_; pstd::Mutex bgsave_protector_; std::shared_ptr bgsave_engine_; diff --git a/src/pika_admin.cc b/src/pika_admin.cc index b60f74ac5d..97b4307496 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -2780,6 +2780,10 @@ void DelbackupCmd::DoInitial() { } void DelbackupCmd::Do() { + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { + res_.SetRes(CmdRes::kOk); + return; + } std::string db_sync_prefix = g_pika_conf->bgsave_prefix(); std::string db_sync_path = g_pika_conf->bgsave_path(); std::vector dump_dir; diff --git a/src/pika_db.cc b/src/pika_db.cc index e662030eea..225796980c 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -74,6 +74,18 @@ void DB::BgSaveDB() { g_pika_server->BGSaveTaskSchedule(&DoBgSave, static_cast(bg_task_arg)); } +void DB::BgSaveCloudDB() { + std::shared_lock l(dbs_rw_); + std::lock_guard ml(bgsave_protector_); + if (bgsave_info_.bgsaving) { + return; + } + bgsave_info_.bgsaving = true; + auto bg_task_arg = new BgTaskArg(); + bg_task_arg->db = shared_from_this(); + g_pika_server->BGSaveTaskSchedule(&DoBgCloudSave, static_cast(bg_task_arg)); +} + void DB::SetBinlogIoError() { return binlog_io_error_.store(true); } void DB::SetBinlogIoErrorrelieve() { return binlog_io_error_.store(false); } bool DB::IsBinlogIoError() { return binlog_io_error_.load(); } @@ -291,6 +303,13 @@ void DB::DoBgSave(void* arg) { bg_task_arg->db->FinishBgsave(); } +void DB::DoBgCloudSave(void* arg) { + std::unique_ptr bg_task_arg(static_cast(arg)); + // Do BgSave + bool success = bg_task_arg->db->RunBgsaveCloudEngine(); + bg_task_arg->db->FinishBgsaveCloud(); +} + bool DB::RunBgsaveEngine() { // Prepare for Bgsaving if (!InitBgsaveEnv() || !InitBgsaveEngine()) { @@ -317,6 +336,16 @@ bool DB::RunBgsaveEngine() { return true; } +bool DB::RunBgsaveCloudEngine() { + rocksdb::Status s = bgsave_engine_->CreateNewCloudBackup(); + if (!s.ok()) { + LOG(WARNING) << db_name_ << " create new 
backup failed :" << s.ToString(); + return false; + } + LOG(INFO) << db_name_ << " create new backup finished."; + return true; +} + BgSaveInfo DB::bgsave_info() { std::lock_guard l(bgsave_protector_); return bgsave_info_; @@ -328,6 +357,11 @@ void DB::FinishBgsave() { g_pika_server->UpdateLastSave(time(nullptr)); } +void DB::FinishBgsaveCloud() { + std::lock_guard l(bgsave_protector_); + bgsave_info_.bgsaving = false; +} + // Prepare engine, need bgsave_protector protect bool DB::InitBgsaveEnv() { std::lock_guard l(bgsave_protector_); diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc new file mode 100644 index 0000000000..626436a84d --- /dev/null +++ b/src/storage/tests/cloud_clone_test.cc @@ -0,0 +1,442 @@ +#include +#include +#include +#include +#include +#include "glog/logging.h" + +#include "pstd/include/env.h" +#include "storage/storage.h" +#include "src/redis.h" +#include "storage/util.h" +#include "rocksdb/cloud/db_cloud.h" + +using namespace storage; +using namespace rocksdb; + + +class CloudTest : public ::testing::Test { +public: + CloudTest() = default; + ~CloudTest() override = default; + + void SetUp() override { + storage_options.options.create_if_missing = true; + storage_options.options.avoid_flush_during_shutdown = true; + auto& cloud_fs_opts = storage_options.cloud_fs_options; + cloud_fs_opts.endpoint_override = "http://10.224.129.40:9000"; + cloud_fs_opts.credentials.InitializeSimple("minioadmin", "minioadmin"); + ASSERT_TRUE(cloud_fs_opts.credentials.HasValid().ok()); + cloud_fs_opts.src_bucket.SetBucketName("database.unit.test", "pika."); + cloud_fs_opts.dest_bucket.SetBucketName("database.unit.test", "pika."); + storage_options.options.max_log_file_size = 0; + } + + void TearDown() override { + } + + static void SetUpTestSuite() {} + static void TearDownTestSuite() {} + + StorageOptions storage_options; + storage::Status s; + std::string path; +}; + +// This is the local directory where the db is stored. The same +// path name is used to store data inside the specified cloud +// storage bucket. +std::string kDBPath = "db"; + +// This is the local directory where the clone is stored. The same +// pathname is used to store data in the specified cloud bucket. +std::string kClonePath = "clone_db"; +std::string kBucketSuffix = "cloud.clone.example."; +std::string kBucketSuffix2_src = "cloud2.clone.example."; +std::string kBucketSuffix2_dest = "cloud2.clone.example.dst."; +// +// This is the name of the cloud storage bucket where the db +// is made durable. If you are using AWS, you have to manually +// ensure that this bucket name is unique to you and does not +// conflict with any other S3 users who might have already created +// this bucket name. +// In this example, the database and its clone are both stored in +// the same bucket (obviously with different pathnames). 
+// + +std::string kRegion = "us-west-2"; + +Status CloneDB(const std::string& clone_name, const std::string& src_bucket, + const std::string& src_object_path, + const std::string& dest_bucket, + const std::string& dest_object_path, + const CloudFileSystemOptions& cloud_fs_options, + std::unique_ptr* cloud_db, std::unique_ptr* cloud_env) { + // The local directory where the clone resides + //std::string cname = kClonePath + "/" + clone_name; + // Create new AWS env + + CloudFileSystemOptions cloud_fs_options2; + + cloud_fs_options2.endpoint_override = "http://10.224.129.40:9000"; + cloud_fs_options2.credentials.InitializeSimple("minioadmin", "minioadmin"); + //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); + + //cloud_fs_options.credentials.InitializeSimple( + // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); + if (!cloud_fs_options2.credentials.HasValid().ok()) { + fprintf( + stderr, + "Please set env variables " + "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY with cloud credentials"); + return rocksdb::Status::OK(); + } + + // Append the user name to the bucket name in an attempt to make it + // globally unique. S3 bucket-namess need to be globlly unique. + // If you want to rerun this example, then unique user-name suffix here. + char* user = getenv("USER"); + kBucketSuffix2_src.append(user); kBucketSuffix2_dest.append(user); + + const std::string bucketPrefix = "rockset."; + // create a bucket name for debugging purposes + const std::string bucketName_src = bucketPrefix + kBucketSuffix2_src; + const std::string bucketName_dest = bucketPrefix + kBucketSuffix2_dest; + + // Needed if using bucket prefix other than the default "rockset." + cloud_fs_options2.src_bucket.SetBucketName(kBucketSuffix2_src, bucketPrefix); + cloud_fs_options2.dest_bucket.SetBucketName(kBucketSuffix2_dest, bucketPrefix); + + CloudFileSystem* cfs; + /*Status st = CloudFileSystem::NewAwsFileSystem( + FileSystem::Default(), src_bucket, src_object_path, kRegion, dest_bucket, + dest_object_path, kRegion, cloud_fs_options2, nullptr, &cfs);*/ + + Status st = CloudFileSystem::NewAwsFileSystem( + FileSystem::Default(), src_bucket, src_object_path, kRegion, kBucketSuffix2_src, + dest_object_path, kRegion, cloud_fs_options2, nullptr, &cfs); + + if (!st.ok()) { + fprintf(stderr, + "Unable to create an AWS environment with " + "bucket %s", + src_bucket.c_str()); + return st; + } + std::shared_ptr fs(cfs); + *cloud_env = NewCompositeEnv(fs); + + // Create options and use the AWS env that we created earlier + Options options; + options.env = cloud_env->get(); + + // No persistent cache + std::string persistent_cache = ""; + + // create a bucket name for debugging purposes + // const std::string bucketName = cfs->GetSrcBucketName(); + + // open clone + DBCloud* db; + st = DBCloud::Open(options, kClonePath, persistent_cache, 0, &db); + if (!st.ok()) { + fprintf(stderr, "Unable to open clone at path %s in bucket %s. %s\n", + kClonePath.c_str(), kBucketSuffix2_src.c_str(), st.ToString().c_str()); + return st; + } + //std::unique_ptr cloud_db2; + std::cout << "bx..." << std::endl; + cloud_db->reset(db); + std::cout << "by..." 
<< std::endl; + cloud_db->get()->Savepoint(); + //cloud_db->get()-> + // DBCloudImpl db_impl(cloud_db->get(), NewCompositeEnv(fs)); + // DBCloudImpl::DBCloudImpl(DB* db, std::unique_ptr local_env) + // : DBCloud(db), cfs_(nullptr), local_env_(std::move(local_env)) {} + //DBCloudImpl + + return Status::OK(); +} + +TEST_F(CloudTest, clone_s3) { + // cloud environment config options here + CloudFileSystemOptions cloud_fs_options; + + cloud_fs_options.endpoint_override = "http://10.224.129.40:9000"; + cloud_fs_options.credentials.InitializeSimple("minioadmin", "minioadmin"); + //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); + + //cloud_fs_options.credentials.InitializeSimple( + // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); + if (!cloud_fs_options.credentials.HasValid().ok()) { + fprintf( + stderr, + "Please set env variables " + "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY with cloud credentials"); + return; + } + + // Append the user name to the bucket name in an attempt to make it + // globally unique. S3 bucket-namess need to be globlly unique. + // If you want to rerun this example, then unique user-name suffix here. + char* user = getenv("USER"); + kBucketSuffix.append(user); + + const std::string bucketPrefix = "rockset."; + // create a bucket name for debugging purposes + const std::string bucketName = bucketPrefix + kBucketSuffix; + + // Needed if using bucket prefix other than the default "rockset." + cloud_fs_options.src_bucket.SetBucketName(kBucketSuffix, bucketPrefix); + cloud_fs_options.dest_bucket.SetBucketName(kBucketSuffix, bucketPrefix); + + // Create a new AWS cloud env Status + CloudFileSystem* cfs; + Status s = CloudFileSystem::NewAwsFileSystem( + FileSystem::Default(), kBucketSuffix, kDBPath, kRegion, kBucketSuffix, + kDBPath, kRegion, cloud_fs_options, nullptr, &cfs); + if (!s.ok()) { + fprintf(stderr, "Unable to create cloud env in bucket %s. %s\n", + bucketName.c_str(), s.ToString().c_str()); + return; + } + + + // Store a reference to a cloud env. A new cloud env object should be + // associated with every new cloud-db. + auto cloud_env = NewCompositeEnv(std::shared_ptr(cfs)); + + // Create options and use the AWS env that we created earlier + Options options; + options.env = cloud_env.get(); + options.create_if_missing = true; + + // No persistent cache + std::string persistent_cache = ""; + + // Create and Open DB + DBCloud* db; + s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); + if (!s.ok()) { + fprintf(stderr, "Unable to open db at path %s in bucket %s. %s\n", + kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); + return; + } + + // Put key-value into main db + s = db->Put(WriteOptions(), "key1", "value"); + assert(s.ok()); + std::string value; + + // get value from main db + s = db->Get(ReadOptions(), "key1", &value); + assert(s.ok()); + assert(value == "value"); + + // Flush all data from main db to sst files. + db->Flush(FlushOptions()); + + // Create a clone of the db and and verify that all's well. + // In real applications, a Clone would typically be created + // by a separate process. + //std::unique_ptr clone_db; + std::unique_ptr clone_env; + std::unique_ptr clone_db; + s = CloneDB("clone1", kBucketSuffix, kDBPath, kBucketSuffix, kClonePath, + cloud_fs_options, &clone_db, &clone_env); + if (!s.ok()) { + fprintf(stderr, "Unable to clone db at path %s in bucket %s. %s\n", + kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); + return; + } + //sleep(30); + // insert a key-value in the clone. 
+ s = clone_db->Put(WriteOptions(), "name", "dhruba"); + assert(s.ok()); + + // assert that values from the main db appears in the clone + s = clone_db->Get(ReadOptions(), "key1", &value); + assert(s.ok()); + assert(value == "value"); + + clone_db->Flush(FlushOptions()); + clone_db.release(); + //sleep(20); + + + + delete db; + + fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", + kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); + return; +} + + +TEST_F(CloudTest, get_clone_s3) { + // cloud environment config options here + CloudFileSystemOptions cloud_fs_options; + + cloud_fs_options.endpoint_override = "http://10.224.129.40:9000"; + cloud_fs_options.credentials.InitializeSimple("minioadmin", "minioadmin"); + //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); + + //cloud_fs_options.credentials.InitializeSimple( + // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); + if (!cloud_fs_options.credentials.HasValid().ok()) { + fprintf( + stderr, + "Please set env variables " + "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY with cloud credentials"); + return; + } + + // Append the user name to the bucket name in an attempt to make it + // globally unique. S3 bucket-namess need to be globlly unique. + // If you want to rerun this example, then unique user-name suffix here. + char* user = getenv("USER"); + kBucketSuffix.append(user); + + const std::string bucketPrefix = "rockset."; + // create a bucket name for debugging purposes + const std::string bucketName = bucketPrefix + kBucketSuffix; + + // Needed if using bucket prefix other than the default "rockset." + cloud_fs_options.src_bucket.SetBucketName(kBucketSuffix, bucketPrefix); + cloud_fs_options.dest_bucket.SetBucketName(kBucketSuffix, bucketPrefix); + + // Create a new AWS cloud env Status + CloudFileSystem* cfs; + Status s = CloudFileSystem::NewAwsFileSystem( + FileSystem::Default(), kBucketSuffix, kDBPath, kRegion, kBucketSuffix, + kDBPath, kRegion, cloud_fs_options, nullptr, &cfs); + if (!s.ok()) { + fprintf(stderr, "Unable to create cloud env in bucket %s. %s\n", + bucketName.c_str(), s.ToString().c_str()); + return; + } + + + // Store a reference to a cloud env. A new cloud env object should be + // associated with every new cloud-db. + auto cloud_env = NewCompositeEnv(std::shared_ptr(cfs)); + + // Create options and use the AWS env that we created earlier + Options options; + options.env = cloud_env.get(); + options.create_if_missing = true; + + // No persistent cache + std::string persistent_cache = ""; + + // Create and Open DB + DBCloud* db; + s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); + if (!s.ok()) { + fprintf(stderr, "Unable to open db at path %s in bucket %s. 
%s\n", + kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); + return; + } + + // Put key-value into main db +/* + std::string value; + s = db->Get(ReadOptions(), "name", &value); + std::cout << "value1: " << value << std::endl; + // get value from main db + s = db->Get(ReadOptions(), "key1", &value); + std::cout << "value2: " << value << std::endl; + assert(s.ok()); + assert(value == "value");*//* + + for (int i = 0; i < 10000000; i++) { + if (i % 10000 == 0) {std::cout << i << std::endl; db->Flush(FlushOptions());} + std::string key = "uu1:"+i; + std::string value = std::string(2048, 'y'); + db->Put(WriteOptions(), key, value); + } + + + return; +} + +TEST_F(CloudTest, delete_s3) { + // cloud environment config options here + CloudFileSystemOptions cloud_fs_options; + + cloud_fs_options.endpoint_override = "http://10.224.129.40:9000"; + cloud_fs_options.credentials.InitializeSimple("minioadmin", "minioadmin"); + //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); + + //cloud_fs_options.credentials.InitializeSimple( + // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); + if (!cloud_fs_options.credentials.HasValid().ok()) { + fprintf( + stderr, + "Please set env variables " + "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY with cloud credentials"); + return; + } + + // Append the user name to the bucket name in an attempt to make it + // globally unique. S3 bucket-namess need to be globlly unique. + // If you want to rerun this example, then unique user-name suffix here. + char* user = getenv("USER"); + kBucketSuffix.append(user); + + const std::string bucketPrefix = "rockset."; + // create a bucket name for debugging purposes + const std::string bucketName = bucketPrefix + kBucketSuffix; + + // Needed if using bucket prefix other than the default "rockset." + cloud_fs_options.src_bucket.SetBucketName(kBucketSuffix, bucketPrefix); + cloud_fs_options.dest_bucket.SetBucketName(kBucketSuffix, bucketPrefix); + + // Create a new AWS cloud env Status + CloudFileSystem* cfs; + Status s = CloudFileSystem::NewAwsFileSystem( + FileSystem::Default(), kBucketSuffix, kDBPath, kRegion, kBucketSuffix, + kDBPath, kRegion, cloud_fs_options, nullptr, &cfs); + if (!s.ok()) { + fprintf(stderr, "Unable to create cloud env in bucket %s. %s\n", + bucketName.c_str(), s.ToString().c_str()); + return; + } + + + // Store a reference to a cloud env. A new cloud env object should be + // associated with every new cloud-db. + auto cloud_env = NewCompositeEnv(std::shared_ptr(cfs)); + + // Create options and use the AWS env that we created earlier + Options options; + options.env = cloud_env.get(); + options.create_if_missing = true; + + // No persistent cache + std::string persistent_cache = ""; + + // Create and Open DB + DBCloud* db; + s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); + if (!s.ok()) { + fprintf(stderr, "Unable to open db at path %s in bucket %s. 
%s\n", + kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); + return; + } + //cfs->DeleteCloudFileFromDest(); + +} + +int main(int argc, char** argv) { + if (!pstd::FileExists("./log")) { + pstd::CreatePath("./log"); + } + FLAGS_log_dir = "./log"; + FLAGS_minloglevel = 0; + FLAGS_max_log_size = 1800; + FLAGS_logbufsecs = 0; + ::google::InitGoogleLogging("cloud_clone_test"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From db95c47b86c6f0a28d5083504b3bd1dc0e7edfdd Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 26 Apr 2024 16:32:57 +0800 Subject: [PATCH 086/116] support bgsave --- src/storage/include/storage/backupable.h | 2 ++ src/storage/src/backupable.cc | 45 ++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/src/storage/include/storage/backupable.h b/src/storage/include/storage/backupable.h index e190993c29..0e919f7568 100644 --- a/src/storage/include/storage/backupable.h +++ b/src/storage/include/storage/backupable.h @@ -49,6 +49,8 @@ class BackupEngine { Status CreateNewBackup(const std::string& dir); + Status CreateNewCloudBackup(); + void StopBackup(); Status CreateNewBackupSpecify(const std::string& dir, int index); diff --git a/src/storage/src/backupable.cc b/src/storage/src/backupable.cc index 4acd8dee72..798e513b4d 100644 --- a/src/storage/src/backupable.cc +++ b/src/storage/src/backupable.cc @@ -6,9 +6,12 @@ #include #include +#include "include/pika_server.h" #include "storage/backupable.h" #include "storage/storage.h" +extern PikaServer* g_pika_server; +const std::string kRegion = "us-west-2"; namespace storage { BackupEngine::~BackupEngine() { @@ -141,6 +144,48 @@ Status BackupEngine::CreateNewBackup(const std::string& dir) { return s; } +Status BackupEngine::CreateNewCloudBackup() { + Status s = Status::OK(); + rocksdb::CloudFileSystemOptions cloud_fs_options = g_pika_server->storage_options().cloud_fs_options; + std::string src_bucket = cloud_fs_options.src_bucket.GetBucketName(); + std::string src_object_path = cloud_fs_options.src_bucket.GetObjectPath(); + + cloud_fs_options.src_bucket.SetBucketName("database.backup.src", "pika."); + cloud_fs_options.dest_bucket.SetBucketName("database.backup.dst", "pika."); + + rocksdb::CloudFileSystem* cfs; + //todo: multi db + s = rocksdb::CloudFileSystem::NewAwsFileSystem( + rocksdb::FileSystem::Default(), src_bucket, src_object_path, kRegion, "database.backup.src", + "clone_db", kRegion, cloud_fs_options, nullptr, &cfs); + + if (!s.ok()) { + LOG(WARNING) << "Unable to create an AWS environment with bucket, " << s.ToString(); + return s; + } + std::shared_ptr fs(cfs); + auto cloud_env = NewCompositeEnv(fs); + + // Create options and use the AWS env that we created earlier + Options options; + options.env = cloud_env.get(); + + // No persistent cache + std::string persistent_cache = ""; + + // open clone + rocksdb::DBCloud* db; + s = rocksdb::DBCloud::Open(options, "clone_db", persistent_cache, 0, &db); + if (!s.ok()) { + LOG(WARNING) << "Unable to open clone at path clone_db in bucket database.backup.dst, "<< s.ToString(); + return s; + } + + db->Savepoint(); + return s; +} + + void BackupEngine::StopBackup() { // DEPRECATED } From aedc280bf3d62a093ec3dfa45ed4ac0a392a1434 Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 26 Apr 2024 19:39:04 +0800 Subject: [PATCH 087/116] clean code --- include/pika_db.h | 9 +++-- include/pika_server.h | 1 + src/pika_admin.cc | 6 ++- src/pika_db.cc | 20 ++++++---- src/pika_server.cc | 3 ++ src/storage/include/storage/backupable.h | 2 +- 
src/storage/src/backupable.cc | 8 +--- src/storage/tests/cloud_clone_test.cc | 51 +----------------------- 8 files changed, 31 insertions(+), 69 deletions(-) diff --git a/include/pika_db.h b/include/pika_db.h index 396b211f7d..7ea4c4a7f8 100644 --- a/include/pika_db.h +++ b/include/pika_db.h @@ -94,7 +94,7 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { std::shared_ptr storage() const; void GetBgSaveMetaData(std::vector* fileNames, std::string* snapshot_uuid); void BgSaveDB(); - void BgSaveCloudDB(); + void CloudBgSaveDB(); void SetBinlogIoError(); void SetBinlogIoErrorrelieve(); bool IsBinlogIoError(); @@ -192,15 +192,15 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { * BgSave use */ static void DoBgSave(void* arg); - static void DoBgCloudSave(void* arg); + static void DoCloudBgSave(void* arg); bool RunBgsaveEngine(); - bool RunBgsaveCloudEngine(); + bool RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options); bool InitBgsaveEnv(); bool InitBgsaveEngine(); void ClearBgsave(); void FinishBgsave(); - void FinishBgsaveCloud(); + void FinishCloudBgsave(); BgSaveInfo bgsave_info_; pstd::Mutex bgsave_protector_; std::shared_ptr bgsave_engine_; @@ -208,6 +208,7 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { struct BgTaskArg { std::shared_ptr db; + rocksdb::CloudFileSystemOptions cloud_fs_options; }; #endif diff --git a/include/pika_server.h b/include/pika_server.h index ca958dfa68..27830b2426 100644 --- a/include/pika_server.h +++ b/include/pika_server.h @@ -65,6 +65,7 @@ enum TaskType { kStartKeyScan, kStopKeyScan, kBgSave, + kCloudBgSave, kCompactRangeStrings, kCompactRangeHashes, kCompactRangeSets, diff --git a/src/pika_admin.cc b/src/pika_admin.cc index 97b4307496..26334996db 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -337,7 +337,11 @@ void BgsaveCmd::DoInitial() { } void BgsaveCmd::Do() { - g_pika_server->DoSameThingSpecificDB(bgsave_dbs_, {TaskType::kBgSave}); + if (g_pika_conf->pika_mode() == PIKA_CLOUD) { + g_pika_server->DoSameThingSpecificDB(bgsave_dbs_, {TaskType::kCloudBgSave}); + } else { + g_pika_server->DoSameThingSpecificDB(bgsave_dbs_, {TaskType::kBgSave}); + } LogCommand(); res_.AppendContent("+Background saving started"); } diff --git a/src/pika_db.cc b/src/pika_db.cc index 225796980c..9044c9d726 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -74,7 +74,7 @@ void DB::BgSaveDB() { g_pika_server->BGSaveTaskSchedule(&DoBgSave, static_cast(bg_task_arg)); } -void DB::BgSaveCloudDB() { +void DB::CloudBgSaveDB() { std::shared_lock l(dbs_rw_); std::lock_guard ml(bgsave_protector_); if (bgsave_info_.bgsaving) { @@ -83,7 +83,8 @@ void DB::BgSaveCloudDB() { bgsave_info_.bgsaving = true; auto bg_task_arg = new BgTaskArg(); bg_task_arg->db = shared_from_this(); - g_pika_server->BGSaveTaskSchedule(&DoBgCloudSave, static_cast(bg_task_arg)); + bg_task_arg->cloud_fs_options = g_pika_server->storage_options().cloud_fs_options; + g_pika_server->BGSaveTaskSchedule(&DoCloudBgSave, static_cast(bg_task_arg)); } void DB::SetBinlogIoError() { return binlog_io_error_.store(true); } @@ -303,11 +304,14 @@ void DB::DoBgSave(void* arg) { bg_task_arg->db->FinishBgsave(); } -void DB::DoBgCloudSave(void* arg) { +void DB::DoCloudBgSave(void* arg) { std::unique_ptr bg_task_arg(static_cast(arg)); // Do BgSave - bool success = bg_task_arg->db->RunBgsaveCloudEngine(); - bg_task_arg->db->FinishBgsaveCloud(); + bool success = 
bg_task_arg->db->RunCloudBgsaveEngine(bg_task_arg->cloud_fs_options); + if (success) { + //todo + } + bg_task_arg->db->FinishCloudBgsave(); } bool DB::RunBgsaveEngine() { @@ -336,8 +340,8 @@ bool DB::RunBgsaveEngine() { return true; } -bool DB::RunBgsaveCloudEngine() { - rocksdb::Status s = bgsave_engine_->CreateNewCloudBackup(); +bool DB::RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options) { + rocksdb::Status s = bgsave_engine_->CreateNewCloudBackup(cloud_fs_options); if (!s.ok()) { LOG(WARNING) << db_name_ << " create new backup failed :" << s.ToString(); return false; @@ -357,7 +361,7 @@ void DB::FinishBgsave() { g_pika_server->UpdateLastSave(time(nullptr)); } -void DB::FinishBgsaveCloud() { +void DB::FinishCloudBgsave() { std::lock_guard l(bgsave_protector_); bgsave_info_.bgsaving = false; } diff --git a/src/pika_server.cc b/src/pika_server.cc index b81813242b..394bdb1bc9 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -438,6 +438,9 @@ Status PikaServer::DoSameThingSpecificDB(const std::set& dbs, const case TaskType::kBgSave: db_item.second->BgSaveDB(); break; + case TaskType::kCloudBgSave: + db_item.second->CloudBgSaveDB(); + break; case TaskType::kCompactRangeStrings: db_item.second->CompactRange(storage::DataType::kStrings, arg.argv[0], arg.argv[1]); break; diff --git a/src/storage/include/storage/backupable.h b/src/storage/include/storage/backupable.h index 0e919f7568..9bd0d176b9 100644 --- a/src/storage/include/storage/backupable.h +++ b/src/storage/include/storage/backupable.h @@ -49,7 +49,7 @@ class BackupEngine { Status CreateNewBackup(const std::string& dir); - Status CreateNewCloudBackup(); + Status CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud_fs_options); void StopBackup(); diff --git a/src/storage/src/backupable.cc b/src/storage/src/backupable.cc index 798e513b4d..f8dd3aa56f 100644 --- a/src/storage/src/backupable.cc +++ b/src/storage/src/backupable.cc @@ -6,12 +6,11 @@ #include #include -#include "include/pika_server.h" #include "storage/backupable.h" #include "storage/storage.h" -extern PikaServer* g_pika_server; const std::string kRegion = "us-west-2"; + namespace storage { BackupEngine::~BackupEngine() { @@ -144,9 +143,8 @@ Status BackupEngine::CreateNewBackup(const std::string& dir) { return s; } -Status BackupEngine::CreateNewCloudBackup() { +Status BackupEngine::CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud_fs_options) { Status s = Status::OK(); - rocksdb::CloudFileSystemOptions cloud_fs_options = g_pika_server->storage_options().cloud_fs_options; std::string src_bucket = cloud_fs_options.src_bucket.GetBucketName(); std::string src_object_path = cloud_fs_options.src_bucket.GetObjectPath(); @@ -160,7 +158,6 @@ Status BackupEngine::CreateNewCloudBackup() { "clone_db", kRegion, cloud_fs_options, nullptr, &cfs); if (!s.ok()) { - LOG(WARNING) << "Unable to create an AWS environment with bucket, " << s.ToString(); return s; } std::shared_ptr fs(cfs); @@ -177,7 +174,6 @@ Status BackupEngine::CreateNewCloudBackup() { rocksdb::DBCloud* db; s = rocksdb::DBCloud::Open(options, "clone_db", persistent_cache, 0, &db); if (!s.ok()) { - LOG(WARNING) << "Unable to open clone at path clone_db in bucket database.backup.dst, "<< s.ToString(); return s; } diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc index 626436a84d..940ee3383d 100644 --- a/src/storage/tests/cloud_clone_test.cc +++ b/src/storage/tests/cloud_clone_test.cc @@ -72,18 +72,10 @@ Status CloneDB(const std::string& 
clone_name, const std::string& src_bucket, const std::string& dest_object_path, const CloudFileSystemOptions& cloud_fs_options, std::unique_ptr* cloud_db, std::unique_ptr* cloud_env) { - // The local directory where the clone resides - //std::string cname = kClonePath + "/" + clone_name; - // Create new AWS env - CloudFileSystemOptions cloud_fs_options2; cloud_fs_options2.endpoint_override = "http://10.224.129.40:9000"; cloud_fs_options2.credentials.InitializeSimple("minioadmin", "minioadmin"); - //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); - - //cloud_fs_options.credentials.InitializeSimple( - // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); if (!cloud_fs_options2.credentials.HasValid().ok()) { fprintf( stderr, @@ -108,10 +100,6 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, cloud_fs_options2.dest_bucket.SetBucketName(kBucketSuffix2_dest, bucketPrefix); CloudFileSystem* cfs; - /*Status st = CloudFileSystem::NewAwsFileSystem( - FileSystem::Default(), src_bucket, src_object_path, kRegion, dest_bucket, - dest_object_path, kRegion, cloud_fs_options2, nullptr, &cfs);*/ - Status st = CloudFileSystem::NewAwsFileSystem( FileSystem::Default(), src_bucket, src_object_path, kRegion, kBucketSuffix2_src, dest_object_path, kRegion, cloud_fs_options2, nullptr, &cfs); @@ -132,10 +120,6 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, // No persistent cache std::string persistent_cache = ""; - - // create a bucket name for debugging purposes - // const std::string bucketName = cfs->GetSrcBucketName(); - // open clone DBCloud* db; st = DBCloud::Open(options, kClonePath, persistent_cache, 0, &db); @@ -149,12 +133,6 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, cloud_db->reset(db); std::cout << "by..." << std::endl; cloud_db->get()->Savepoint(); - //cloud_db->get()-> - // DBCloudImpl db_impl(cloud_db->get(), NewCompositeEnv(fs)); - // DBCloudImpl::DBCloudImpl(DB* db, std::unique_ptr local_env) - // : DBCloud(db), cfs_(nullptr), local_env_(std::move(local_env)) {} - //DBCloudImpl - return Status::OK(); } @@ -164,10 +142,6 @@ TEST_F(CloudTest, clone_s3) { cloud_fs_options.endpoint_override = "http://10.224.129.40:9000"; cloud_fs_options.credentials.InitializeSimple("minioadmin", "minioadmin"); - //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); - - //cloud_fs_options.credentials.InitializeSimple( - // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); if (!cloud_fs_options.credentials.HasValid().ok()) { fprintf( stderr, @@ -249,7 +223,7 @@ TEST_F(CloudTest, clone_s3) { kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); return; } - //sleep(30); + // insert a key-value in the clone. 
s = clone_db->Put(WriteOptions(), "name", "dhruba"); assert(s.ok()); @@ -261,9 +235,6 @@ TEST_F(CloudTest, clone_s3) { clone_db->Flush(FlushOptions()); clone_db.release(); - //sleep(20); - - delete db; @@ -279,10 +250,6 @@ TEST_F(CloudTest, get_clone_s3) { cloud_fs_options.endpoint_override = "http://10.224.129.40:9000"; cloud_fs_options.credentials.InitializeSimple("minioadmin", "minioadmin"); - //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); - - //cloud_fs_options.credentials.InitializeSimple( - // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); if (!cloud_fs_options.credentials.HasValid().ok()) { fprintf( stderr, @@ -339,7 +306,6 @@ TEST_F(CloudTest, get_clone_s3) { } // Put key-value into main db -/* std::string value; s = db->Get(ReadOptions(), "name", &value); std::cout << "value1: " << value << std::endl; @@ -347,16 +313,7 @@ TEST_F(CloudTest, get_clone_s3) { s = db->Get(ReadOptions(), "key1", &value); std::cout << "value2: " << value << std::endl; assert(s.ok()); - assert(value == "value");*//* - - for (int i = 0; i < 10000000; i++) { - if (i % 10000 == 0) {std::cout << i << std::endl; db->Flush(FlushOptions());} - std::string key = "uu1:"+i; - std::string value = std::string(2048, 'y'); - db->Put(WriteOptions(), key, value); - } - - + assert(value == "value"); return; } @@ -366,10 +323,6 @@ TEST_F(CloudTest, delete_s3) { cloud_fs_options.endpoint_override = "http://10.224.129.40:9000"; cloud_fs_options.credentials.InitializeSimple("minioadmin", "minioadmin"); - //ASSERT_TRUE(cloud_fs_options.credentials.HasValid().ok()); - - //cloud_fs_options.credentials.InitializeSimple( - // getenv("AWS_ACCESS_KEY_ID"), getenv("AWS_SECRET_ACCESS_KEY")); if (!cloud_fs_options.credentials.HasValid().ok()) { fprintf( stderr, From f527891044d327cba82dae964176d5b127cd1047 Mon Sep 17 00:00:00 2001 From: baixin Date: Sun, 28 Apr 2024 09:40:12 +0800 Subject: [PATCH 088/116] clean code --- include/pika_db.h | 2 +- src/pika_admin.cc | 16 ++++++------ src/pika_db.cc | 11 +++------ src/storage/src/backupable.cc | 4 +-- src/storage/tests/cloud_clone_test.cc | 35 +++++++++++++++++++++++++-- 5 files changed, 46 insertions(+), 22 deletions(-) diff --git a/include/pika_db.h b/include/pika_db.h index 7ea4c4a7f8..34862481db 100644 --- a/include/pika_db.h +++ b/include/pika_db.h @@ -194,7 +194,7 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { static void DoBgSave(void* arg); static void DoCloudBgSave(void* arg); bool RunBgsaveEngine(); - bool RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options); + void RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options); bool InitBgsaveEnv(); bool InitBgsaveEngine(); diff --git a/src/pika_admin.cc b/src/pika_admin.cc index 26334996db..962178762e 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -3274,17 +3274,15 @@ void PKPingCmd::DoInitial() { } } - if (g_pika_conf->pika_mode() == PIKA_CLOUD) { - if (g_pika_server->role() == PIKA_ROLE_MASTER) { - for (auto const& slave : g_pika_server->slaves_) { - if (std::find(masters_addr_.begin(), masters_addr_.end(), slave.ip_port) != masters_addr_.end()) { - g_pika_server->set_group_id(group_id_); - g_pika_server->set_lease_term_id(term_id_); - } - } + if (g_pika_conf->pika_mode() == PIKA_CLOUD + && g_pika_server->role() == PIKA_ROLE_MASTER) { + for (auto const& slave : g_pika_server->slaves_) { + if (std::find(masters_addr_.begin(), masters_addr_.end(), slave.ip_port) != masters_addr_.end()) { + g_pika_server->set_group_id(group_id_); + 
g_pika_server->set_lease_term_id(term_id_); + } } } - } void PKPingCmd::Do() { diff --git a/src/pika_db.cc b/src/pika_db.cc index 9044c9d726..d873757cbf 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -306,11 +306,7 @@ void DB::DoBgSave(void* arg) { void DB::DoCloudBgSave(void* arg) { std::unique_ptr bg_task_arg(static_cast(arg)); - // Do BgSave - bool success = bg_task_arg->db->RunCloudBgsaveEngine(bg_task_arg->cloud_fs_options); - if (success) { - //todo - } + bg_task_arg->db->RunCloudBgsaveEngine(bg_task_arg->cloud_fs_options); bg_task_arg->db->FinishCloudBgsave(); } @@ -340,14 +336,13 @@ bool DB::RunBgsaveEngine() { return true; } -bool DB::RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options) { +void DB::RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options) { rocksdb::Status s = bgsave_engine_->CreateNewCloudBackup(cloud_fs_options); if (!s.ok()) { LOG(WARNING) << db_name_ << " create new backup failed :" << s.ToString(); - return false; + return; } LOG(INFO) << db_name_ << " create new backup finished."; - return true; } BgSaveInfo DB::bgsave_info() { diff --git a/src/storage/src/backupable.cc b/src/storage/src/backupable.cc index f8dd3aa56f..c0094760fa 100644 --- a/src/storage/src/backupable.cc +++ b/src/storage/src/backupable.cc @@ -145,14 +145,14 @@ Status BackupEngine::CreateNewBackup(const std::string& dir) { Status BackupEngine::CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud_fs_options) { Status s = Status::OK(); + std::string src_bucket = cloud_fs_options.src_bucket.GetBucketName(); std::string src_object_path = cloud_fs_options.src_bucket.GetObjectPath(); - cloud_fs_options.src_bucket.SetBucketName("database.backup.src", "pika."); cloud_fs_options.dest_bucket.SetBucketName("database.backup.dst", "pika."); rocksdb::CloudFileSystem* cfs; - //todo: multi db + s = rocksdb::CloudFileSystem::NewAwsFileSystem( rocksdb::FileSystem::Default(), src_bucket, src_object_path, kRegion, "database.backup.src", "clone_db", kRegion, cloud_fs_options, nullptr, &cfs); diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc index 940ee3383d..ada3c9648c 100644 --- a/src/storage/tests/cloud_clone_test.cc +++ b/src/storage/tests/cloud_clone_test.cc @@ -50,6 +50,7 @@ std::string kDBPath = "db"; // This is the local directory where the clone is stored. The same // pathname is used to store data in the specified cloud bucket. +//std::string kClonePath = "db"; std::string kClonePath = "clone_db"; std::string kBucketSuffix = "cloud.clone.example."; std::string kBucketSuffix2_src = "cloud2.clone.example."; @@ -132,7 +133,38 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, std::cout << "bx..." << std::endl; cloud_db->reset(db); std::cout << "by..." 
<< std::endl; + + CloudFileSystem* cfs_bak; + st = CloudFileSystem::NewAwsFileSystem( + FileSystem::Default(), kBucketSuffix2_src, dest_object_path, kRegion, kBucketSuffix2_dest, + dest_object_path, kRegion, cloud_fs_options2, nullptr, &cfs_bak); + + if (!st.ok()) { + fprintf(stderr, + "Unable to create an AWS environment with " + "bucket %s", + src_bucket.c_str()); + return st; + } + std::shared_ptr fs_bak(cfs_bak); + auto cloud_env_bak = NewCompositeEnv(fs_bak); + // Create options and use the AWS env that we created earlier + Options options2; + options2.env = cloud_env_bak.get(); + + // No persistent cache + std::string persistent_cache_bak = ""; + // open clone + DBCloud* db_bak; + st = DBCloud::Open(options2, kClonePath, persistent_cache_bak, 0, &db_bak); + if (!st.ok()) { + fprintf(stderr, "Unable to open clone at path %s in bucket %s. %s\n", + kClonePath.c_str(), kBucketSuffix2_src.c_str(), st.ToString().c_str()); + return st; + } + cloud_db->get()->Savepoint(); + //db_bak->Savepoint(); return Status::OK(); } @@ -233,14 +265,13 @@ TEST_F(CloudTest, clone_s3) { assert(s.ok()); assert(value == "value"); - clone_db->Flush(FlushOptions()); + //clone_db->Flush(FlushOptions()); clone_db.release(); delete db; fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); - return; } From a80ba1d2559d0329991da81b72fa5911494a2618 Mon Sep 17 00:00:00 2001 From: baixin Date: Sun, 28 Apr 2024 11:31:40 +0800 Subject: [PATCH 089/116] for test --- src/storage/tests/cloud_clone_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc index ada3c9648c..24ef98daea 100644 --- a/src/storage/tests/cloud_clone_test.cc +++ b/src/storage/tests/cloud_clone_test.cc @@ -273,7 +273,7 @@ TEST_F(CloudTest, clone_s3) { fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); } - +/* TEST_F(CloudTest, get_clone_s3) { // cloud environment config options here @@ -410,7 +410,7 @@ TEST_F(CloudTest, delete_s3) { } //cfs->DeleteCloudFileFromDest(); -} +}*/ int main(int argc, char** argv) { if (!pstd::FileExists("./log")) { From 821b2331b76990db72eb86d1bfe507fc316e603d Mon Sep 17 00:00:00 2001 From: baixin Date: Sun, 28 Apr 2024 17:48:34 +0800 Subject: [PATCH 090/116] add support delbackup cmd --- src/pika_admin.cc | 73 +++++++++++++++++++-- src/storage/tests/cloud_clone_test.cc | 93 +++++++++++++++++++++++++-- 2 files changed, 157 insertions(+), 9 deletions(-) diff --git a/src/pika_admin.cc b/src/pika_admin.cc index 962178762e..af6bafc5e0 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -12,18 +12,23 @@ #include #include -#include +#include +#include #include +#include +#include +#include +#include +#include #include "include/build_version.h" #include "include/pika_cmd_table_manager.h" +#include "include/pika_conf.h" #include "include/pika_rm.h" #include "include/pika_server.h" #include "include/pika_version.h" -#include "include/pika_conf.h" #include "pstd/include/rsync.h" - using pstd::Status; using namespace Aws::Utils; @@ -2785,9 +2790,69 @@ void DelbackupCmd::DoInitial() { void DelbackupCmd::Do() { if (g_pika_conf->pika_mode() == PIKA_CLOUD) { - res_.SetRes(CmdRes::kOk); + Aws::SDKOptions options; + Aws::InitAPI(options); + + Aws::Client::ClientConfiguration cfg; + cfg.endpointOverride = g_pika_conf->cloud_endpoint_override(); + cfg.scheme = 
Aws::Http::Scheme::HTTP; + cfg.verifySSL = false; + + Aws::Auth::AWSCredentials cred(g_pika_conf->cloud_access_key(), + g_pika_conf->cloud_secret_key()); + Aws::S3::S3Client s3_client(cred, cfg, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false, Aws::S3::US_EAST_1_REGIONAL_ENDPOINT_OPTION::NOT_SET); + + std::string bucket_name = g_pika_server->storage_options().cloud_fs_options.src_bucket.GetBucketName(); + Aws::S3::Model::DeleteBucketRequest request_del_bucket; + Aws::S3::Model::ListObjectsRequest request_list_object; + + request_del_bucket.SetBucket(bucket_name); + request_list_object.SetBucket(bucket_name); + + bool truncated = false; + //list object and delete file + do { + auto list_objects = s3_client.ListObjects(request_list_object); + if (list_objects.IsSuccess()) { + for (const auto& object : list_objects.GetResult().GetContents()) + { + Aws::S3::Model::DeleteObjectRequest request_del_object; + request_del_object.SetBucket(bucket_name); + request_del_object.SetKey(object.GetKey()); + auto object_del_result = s3_client.DeleteObject(request_del_object); + if (!object_del_result.IsSuccess()) { + res_.SetRes(CmdRes::kErrOther, "DeleteFile error: " + object_del_result.GetError().GetMessage()); + Aws::ShutdownAPI(options); + return; + } + } + + // check if the next page is empty + truncated = list_objects.GetResult().GetIsTruncated(); + if (truncated) { + request_list_object.SetMarker(list_objects.GetResult().GetNextMarker()); + } + } else { + res_.SetRes(CmdRes::kErrOther, "ListObjects error: " + list_objects.GetError().GetMessage()); + Aws::ShutdownAPI(options); + return; + } + } while (truncated); + + //del bucket + auto bucket_del_result = s3_client.DeleteBucket(request_del_bucket); + if (!bucket_del_result.IsSuccess()) { + res_.SetRes(CmdRes::kErrOther, "DeleteBucket error: " + bucket_del_result.GetError().GetMessage()); + } else { + res_.SetRes(CmdRes::kOk); + } + + Aws::ShutdownAPI(options); return; } + std::string db_sync_prefix = g_pika_conf->bgsave_prefix(); std::string db_sync_path = g_pika_conf->bgsave_path(); std::vector dump_dir; diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc index 24ef98daea..5282b10a4f 100644 --- a/src/storage/tests/cloud_clone_test.cc +++ b/src/storage/tests/cloud_clone_test.cc @@ -1,8 +1,15 @@ -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include +#include #include "glog/logging.h" #include "pstd/include/env.h" @@ -273,7 +280,6 @@ TEST_F(CloudTest, clone_s3) { fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); } -/* TEST_F(CloudTest, get_clone_s3) { // cloud environment config options here @@ -410,7 +416,84 @@ TEST_F(CloudTest, delete_s3) { } //cfs->DeleteCloudFileFromDest(); -}*/ +} +TEST_F(CloudTest, del_bucket_s3) { + Aws::SDKOptions options; + Aws::InitAPI(options); + + Aws::Client::ClientConfiguration cfg; + cfg.endpointOverride = "10.224.129.40:9000"; + cfg.scheme = Aws::Http::Scheme::HTTP; + cfg.verifySSL = false; + + Aws::Auth::AWSCredentials cred("minioadmin", "minioadmin"); // ak,sk + Aws::S3::S3Client s3_client(cred, cfg, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false, Aws::S3::US_EAST_1_REGIONAL_ENDPOINT_OPTION::NOT_SET); + + auto response = s3_client.ListBuckets(); + if (response.IsSuccess()) { + auto buckets = response.GetResult().GetBuckets(); + for (auto iter = buckets.begin(); iter != 
buckets.end(); ++iter) { + std::cout << iter->GetName() << "\t" << iter->GetCreationDate().ToLocalTimeString(Aws::Utils::DateFormat::ISO_8601) << std::endl; + } + } else { + std::cout << "Error while ListBuckets " << response.GetError().GetExceptionName() + << " " << response.GetError().GetMessage() << std::endl; + } + + + + // Aws::S3::S3Client s3_client; + Aws::S3::Model::DeleteBucketRequest request; + request.SetBucket("rockset.cloud2.clone.example.dst.charlieqiao"); + //s3_client.DeleteBucketAsync(request); + + Aws::S3::Model::ListObjectsRequest requ; + requ.SetBucket("rockset.cloud2.clone.example.dst.charlieqiao"); + + bool truncated = false; + do + { + auto outcome = s3_client.ListObjects(requ); + if (outcome.IsSuccess()) + { + std::cout << "list....obinect" << std::endl; + for (const auto& object : outcome.GetResult().GetContents()) + { + Aws::S3::Model::DeleteObjectRequest request; + std::cout << "Folder: " << object.GetKey() << std::endl; + request.SetBucket("rockset.cloud2.clone.example.dst.charlieqiao"); + request.SetKey(object.GetKey()); + auto outcome = s3_client.DeleteObject(request); + if (outcome.IsSuccess()) { + std::cout << "File deleted successfully" << std::endl; + } else { + std::cout << "Failed to delete file:" << outcome.GetError().GetMessage() << std::endl; + } + } + + // 检查是否有下一页 + truncated = outcome.GetResult().GetIsTruncated(); + if (truncated) + { + requ.SetMarker(outcome.GetResult().GetNextMarker()); + } + } + else + { + std::cout << "ListObjects error: " << outcome.GetError().GetMessage() << std::endl; + break; + } + } while (truncated); + + auto outcome = s3_client.DeleteBucket(request); + if (!outcome.IsSuccess()) { + std::cout << "DeleteBucket error: " << outcome.GetError().GetMessage() << std::endl; + } + + Aws::ShutdownAPI(options); +} int main(int argc, char** argv) { if (!pstd::FileExists("./log")) { From 5b461b3778bfde3ecd4cc681976256da8b91882f Mon Sep 17 00:00:00 2001 From: baixin Date: Mon, 29 Apr 2024 20:38:39 +0800 Subject: [PATCH 091/116] add bgsave multi db --- CMakeLists.txt | 9 ++- src/pika_db.cc | 2 +- src/storage/include/storage/backupable.h | 4 +- src/storage/src/backupable.cc | 85 +++++++++++++++--------- src/storage/tests/cloud_clone_test.cc | 41 ++---------- 5 files changed, 71 insertions(+), 70 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 092a62d4c1..2df587aeb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -592,7 +592,7 @@ ExternalProject_Add(rocksdb #temporary for debug, skip download from github http://10.224.129.40:8000/rocksdb_cloud.tar.gz URL_HASH - MD5=fbcf0e166bfddf367063caa1dc583db8 + MD5=0c108533cddb48c49b154507ad012558 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND @@ -795,7 +795,7 @@ if (USE_PIKA_TOOLS) endif() aux_source_directory(src DIR_SRCS) -# # generate version +# generate version string(TIMESTAMP TS "%Y-%m-%d %H:%M:%S" UTC) set(PIKA_BUILD_DATE "${TS}" CACHE STRING "the time we first built pika") @@ -853,6 +853,11 @@ if (USE_S3) target_link_libraries(${PROJECT_NAME} ${AWSSDK_LINK_LIBRARIES}) endif() +# SSL +find_package(OpenSSL REQUIRED) +include_directories(${OPENSSL_INCLUDE_DIR}) +target_link_libraries(${PROJECT_NAME} OpenSSL::SSL OpenSSL::Crypto) + add_dependencies(${PROJECT_NAME} gflags gtest diff --git a/src/pika_db.cc b/src/pika_db.cc index d873757cbf..f729572813 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -337,7 +337,7 @@ bool DB::RunBgsaveEngine() { } void DB::RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options) { - rocksdb::Status s = 
bgsave_engine_->CreateNewCloudBackup(cloud_fs_options); + rocksdb::Status s = bgsave_engine_->CreateNewCloudBackup(cloud_fs_options, g_pika_conf.get()); if (!s.ok()) { LOG(WARNING) << db_name_ << " create new backup failed :" << s.ToString(); return; diff --git a/src/storage/include/storage/backupable.h b/src/storage/include/storage/backupable.h index 9bd0d176b9..d6af6994a2 100644 --- a/src/storage/include/storage/backupable.h +++ b/src/storage/include/storage/backupable.h @@ -11,6 +11,7 @@ #include "rocksdb/db.h" #include "db_checkpoint.h" +#include "include/pika_conf.h" #include "storage.h" #include "util.h" @@ -49,7 +50,8 @@ class BackupEngine { Status CreateNewBackup(const std::string& dir); - Status CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud_fs_options); + Status CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud_fs_options, + PikaConf* pika_conf) ; void StopBackup(); diff --git a/src/storage/src/backupable.cc b/src/storage/src/backupable.cc index c0094760fa..c9ba71becb 100644 --- a/src/storage/src/backupable.cc +++ b/src/storage/src/backupable.cc @@ -6,6 +6,7 @@ #include #include +#include "include/pika_conf.h" #include "storage/backupable.h" #include "storage/storage.h" @@ -143,41 +144,65 @@ Status BackupEngine::CreateNewBackup(const std::string& dir) { return s; } -Status BackupEngine::CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud_fs_options) { - Status s = Status::OK(); - - std::string src_bucket = cloud_fs_options.src_bucket.GetBucketName(); - std::string src_object_path = cloud_fs_options.src_bucket.GetObjectPath(); - cloud_fs_options.src_bucket.SetBucketName("database.backup.src", "pika."); - cloud_fs_options.dest_bucket.SetBucketName("database.backup.dst", "pika."); - - rocksdb::CloudFileSystem* cfs; - - s = rocksdb::CloudFileSystem::NewAwsFileSystem( - rocksdb::FileSystem::Default(), src_bucket, src_object_path, kRegion, "database.backup.src", - "clone_db", kRegion, cloud_fs_options, nullptr, &cfs); - - if (!s.ok()) { - return s; +std::string GenBackUpDirectory(std::string& db_path) { + // dbpath :1."db/"、2."db"、3."bak/db" + size_t lastSepPos = db_path.rfind('/'); + if (lastSepPos != std::string::npos) { + if (db_path.back() == '/') { + db_path.replace(lastSepPos, std::string::npos, "_bak/"); + return db_path; + } else { + return db_path + "_bak/"; + } + } else { + return db_path.append("_bak/"); } - std::shared_ptr fs(cfs); - auto cloud_env = NewCompositeEnv(fs); +} - // Create options and use the AWS env that we created earlier - Options options; - options.env = cloud_env.get(); +std::string DBPath(const std::string& path, const std::string& db_name) { + char buf[100]; + snprintf(buf, sizeof(buf), "%s/", db_name.data()); + return path + buf; +} - // No persistent cache - std::string persistent_cache = ""; +Status BackupEngine::CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud_fs_options, + PikaConf* pika_conf) { + Status s = Status::OK(); - // open clone - rocksdb::DBCloud* db; - s = rocksdb::DBCloud::Open(options, "clone_db", persistent_cache, 0, &db); - if (!s.ok()) { - return s; + std::string src_bucket = cloud_fs_options.src_bucket.GetBucketName(); + cloud_fs_options.src_bucket.SetBucketName(src_bucket + ".backup"); + cloud_fs_options.dest_bucket.SetBucketName(src_bucket + ".backup"); + std::string db_path_tmp = pika_conf->db_path(); + std::string clone_path = GenBackUpDirectory(db_path_tmp); + + for (auto& db_pika : pika_conf->db_structs()) { + std::string db_path = DBPath(pika_conf->db_path(), db_pika.db_name); + 
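    // For every rocksdb instance of this DB, the loop below opens the
    // instance's cloud data as a DBCloud handle whose source is the live
    // bucket/path and whose destination is the "<src_bucket>.backup" bucket
    // under the mirrored "<db_path>_bak/" layout (see GenBackUpDirectory
    // above), then calls Savepoint() so rocksdb-cloud copies the files
    // referenced by the current manifest into the backup bucket — one backup
    // per instance directory "<db>/<i>".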
std::string clone_db_path = DBPath(clone_path, db_pika.db_name); + for (int i = 0; i < db_pika.db_instance_num; i++) { + rocksdb::CloudFileSystem* cfs; + s = rocksdb::CloudFileSystem::NewAwsFileSystem( + rocksdb::FileSystem::Default(), src_bucket, + db_path + std::to_string(i), kRegion, + src_bucket + ".backup",clone_db_path + std::to_string(i), + kRegion, cloud_fs_options, nullptr, &cfs); + + if (!s.ok()) { + return s; + } + std::shared_ptr fs(cfs); + auto cloud_env = NewCompositeEnv(fs); + Options options; + options.env = cloud_env.get(); + // open clone + rocksdb::DBCloud* db; + s = rocksdb::DBCloud::Open(options, clone_db_path + std::to_string(i), + "", 0, &db); + if (!s.ok()) { + return s; + } + db->Savepoint(); + } } - - db->Savepoint(); return s; } diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc index 5282b10a4f..8c64b86eb9 100644 --- a/src/storage/tests/cloud_clone_test.cc +++ b/src/storage/tests/cloud_clone_test.cc @@ -58,7 +58,7 @@ std::string kDBPath = "db"; // This is the local directory where the clone is stored. The same // pathname is used to store data in the specified cloud bucket. //std::string kClonePath = "db"; -std::string kClonePath = "clone_db"; +std::string kClonePath = "clone"; std::string kBucketSuffix = "cloud.clone.example."; std::string kBucketSuffix2_src = "cloud2.clone.example."; std::string kBucketSuffix2_dest = "cloud2.clone.example.dst."; @@ -101,7 +101,8 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, const std::string bucketPrefix = "rockset."; // create a bucket name for debugging purposes const std::string bucketName_src = bucketPrefix + kBucketSuffix2_src; - const std::string bucketName_dest = bucketPrefix + kBucketSuffix2_dest; + const std::string bucketName_dest = bucketPrefix + kBucketSuffix2_src; + //const std::string bucketName_dest = bucketPrefix + kBucketSuffix2_dest; // Needed if using bucket prefix other than the default "rockset." cloud_fs_options2.src_bucket.SetBucketName(kBucketSuffix2_src, bucketPrefix); @@ -137,42 +138,9 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, return st; } //std::unique_ptr cloud_db2; - std::cout << "bx..." << std::endl; cloud_db->reset(db); - std::cout << "by..." << std::endl; - - CloudFileSystem* cfs_bak; - st = CloudFileSystem::NewAwsFileSystem( - FileSystem::Default(), kBucketSuffix2_src, dest_object_path, kRegion, kBucketSuffix2_dest, - dest_object_path, kRegion, cloud_fs_options2, nullptr, &cfs_bak); - - if (!st.ok()) { - fprintf(stderr, - "Unable to create an AWS environment with " - "bucket %s", - src_bucket.c_str()); - return st; - } - std::shared_ptr fs_bak(cfs_bak); - auto cloud_env_bak = NewCompositeEnv(fs_bak); - // Create options and use the AWS env that we created earlier - Options options2; - options2.env = cloud_env_bak.get(); - - // No persistent cache - std::string persistent_cache_bak = ""; - // open clone - DBCloud* db_bak; - st = DBCloud::Open(options2, kClonePath, persistent_cache_bak, 0, &db_bak); - if (!st.ok()) { - fprintf(stderr, "Unable to open clone at path %s in bucket %s. 
%s\n", - kClonePath.c_str(), kBucketSuffix2_src.c_str(), st.ToString().c_str()); - return st; - } - - cloud_db->get()->Savepoint(); - //db_bak->Savepoint(); return Status::OK(); + } TEST_F(CloudTest, clone_s3) { @@ -272,6 +240,7 @@ TEST_F(CloudTest, clone_s3) { assert(s.ok()); assert(value == "value"); + clone_db->Savepoint(); //clone_db->Flush(FlushOptions()); clone_db.release(); From 294b051384b25b204fd43f3cebc050d3b24f99c4 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 7 May 2024 15:23:19 +0800 Subject: [PATCH 092/116] support redis cache in pikacloud --- include/pika_db.h | 3 + src/pika_db.cc | 21 +++++++ src/pika_repl_bgworker.cc | 20 ++----- src/storage/include/storage/storage.h | 3 +- src/storage/src/redis.cc | 80 ++++++++++++++++++++++++++- src/storage/src/redis.h | 3 +- src/storage/src/storage.cc | 5 +- 7 files changed, 113 insertions(+), 22 deletions(-) diff --git a/include/pika_db.h b/include/pika_db.h index 6d4ed980cc..cada9fb8e7 100644 --- a/include/pika_db.h +++ b/include/pika_db.h @@ -159,6 +159,9 @@ class DB : public std::enable_shared_from_this, public pstd::noncopyable { */ rocksdb::Status SwitchMaster(bool is_old_master, bool is_new_master); + rocksdb::Status ApplyWAL(int rocksdb_id, + int type, const std::string& content); + private: bool opened_ = false; std::string dbsync_path_; diff --git a/src/pika_db.cc b/src/pika_db.cc index e04ff1cb8c..050f0b87a4 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -666,3 +666,24 @@ bool DB::FlushDB() { rocksdb::Status DB::SwitchMaster(bool is_old_master, bool is_new_master) { return storage_->SwitchMaster(is_old_master, is_new_master); } + +rocksdb::Status DB::ApplyWAL(int rocksdb_id, + int type, const std::string& content) { + if (type == storage::RocksDBRecordType::kMemtableWrite && + storage_->ShouldSkip(rocksdb_id, content)) { + return rocksdb::Status::OK(); + } + if (type == storage::RocksDBRecordType::kFlushDB) { + auto s = storage_->FlushDBAtSlave(rocksdb_id); + return s; + } + std::unordered_set redis_keys; + auto s = storage_->ApplyWAL(rocksdb_id, type, content, &redis_keys); + if (!s.ok()) { + return s; + } + for (const auto& key : redis_keys) { + cache_->Del({key}); + } + return s; +} diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 414c8b887c..8df65190cf 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -155,23 +155,11 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { return; } db->Logger()->Put(binlog_res.binlog()); - auto storage = g_pika_server->GetDB(worker->db_name_)->storage(); - if (binlog_item.type() == storage::RocksDBRecordType::kMemtableWrite && - storage->ShouldSkip(binlog_item.rocksdb_id(), binlog_item.content())) { - continue; - } - if (binlog_item.type() == storage::RocksDBRecordType::kFlushDB) { - auto s = storage->FlushDBAtSlave(binlog_item.rocksdb_id()); - if (!s.ok()) { - slave_db->SetReplState(ReplState::kTryConnect); - LOG(WARNING) << "flushdb at slave node failed, error: " << s.ToString(); - return; - } - continue; - } - auto s = storage->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); + auto db = g_pika_server->GetDB(worker->db_name_); + auto s = db->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); if (!s.ok()) { - LOG(WARNING) << "rocksdb apply wal failed, error: " << s.ToString(); + LOG(WARNING) << "applywal at slave node failed, error: " << s.ToString(); + slave_db->SetReplState(ReplState::kTryConnect); return; } } else { diff --git 
a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index e640629f96..c02d945dbe 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -201,7 +201,8 @@ class Storage { std::unique_ptr& GetDBInstance(const std::string& key); - Status ApplyWAL(int rocksdb_id, int type, const std::string& content); + Status ApplyWAL(int rocksdb_id, int type, const std::string& content, + std::unordered_set* redis_keys); bool ShouldSkip(int rocksdb_id, const std::string& content); Status FlushDBAtSlave(int rocksdb_id); diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index d9f12e1d3f..00845a6b03 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -659,7 +659,73 @@ bool Redis::ShouldSkip(const std::string& content) { return rocksdb::WriteBatchInternal::Sequence(&batch) != sq_number + 1; } -Status Redis::ApplyWAL(int type, const std::string& content) { +class WriteBatchHandler : public rocksdb::WriteBatch::Handler { +public: + WriteBatchHandler(std::unordered_set* redis_keys) + : redis_keys_(redis_keys) {} + + Status PutCF(uint32_t column_family_id, const Slice& key, + const Slice& value) override { + return DeleteCF(column_family_id, key); + } + + Status DeleteCF(uint32_t column_family_id, const Slice& key) override { + switch (column_family_id) { + case kStringsCF: + ParsedBaseKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixK + pbk.Key().ToString()); + break; + case kHashesMetaCF: + ParsedBeseMetaKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixH + pbk.Key().ToString()); + break; + case kHashesDataCF: + ParsedHashesDataKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixH + pbk.Key().ToString()); + break; + case kSetsMetaCF: + ParsedBeseMetaKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixS + pbk.Key().ToString()); + break; + case kSetsDataCF: + ParsedSetsMemberKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixS + pbk.Key().ToString()); + break; + case kListsMetaCF: + ParsedBaseMetaKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixL + pbk.Key().ToString()); + break; + case kListsDataCF: + ParsedListsDataKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixL + pbk.Key().ToString()); + break; + case kZsetsMetaCF: + ParsedBaseMetaKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixZ + pbk.Key().ToString()); + break; + case kZsetsDataCF: + ParsedZSetsMemberKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixZ + pbk.Key().ToString()); + break; + case kZsetsScoreCF: + ParsedZSetsScoreKey pbk(key); + redis_keys_->insert(PCacheKeyPrefixZ + pbk.Key().ToString()); + break; + case kStreamsMetaCF: + LOG(INFO) << "rediscache don't cache stream type"; + break; + case kStreamsDataCF: + LOG(INFO) << "rediscache don't cache stream type"; + break; + }; + return Status::OK(); + } +private: + std::unordered_set* redis_keys_; +}; + +Status Redis::ApplyWAL(int type, const std::string& content, + std::unordered_set* redis_keys) { rocksdb::ReplicationLogRecord::Type rtype = static_cast(type); rocksdb::ReplicationLogRecord rlr; rocksdb::DBCloud::ApplyReplicationLogRecordInfo info; @@ -670,10 +736,20 @@ Status Redis::ApplyWAL(int type, const std::string& content) { LOG(WARNING) << "applying rocksdb WAL, rocksdb_id: " << index_ << " log record type: " << rtype << " status: " << s.ToString(); + if (!s.ok()) { + return s; + } + if (type != 0) { + return s; + } + + rocksdb::WriteBatch batch; + s = rocksdb::WriteBatchInternal::SetContents(&batch, content); + WriteBatchHandler handler(redis_keys); + s = batch.Iterate(&handler); 
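  // At this point WriteBatchHandler has visited every Put/Delete in the
  // batch, mapping each column family back to the owning redis key (tagged
  // with its cache type prefix) and collecting the results in *redis_keys.
  // The caller, DB::ApplyWAL() in pika_db.cc, then drops those keys from the
  // redis cache so a slave does not serve stale cached values after applying
  // a master's WAL record.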
return s; } - std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord record) { Redis* redis_inst = (Redis*)inst_; //TODO(wangshaoyi): get from storage diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 16ff551e66..514fed5ab4 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -397,7 +397,8 @@ class Redis { } #ifdef USE_S3 - Status ApplyWAL(int type, const std::string& content); + Status ApplyWAL(int type, const std::string& content, + std::unordered_set* redis_keys); bool ShouldSkip(const std::string& content); Status FlushDBAtSlave(); Status SwitchMaster(bool is_old_master, bool is_new_master); diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc index 56a2859e2f..ef9e370efc 100644 --- a/src/storage/src/storage.cc +++ b/src/storage/src/storage.cc @@ -2479,9 +2479,10 @@ Status Storage::SwitchMaster(bool is_old_master, bool is_new_master) { } Status Storage::ApplyWAL(int rocksdb_id, - int type, const std::string& content) { + int type, const std::string& content, + std::unordered_set* redis_keys) { auto& inst = insts_[rocksdb_id]; - return inst->ApplyWAL(type, content); + return inst->ApplyWAL(type, content, redis_keys); } From f9e340622f416cafef50179208bd4ae8baa56e54 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 7 May 2024 15:57:33 +0800 Subject: [PATCH 093/116] fix compile error --- CMakeLists.txt | 2 +- src/pika_db.cc | 1 + src/pika_repl_bgworker.cc | 3 +- src/storage/src/redis.cc | 62 +++++++++++++++++++++++---------------- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d0d587c6ce..701b3c174c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -592,7 +592,7 @@ ExternalProject_Add(rocksdb #temporary for debug, skip download from github http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz URL_HASH - MD5=5f4b946417d6585431138a54e35cff29 + MD5=3302dfcfc2fda422197d0517a1335edb DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/pika_db.cc b/src/pika_db.cc index 050f0b87a4..2f815b78aa 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -7,6 +7,7 @@ #include #include "include/pika_db.h" +#include "storage/storage_define.h" #include "include/pika_cmd_table_manager.h" #include "include/pika_rm.h" diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 8df65190cf..7edb47c40d 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -155,8 +155,7 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { return; } db->Logger()->Put(binlog_res.binlog()); - auto db = g_pika_server->GetDB(worker->db_name_); - auto s = db->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); + auto s = g_pika_server->GetDB(worker->db_name_)->ApplyWAL(binlog_item.rocksdb_id(), binlog_item.type(), binlog_item.content()); if (!s.ok()) { LOG(WARNING) << "applywal at slave node failed, error: " << s.ToString(); slave_db->SetReplState(ReplState::kTryConnect); diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 00845a6b03..e5bf7ab986 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -671,53 +671,65 @@ class WriteBatchHandler : public rocksdb::WriteBatch::Handler { Status DeleteCF(uint32_t column_family_id, const Slice& key) override { switch (column_family_id) { - case kStringsCF: + case kStringsCF: { ParsedBaseKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixK + pbk.Key().ToString()); + redis_keys_->insert("K" + pbk.Key().ToString()); break; - case kHashesMetaCF: - 
ParsedBeseMetaKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixH + pbk.Key().ToString()); + } + case kHashesMetaCF: { + ParsedBaseMetaKey pbk(key); + redis_keys_->insert("H" + pbk.Key().ToString()); break; - case kHashesDataCF: + } + case kHashesDataCF: { ParsedHashesDataKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixH + pbk.Key().ToString()); + redis_keys_->insert("H" + pbk.Key().ToString()); break; - case kSetsMetaCF: - ParsedBeseMetaKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixS + pbk.Key().ToString()); + } + case kSetsMetaCF: { + ParsedBaseMetaKey pbk(key); + redis_keys_->insert("S" + pbk.Key().ToString()); break; - case kSetsDataCF: + } + case kSetsDataCF: { ParsedSetsMemberKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixS + pbk.Key().ToString()); + redis_keys_->insert("S" + pbk.Key().ToString()); break; - case kListsMetaCF: + } + case kListsMetaCF: { ParsedBaseMetaKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixL + pbk.Key().ToString()); + redis_keys_->insert("L" + pbk.Key().ToString()); break; - case kListsDataCF: + } + case kListsDataCF: { ParsedListsDataKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixL + pbk.Key().ToString()); + redis_keys_->insert("L" + pbk.key().ToString()); break; - case kZsetsMetaCF: + } + case kZsetsMetaCF: { ParsedBaseMetaKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixZ + pbk.Key().ToString()); + redis_keys_->insert("Z" + pbk.Key().ToString()); break; - case kZsetsDataCF: + } + case kZsetsDataCF: { ParsedZSetsMemberKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixZ + pbk.Key().ToString()); + redis_keys_->insert("Z" + pbk.Key().ToString()); break; - case kZsetsScoreCF: + } + case kZsetsScoreCF: { ParsedZSetsScoreKey pbk(key); - redis_keys_->insert(PCacheKeyPrefixZ + pbk.Key().ToString()); + redis_keys_->insert("Z" + pbk.key().ToString()); break; - case kStreamsMetaCF: + } + case kStreamsMetaCF: { LOG(INFO) << "rediscache don't cache stream type"; break; - case kStreamsDataCF: + } + case kStreamsDataCF: { LOG(INFO) << "rediscache don't cache stream type"; break; - }; + } + } return Status::OK(); } private: From c19c7f831da9f896d7c016968332c29ba219ae13 Mon Sep 17 00:00:00 2001 From: baixin Date: Wed, 8 May 2024 14:54:39 +0800 Subject: [PATCH 094/116] clean code --- src/pika_db.cc | 4 ++-- src/storage/src/backupable.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pika_db.cc b/src/pika_db.cc index f729572813..72b7e70988 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -339,10 +339,10 @@ bool DB::RunBgsaveEngine() { void DB::RunCloudBgsaveEngine(rocksdb::CloudFileSystemOptions& cloud_fs_options) { rocksdb::Status s = bgsave_engine_->CreateNewCloudBackup(cloud_fs_options, g_pika_conf.get()); if (!s.ok()) { - LOG(WARNING) << db_name_ << " create new backup failed :" << s.ToString(); + LOG(WARNING) << db_name_ << " create new cloud backup failed :" << s.ToString(); return; } - LOG(INFO) << db_name_ << " create new backup finished."; + LOG(INFO) << db_name_ << " create new cloud backup finished."; } BgSaveInfo DB::bgsave_info() { diff --git a/src/storage/src/backupable.cc b/src/storage/src/backupable.cc index c9ba71becb..db1f50ab49 100644 --- a/src/storage/src/backupable.cc +++ b/src/storage/src/backupable.cc @@ -10,7 +10,7 @@ #include "storage/backupable.h" #include "storage/storage.h" -const std::string kRegion = "us-west-2"; +extern const std::string kRegion; namespace storage { From d83063d45c19e8a1a95a5baf33a39bf621bd965f Mon Sep 17 00:00:00 2001 From: baixin Date: Thu, 9 May 2024 10:35:10 +0800 Subject: 
[PATCH 095/116] clean code --- include/pika_define.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/pika_define.h b/include/pika_define.h index cf2062b63f..b812c8afb7 100644 --- a/include/pika_define.h +++ b/include/pika_define.h @@ -332,6 +332,11 @@ constexpr int PIKA_CACHE_READ = 1; constexpr int PIKA_LOCAL = 0; constexpr int PIKA_CLOUD = 1; +/* + * cloud tmp conf + */ +const std::string kRegion = "us-west-2"; + /* * cache size */ From b6e9bd30a7b4c2f7be4a46124a333ec4c6341b98 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 9 May 2024 14:25:47 +0800 Subject: [PATCH 096/116] use rocksdb-cloud sst-file-cache --- CMakeLists.txt | 5 +++-- src/storage/src/redis.cc | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 701b3c174c..a263bd29fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -590,9 +590,10 @@ ExternalProject_Add(rocksdb ${LIBJEMALLOC_NAME} URL #temporary for debug, skip download from github - http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz + #http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz + /home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH - MD5=3302dfcfc2fda422197d0517a1335edb + MD5=864ec1f5b1edf91373f5922d77485562 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index e5bf7ab986..94b9b6e87f 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -124,6 +124,7 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& storage_options.cloud_fs_options.resync_on_open = true; storage_options.cloud_fs_options.resync_manifest_on_open = true; storage_options.cloud_fs_options.skip_dbid_verification = true; + storage_options.cloud_fs_options.sst_file_cache = rocksdb::NewLRUCache(10 * 1024 * 1024 * 1024); storage_options.options.replication_log_listener = log_listener_; is_master_.store(tmp_storage_options.cloud_fs_options.is_master); if (!tmp_storage_options.cloud_fs_options.is_master) { From 04ee23c3a3d70c107fa4fdbd3f520eab6c73b317 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 9 May 2024 14:27:53 +0800 Subject: [PATCH 097/116] add comment for debug code --- src/pika_server.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pika_server.cc b/src/pika_server.cc index b81813242b..6302b573a8 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1827,6 +1827,7 @@ void PikaServer::CacheConfigInit(cache::CacheConfig& cache_cfg) { bool PikaServer::UploadMetaToSentinel(const std::string& local_path, const std::string& s3_bucket, const std::string& remote_path) { + //TODO(baixin): remove debug code sentinel_addr_ = "http://127.0.0.1:18080/api/topom/upload-s3"; Aws::String url(sentinel_addr_); if (sentinel_client_ == nullptr) { From 33cb85210f9a66369a672d9c04e729657bd2c667 Mon Sep 17 00:00:00 2001 From: baixin Date: Thu, 9 May 2024 16:17:27 +0800 Subject: [PATCH 098/116] clean code --- include/pika_define.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/pika_define.h b/include/pika_define.h index b812c8afb7..73c10e6031 100644 --- a/include/pika_define.h +++ b/include/pika_define.h @@ -334,6 +334,7 @@ constexpr int PIKA_CLOUD = 1; /* * cloud tmp conf + * todo: TBD based on deployment status */ const std::string kRegion = "us-west-2"; From e0c07562fc0a5a5232959af0758f21a2e95747b2 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 10 May 2024 11:34:00 +0800 Subject: [PATCH 099/116] pikacloud support local cache --- include/pika_conf.h | 7 +++++++ src/pika_conf.cc | 6 ++++++ 
src/pika_server.cc | 1 + src/storage/include/storage/storage.h | 1 + src/storage/src/redis.cc | 2 +- 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/pika_conf.h b/include/pika_conf.h index 2b1f23e00b..6407ccd63f 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -355,6 +355,9 @@ class PikaConf : public pstd::BaseConf { void UnsetCacheDisableFlag() { tmp_cache_disable_flag_ = false; } bool enable_blob_files() { return enable_blob_files_; } int64_t min_blob_size() { return min_blob_size_; } +#ifdef USE_S3 + int64_t SSTCacheSize() const { return sst_cache_size_; } +#endif int64_t blob_file_size() { return blob_file_size_; } std::string blob_compression_type() { return blob_compression_type_; } bool enable_blob_garbage_collection() { return enable_blob_garbage_collection_; } @@ -825,6 +828,10 @@ class PikaConf : public pstd::BaseConf { int64_t blob_file_size_ = 256 * 1024 * 1024; // 256M std::string blob_compression_type_ = "none"; +#ifdef USE_S3 + int64_t sst_cache_size_ = 10 << 30; +#endif + // rocksdb-cloud options std::string cloud_endpoint_override_; std::string cloud_access_key_; diff --git a/src/pika_conf.cc b/src/pika_conf.cc index 47b6f9bf59..80745fd9a4 100644 --- a/src/pika_conf.cc +++ b/src/pika_conf.cc @@ -552,6 +552,12 @@ int PikaConf::Load() { if (min_blob_size_ <= 0) { min_blob_size_ = 4096; } +#ifdef USE_S3 + GetConfInt64("sst-cache-size", &sst_cache_size_); + if (sst_cache_size_ <= 0) { + sst_cache_size_ = 10 << 30; + } +#endif GetConfInt64Human("blob-file-size", &blob_file_size_); if (blob_file_size_ <= 0) { blob_file_size_ = 256 * 1024 * 1024; diff --git a/src/pika_server.cc b/src/pika_server.cc index b81813242b..0d72b6afb9 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -108,6 +108,7 @@ PikaServer::PikaServer() // init role std::string slaveof = g_pika_conf->slaveof(); #ifdef USE_S3 + storage_options_.sst_cache_size_ = g_pika_conf->SSTCacheSize(); storage_options_.cloud_fs_options.is_master = true; #endif if (!slaveof.empty()) { diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index c02d945dbe..22b39e464d 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -77,6 +77,7 @@ struct StorageOptions { size_t small_compaction_duration_threshold = 10000; #ifdef USE_S3 rocksdb::CloudFileSystemOptions cloud_fs_options; // rocksdb-cloud option + int64_t sst_cache_size_; #endif Status ResetOptions(const OptionType& option_type, const std::unordered_map& options_map); }; diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 94b9b6e87f..5b3d9d7ad8 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -124,7 +124,7 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& storage_options.cloud_fs_options.resync_on_open = true; storage_options.cloud_fs_options.resync_manifest_on_open = true; storage_options.cloud_fs_options.skip_dbid_verification = true; - storage_options.cloud_fs_options.sst_file_cache = rocksdb::NewLRUCache(10 * 1024 * 1024 * 1024); + storage_options.cloud_fs_options.sst_file_cache = rocksdb::NewLRUCache(storage_options_.sst_cache_size_); storage_options.options.replication_log_listener = log_listener_; is_master_.store(tmp_storage_options.cloud_fs_options.is_master); if (!tmp_storage_options.cloud_fs_options.is_master) { From 019d41e2d797f54a443fe3d84fd8ff0425cc13d2 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 10 May 2024 19:10:52 +0800 Subject: [PATCH 100/116] fix by 
review comments --- CMakeLists.txt | 5 ++--- include/pika_conf.h | 2 +- src/pika_conf.cc | 2 +- src/pika_db.cc | 4 ++-- src/storage/include/storage/storage.h | 2 +- src/storage/include/storage/storage_define.h | 2 +- src/storage/src/redis.cc | 9 ++++----- 7 files changed, 12 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a263bd29fc..701b3c174c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -590,10 +590,9 @@ ExternalProject_Add(rocksdb ${LIBJEMALLOC_NAME} URL #temporary for debug, skip download from github - #http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz - /home/wangshaoyi/work/rocksdb_cloud.tar.gz + http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz URL_HASH - MD5=864ec1f5b1edf91373f5922d77485562 + MD5=3302dfcfc2fda422197d0517a1335edb DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/include/pika_conf.h b/include/pika_conf.h index 6407ccd63f..ae62366094 100644 --- a/include/pika_conf.h +++ b/include/pika_conf.h @@ -829,7 +829,7 @@ class PikaConf : public pstd::BaseConf { std::string blob_compression_type_ = "none"; #ifdef USE_S3 - int64_t sst_cache_size_ = 10 << 30; + int64_t sst_cache_size_ = 10LL << 30; #endif // rocksdb-cloud options diff --git a/src/pika_conf.cc b/src/pika_conf.cc index 80745fd9a4..c732f4bf15 100644 --- a/src/pika_conf.cc +++ b/src/pika_conf.cc @@ -555,7 +555,7 @@ int PikaConf::Load() { #ifdef USE_S3 GetConfInt64("sst-cache-size", &sst_cache_size_); if (sst_cache_size_ <= 0) { - sst_cache_size_ = 10 << 30; + sst_cache_size_ = 10LL << 30; } #endif GetConfInt64Human("blob-file-size", &blob_file_size_); diff --git a/src/pika_db.cc b/src/pika_db.cc index 2f815b78aa..2b74903cf5 100644 --- a/src/pika_db.cc +++ b/src/pika_db.cc @@ -670,11 +670,11 @@ rocksdb::Status DB::SwitchMaster(bool is_old_master, bool is_new_master) { rocksdb::Status DB::ApplyWAL(int rocksdb_id, int type, const std::string& content) { - if (type == storage::RocksDBRecordType::kMemtableWrite && + if (type == static_cast(storage::RocksDBRecordType::kMemtableWrite) && storage_->ShouldSkip(rocksdb_id, content)) { return rocksdb::Status::OK(); } - if (type == storage::RocksDBRecordType::kFlushDB) { + if (type == static_cast(storage::RocksDBRecordType::kFlushDB)) { auto s = storage_->FlushDBAtSlave(rocksdb_id); return s; } diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h index 22b39e464d..7fee31d6ee 100644 --- a/src/storage/include/storage/storage.h +++ b/src/storage/include/storage/storage.h @@ -77,7 +77,7 @@ struct StorageOptions { size_t small_compaction_duration_threshold = 10000; #ifdef USE_S3 rocksdb::CloudFileSystemOptions cloud_fs_options; // rocksdb-cloud option - int64_t sst_cache_size_; + int64_t sst_cache_size_ = 10LL << 30; #endif Status ResetOptions(const OptionType& option_type, const std::unordered_map& options_map); }; diff --git a/src/storage/include/storage/storage_define.h b/src/storage/include/storage/storage_define.h index a52b158baa..4e97c20b91 100644 --- a/src/storage/include/storage/storage_define.h +++ b/src/storage/include/storage/storage_define.h @@ -133,7 +133,7 @@ inline const char* SeekUserkeyDelim(const char* ptr, int length) { // this enum is an extention of ReplicationLogRecord's Type // reserves kMemtableWrite, kMemtableSwitch, kManifestWrite, // add kFlushDB which indicates a pika's flushdb call. 
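 // Producer/consumer sketch (illustrative, pieced together from the patches
 // in this series; the exact integral cast follows the wal writer's Put
 // signature):
 //   master: Redis::FlushDB() appends a kFlushDB record to the binlog, e.g.
 //     wal_writer_->Put("flushdb", 0 /*db_id*/, index_,
 //                      static_cast<uint32_t>(RocksDBRecordType::kFlushDB));
 //   slave:  DB::ApplyWAL() sees kFlushDB and calls FlushDBAtSlave(), which
 //     closes, wipes and reopens the local rocksdb instance instead of
 //     replaying a write batch.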
- enum RocksDBRecordType : uint32_t { + enum class RocksDBRecordType : uint32_t { kMemtableWrite, kMemtableSwitch, kManifestWrite, diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 5b3d9d7ad8..de98568ae0 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -76,8 +76,7 @@ void Redis::Close() { Status Redis::FlushDBAtSlave() { Close(); pstd::DeleteDir(db_path_); - auto s = Open(storage_options_, db_path_); - return s; + return Open(storage_options_, db_path_); } Status Redis::FlushDB() { @@ -108,7 +107,7 @@ Status Redis::FlushDB() { Close(); pstd::DeleteDir(db_path_); Open(storage_options_, db_path_); - wal_writer_->Put("flushdb", 0/*db_id*/, index_, kFlushDB); + wal_writer_->Put("flushdb", 0/*db_id*/, index_, static_cast(RocksDBRecordType::kFlushDB)); return s; } @@ -734,7 +733,7 @@ class WriteBatchHandler : public rocksdb::WriteBatch::Handler { return Status::OK(); } private: - std::unordered_set* redis_keys_; + std::unordered_set* redis_keys_ = nullptr; }; Status Redis::ApplyWAL(int type, const std::string& content, @@ -780,7 +779,7 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re LOG(WARNING) << "write binlogitem " << " db_id: " << db_id << " type: " << record.type; auto s = wal_writer_->Put(record.contents, db_id, - redis_inst->GetIndex(), RocksDBRecordType(record.type)); + redis_inst->GetIndex(), record.type); if (!s.ok()) { LOG(ERROR) << "write binlog failed, db_id: " << db_id << " rocksdb_id: " << redis_inst->GetIndex(); From 5e9aea8467ef3dee7f03fe1a648ea7d584808009 Mon Sep 17 00:00:00 2001 From: baixin Date: Fri, 10 May 2024 20:53:31 +0800 Subject: [PATCH 101/116] test 360_s3 --- src/storage/tests/cloud_clone_test.cc | 98 +++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 12 deletions(-) diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc index 8c64b86eb9..077fa76045 100644 --- a/src/storage/tests/cloud_clone_test.cc +++ b/src/storage/tests/cloud_clone_test.cc @@ -72,8 +72,78 @@ std::string kBucketSuffix2_dest = "cloud2.clone.example.dst."; // the same bucket (obviously with different pathnames). // -std::string kRegion = "us-west-2"; +std::string kRegion = "us-east-1"; +TEST_F(CloudTest, test_360_s3) { + // cloud environment config options here + CloudFileSystemOptions cloud_fs_options; + + cloud_fs_options.endpoint_override = "beijing2.xstore.qihoo.net"; + cloud_fs_options.credentials.InitializeSimple("YHDIJ1LCITN7YHLETHLW", "fR5b2hEOzeogmiR01FzvYpb9BNt8eSrt0crHy510"); + if (!cloud_fs_options.credentials.HasValid().ok()) { + fprintf( + stderr, + "Please set env variables " + "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY with cloud credentials"); + return; + } + + std::string bucketName = "xx"; + cloud_fs_options.src_bucket.SetBucketName("pulsar-s3-test-beijing2"); + cloud_fs_options.dest_bucket.SetBucketName("pulsar-s3-test-beijing2"); + // Create a new AWS cloud env Status + CloudFileSystem* cfs; + Status s = CloudFileSystem::NewAwsFileSystem( + FileSystem::Default(), "pulsar-s3-test-beijing2", kDBPath, kRegion, "pulsar-s3-test-beijing2", + kDBPath, kRegion, cloud_fs_options, nullptr, &cfs); + if (!s.ok()) { + fprintf(stderr, "Unable to create cloud env in bucket %s. %s\n", + bucketName.c_str(), s.ToString().c_str()); + return; + } + + + // Store a reference to a cloud env. A new cloud env object should be + // associated with every new cloud-db. 
+ auto cloud_env = NewCompositeEnv(std::shared_ptr(cfs)); + + // Create options and use the AWS env that we created earlier + Options options; + options.env = cloud_env.get(); + options.create_if_missing = true; + + // No persistent cache + std::string persistent_cache = ""; + + // Create and Open DB + DBCloud* db; + s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); + + if (!s.ok()) { + fprintf(stderr, "--------------xxx Unable to open db at path %s in bucket %s. %s\n", + kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); + return; + } + + // Put key-value into main db + s = db->Put(WriteOptions(), "key1", "value"); + assert(s.ok()); + std::string value; + + // get value from main db + s = db->Get(ReadOptions(), "key1", &value); + assert(s.ok()); + assert(value == "value"); + + // Flush all data from main db to sst files. + db->Flush(FlushOptions()); + + delete db; + + fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", + kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); +} +/* Status CloneDB(const std::string& clone_name, const std::string& src_bucket, const std::string& src_object_path, const std::string& dest_bucket, @@ -96,7 +166,8 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, // globally unique. S3 bucket-namess need to be globlly unique. // If you want to rerun this example, then unique user-name suffix here. char* user = getenv("USER"); - kBucketSuffix2_src.append(user); kBucketSuffix2_dest.append(user); + kBucketSuffix2_src.append(user); + kBucketSuffix2_dest.append(user); const std::string bucketPrefix = "rockset."; // create a bucket name for debugging purposes @@ -133,7 +204,7 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, DBCloud* db; st = DBCloud::Open(options, kClonePath, persistent_cache, 0, &db); if (!st.ok()) { - fprintf(stderr, "Unable to open clone at path %s in bucket %s. %s\n", + fprintf(stderr, "iiiiii-----------------Unable to open clone at path %s in bucket %s. %s\n", kClonePath.c_str(), kBucketSuffix2_src.c_str(), st.ToString().c_str()); return st; } @@ -160,7 +231,7 @@ TEST_F(CloudTest, clone_s3) { // Append the user name to the bucket name in an attempt to make it // globally unique. S3 bucket-namess need to be globlly unique. // If you want to rerun this example, then unique user-name suffix here. - char* user = getenv("USER"); + /*char* user = getenv("USER"); kBucketSuffix.append(user); const std::string bucketPrefix = "rockset."; @@ -169,12 +240,14 @@ TEST_F(CloudTest, clone_s3) { // Needed if using bucket prefix other than the default "rockset." cloud_fs_options.src_bucket.SetBucketName(kBucketSuffix, bucketPrefix); - cloud_fs_options.dest_bucket.SetBucketName(kBucketSuffix, bucketPrefix); - + cloud_fs_options.dest_bucket.SetBucketName(kBucketSuffix, bucketPrefix);*//* + std::string bucketName = "xx"; + cloud_fs_options.src_bucket.SetBucketName("database", "pika."); + cloud_fs_options.dest_bucket.SetBucketName("database", "pika."); // Create a new AWS cloud env Status CloudFileSystem* cfs; Status s = CloudFileSystem::NewAwsFileSystem( - FileSystem::Default(), kBucketSuffix, kDBPath, kRegion, kBucketSuffix, + FileSystem::Default(), "database", kDBPath, kRegion, "database", kDBPath, kRegion, cloud_fs_options, nullptr, &cfs); if (!s.ok()) { fprintf(stderr, "Unable to create cloud env in bucket %s. 
%s\n", @@ -198,8 +271,9 @@ TEST_F(CloudTest, clone_s3) { // Create and Open DB DBCloud* db; s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); + if (!s.ok()) { - fprintf(stderr, "Unable to open db at path %s in bucket %s. %s\n", + fprintf(stderr, "--------------xxx Unable to open db at path %s in bucket %s. %s\n", kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); return; } @@ -226,7 +300,7 @@ TEST_F(CloudTest, clone_s3) { s = CloneDB("clone1", kBucketSuffix, kDBPath, kBucketSuffix, kClonePath, cloud_fs_options, &clone_db, &clone_env); if (!s.ok()) { - fprintf(stderr, "Unable to clone db at path %s in bucket %s. %s\n", + fprintf(stderr, "-------yy----Unable to clone db at path %s in bucket %s. %s\n", kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); return; } @@ -248,8 +322,8 @@ TEST_F(CloudTest, clone_s3) { fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); -} - +}*/ +/* TEST_F(CloudTest, get_clone_s3) { // cloud environment config options here CloudFileSystemOptions cloud_fs_options; @@ -462,7 +536,7 @@ TEST_F(CloudTest, del_bucket_s3) { } Aws::ShutdownAPI(options); -} +}*/ int main(int argc, char** argv) { if (!pstd::FileExists("./log")) { From bf6ef73a918c38f549c0aa5a384718b959559a77 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 10 May 2024 18:44:15 +0800 Subject: [PATCH 102/116] remove debug log --- CMakeLists.txt | 2 +- src/pika_cloud_binlog.cc | 2 -- src/pika_repl_bgworker.cc | 4 ---- src/storage/src/redis.cc | 17 ++--------------- 4 files changed, 3 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a263bd29fc..8f87168101 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,7 +593,7 @@ ExternalProject_Add(rocksdb #http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz /home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH - MD5=864ec1f5b1edf91373f5922d77485562 + MD5=345d9b6a73c302caf4a2f51313ce70e9 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/pika_cloud_binlog.cc b/src/pika_cloud_binlog.cc index f1f2f22823..03fb4b87f2 100644 --- a/src/pika_cloud_binlog.cc +++ b/src/pika_cloud_binlog.cc @@ -170,7 +170,6 @@ Status CloudBinlog::GetOldestBinlogToKeep(uint32_t* filenum, uint32_t* term, uin if (term) { *term = version_->term_; } - LOG(WARNING) << "oldest binlog filenum to keep is: " << *filenum; return Status::OK(); } @@ -221,7 +220,6 @@ Status CloudBinlog::Put(const std::string& item, uint32_t db_id, uint32_t rocksd } version_->keep_filenum_ = keep_filenum; - LOG(WARNING) << "rocksdb_id: " << rocksdb_id << "type: " << type << " oldest filenum to keep: " << keep_filenum; return s; } diff --git a/src/pika_repl_bgworker.cc b/src/pika_repl_bgworker.cc index 7edb47c40d..925efb9a25 100644 --- a/src/pika_repl_bgworker.cc +++ b/src/pika_repl_bgworker.cc @@ -84,8 +84,6 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { } } - LOG(WARNING) << "slave receive binlogsync, begin offset: "<< pb_begin.ToString() << " end offset: " << pb_end.ToString(); - if (pb_begin == LogOffset()) { only_keepalive = true; } @@ -135,7 +133,6 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { // empty binlog treated as keepalive packet if (binlog_res.binlog().empty()) { - LOG(WARNING) << "slave receive empty binlog item"; continue; } @@ -193,7 +190,6 @@ void PikaReplBgWorker::HandleBGWorkerWriteBinlog(void* arg) { ack_end = productor_status; ack_end.l_offset.term = pb_end.l_offset.term; } - LOG(WARNING) << "slave Reply to master, 
ack_start: "<< ack_start.ToString() << " ack_end: " << ack_end.ToString() << "pb_end: " << pb_end.ToString(); g_pika_rm->SendBinlogSyncAckRequest(db_name, ack_start, ack_end); } diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 5b3d9d7ad8..02ca44730c 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -581,18 +581,14 @@ Status Redis::OpenCloudEnv(rocksdb::CloudFileSystemOptions opts, const std::stri } Status Redis::ReOpenRocksDB(const storage::StorageOptions& opt) { - LOG(WARNING) << "ReOpenRocksDB, closing old rocksdb"; Close(); - LOG(WARNING) << "ReOpenRocksDB, opening new rocksdb"; Open(opt, db_path_); return Status::OK(); } Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { - DEFER { - LOG(WARNING) << "is_old_master: " << is_old_master << " is_new_master: " << is_new_master << " done"; - }; - LOG(WARNING) << "is_old_master: " << is_old_master << " is_new_master: " << is_new_master; + LOG(WARNING) << "switchMaster from " << (is_old_master ? "master" : "slave") + << " to " << (is_new_master ? "master" : "slave"); if (is_old_master && is_new_master) { // Do nothing return Status::OK(); @@ -627,7 +623,6 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { } uint64_t remote_manifest_sequence = 0; cfs_->GetMaxManifestSequenceFromCurrentManifest(db_->GetName(), &remote_manifest_sequence); - LOG(WARNING) << "switchmaster, remote_manifest_sequence: " << remote_manifest_sequence << " local_manifest_sequence: " << local_manifest_sequence; // local version behind remote, directly reopen if (local_manifest_sequence < remote_manifest_sequence) { return ReOpenRocksDB(storage_options); @@ -642,12 +637,10 @@ Status Redis::SwitchMaster(bool is_old_master, bool is_new_master) { for (const auto& cf : handles_) { db_->SetOptions(cf, db_options); } - LOG(WARNING) << "flush memtables ..."; rocksdb::FlushOptions fops; fops.wait = true; db_->Flush(fops, handles_); - LOG(WARNING) << "flush memtables done"; return Status::OK(); } return Status::OK(); @@ -746,9 +739,6 @@ Status Redis::ApplyWAL(int type, const std::string& content, rlr.type = rtype; auto s = db_->ApplyReplicationLogRecord(rlr, "", nullptr, true, &info, rocksdb::DB::AR_EVICT_OBSOLETE_FILES); - LOG(WARNING) << "applying rocksdb WAL, rocksdb_id: " << index_ - << " log record type: " << rtype - << " status: " << s.ToString(); if (!s.ok()) { return s; } @@ -773,12 +763,9 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re } if (!redis_inst->IsMaster()) { - LOG(WARNING) << "rocksdb not master, skip write binlog"; return "0"; } - LOG(WARNING) << "write binlogitem " << " db_id: " << db_id << " type: " << record.type; - auto s = wal_writer_->Put(record.contents, db_id, redis_inst->GetIndex(), RocksDBRecordType(record.type)); if (!s.ok()) { From 900ebc6a70939b4663a71b505f61b16d8d7f0a66 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Mon, 13 May 2024 12:06:05 +0800 Subject: [PATCH 103/116] remove debug code --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f87168101..a263bd29fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,7 +593,7 @@ ExternalProject_Add(rocksdb #http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz /home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH - MD5=345d9b6a73c302caf4a2f51313ce70e9 + MD5=864ec1f5b1edf91373f5922d77485562 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND From f26bd31871fea77f700842e79e2873946c2aef0f Mon Sep 17 00:00:00 2001 From: baixin 
Date: Mon, 13 May 2024 14:23:59 +0800 Subject: [PATCH 104/116] fix review comment --- src/pika_admin.cc | 2 ++ src/pika_server.cc | 1 - src/storage/src/backupable.cc | 2 ++ src/storage/tests/cloud_clone_test.cc | 45 ++++++++++++++------------- 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/pika_admin.cc b/src/pika_admin.cc index af6bafc5e0..793a1e210a 100644 --- a/src/pika_admin.cc +++ b/src/pika_admin.cc @@ -2842,6 +2842,8 @@ void DelbackupCmd::Do() { } while (truncated); //del bucket + //todo: At present, this operation is not supported online. + // It will be modified according to deployment in the future auto bucket_del_result = s3_client.DeleteBucket(request_del_bucket); if (!bucket_del_result.IsSuccess()) { res_.SetRes(CmdRes::kErrOther, "DeleteBucket error: " + bucket_del_result.GetError().GetMessage()); diff --git a/src/pika_server.cc b/src/pika_server.cc index 394bdb1bc9..1752235320 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1830,7 +1830,6 @@ void PikaServer::CacheConfigInit(cache::CacheConfig& cache_cfg) { bool PikaServer::UploadMetaToSentinel(const std::string& local_path, const std::string& s3_bucket, const std::string& remote_path) { - sentinel_addr_ = "http://127.0.0.1:18080/api/topom/upload-s3"; Aws::String url(sentinel_addr_); if (sentinel_client_ == nullptr) { sentinel_client_ = CreateHttpClient(Aws::Client::ClientConfiguration()); diff --git a/src/storage/src/backupable.cc b/src/storage/src/backupable.cc index db1f50ab49..9691cf59de 100644 --- a/src/storage/src/backupable.cc +++ b/src/storage/src/backupable.cc @@ -175,6 +175,8 @@ Status BackupEngine::CreateNewCloudBackup(rocksdb::CloudFileSystemOptions& cloud std::string db_path_tmp = pika_conf->db_path(); std::string clone_path = GenBackUpDirectory(db_path_tmp); + //todo: At present, this operation is not supported online. + // It will be modified according to deployment in the future for (auto& db_pika : pika_conf->db_structs()) { std::string db_path = DBPath(pika_conf->db_path(), db_pika.db_name); std::string clone_db_path = DBPath(clone_path, db_pika.db_name); diff --git a/src/storage/tests/cloud_clone_test.cc b/src/storage/tests/cloud_clone_test.cc index 077fa76045..d1438413f0 100644 --- a/src/storage/tests/cloud_clone_test.cc +++ b/src/storage/tests/cloud_clone_test.cc @@ -13,10 +13,11 @@ #include "glog/logging.h" #include "pstd/include/env.h" -#include "storage/storage.h" +#include "pstd_defer.h" +#include "rocksdb/cloud/db_cloud.h" #include "src/redis.h" +#include "storage/storage.h" #include "storage/util.h" -#include "rocksdb/cloud/db_cloud.h" using namespace storage; using namespace rocksdb; @@ -116,11 +117,11 @@ TEST_F(CloudTest, test_360_s3) { std::string persistent_cache = ""; // Create and Open DB - DBCloud* db; + DBCloud* db = nullptr; s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); if (!s.ok()) { - fprintf(stderr, "--------------xxx Unable to open db at path %s in bucket %s. %s\n", + fprintf(stderr, "Unable to open db at path %s in bucket %s. %s\n", kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); return; } @@ -137,13 +138,14 @@ TEST_F(CloudTest, test_360_s3) { // Flush all data from main db to sst files. 
db->Flush(FlushOptions()); - - delete db; + DEFER { + delete db; + }; fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); } -/* + Status CloneDB(const std::string& clone_name, const std::string& src_bucket, const std::string& src_object_path, const std::string& dest_bucket, @@ -201,10 +203,10 @@ Status CloneDB(const std::string& clone_name, const std::string& src_bucket, // No persistent cache std::string persistent_cache = ""; // open clone - DBCloud* db; + DBCloud* db = nullptr; st = DBCloud::Open(options, kClonePath, persistent_cache, 0, &db); if (!st.ok()) { - fprintf(stderr, "iiiiii-----------------Unable to open clone at path %s in bucket %s. %s\n", + fprintf(stderr, "Unable to open clone at path %s in bucket %s. %s\n", kClonePath.c_str(), kBucketSuffix2_src.c_str(), st.ToString().c_str()); return st; } @@ -231,7 +233,7 @@ TEST_F(CloudTest, clone_s3) { // Append the user name to the bucket name in an attempt to make it // globally unique. S3 bucket-namess need to be globlly unique. // If you want to rerun this example, then unique user-name suffix here. - /*char* user = getenv("USER"); + char* user = getenv("USER"); kBucketSuffix.append(user); const std::string bucketPrefix = "rockset."; @@ -240,8 +242,7 @@ TEST_F(CloudTest, clone_s3) { // Needed if using bucket prefix other than the default "rockset." cloud_fs_options.src_bucket.SetBucketName(kBucketSuffix, bucketPrefix); - cloud_fs_options.dest_bucket.SetBucketName(kBucketSuffix, bucketPrefix);*//* - std::string bucketName = "xx"; + cloud_fs_options.dest_bucket.SetBucketName(kBucketSuffix, bucketPrefix); cloud_fs_options.src_bucket.SetBucketName("database", "pika."); cloud_fs_options.dest_bucket.SetBucketName("database", "pika."); // Create a new AWS cloud env Status @@ -269,11 +270,11 @@ TEST_F(CloudTest, clone_s3) { std::string persistent_cache = ""; // Create and Open DB - DBCloud* db; + DBCloud* db = nullptr; s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); if (!s.ok()) { - fprintf(stderr, "--------------xxx Unable to open db at path %s in bucket %s. %s\n", + fprintf(stderr, "Unable to open db at path %s in bucket %s. %s\n", kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); return; } @@ -300,7 +301,7 @@ TEST_F(CloudTest, clone_s3) { s = CloneDB("clone1", kBucketSuffix, kDBPath, kBucketSuffix, kClonePath, cloud_fs_options, &clone_db, &clone_env); if (!s.ok()) { - fprintf(stderr, "-------yy----Unable to clone db at path %s in bucket %s. %s\n", + fprintf(stderr, "Unable to clone db at path %s in bucket %s. %s\n", kDBPath.c_str(), bucketName.c_str(), s.ToString().c_str()); return; } @@ -318,12 +319,14 @@ TEST_F(CloudTest, clone_s3) { //clone_db->Flush(FlushOptions()); clone_db.release(); - delete db; +DEFER { +delete db; +}; fprintf(stdout, "Successfully used db at %s and clone at %s in bucket %s.\n", kDBPath.c_str(), kClonePath.c_str(), bucketName.c_str()); -}*/ -/* +} + TEST_F(CloudTest, get_clone_s3) { // cloud environment config options here CloudFileSystemOptions cloud_fs_options; @@ -377,7 +380,7 @@ TEST_F(CloudTest, get_clone_s3) { std::string persistent_cache = ""; // Create and Open DB - DBCloud* db; + DBCloud* db = nullptr; s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); if (!s.ok()) { fprintf(stderr, "Unable to open db at path %s in bucket %s. 
%s\n", @@ -450,7 +453,7 @@ TEST_F(CloudTest, delete_s3) { std::string persistent_cache = ""; // Create and Open DB - DBCloud* db; + DBCloud* db = nullptr; s = DBCloud::Open(options, kDBPath, persistent_cache, 0, &db); if (!s.ok()) { fprintf(stderr, "Unable to open db at path %s in bucket %s. %s\n", @@ -536,7 +539,7 @@ TEST_F(CloudTest, del_bucket_s3) { } Aws::ShutdownAPI(options); -}*/ +} int main(int argc, char** argv) { if (!pstd::FileExists("./log")) { From 7e64828e75bd1c7c60e24749bd7bf502bcd8a40d Mon Sep 17 00:00:00 2001 From: baixin Date: Tue, 14 May 2024 13:22:43 +0800 Subject: [PATCH 105/116] fix analyze manifest bug --- codis/go.mod | 6 ++++++ codis/go.sum | 23 +++++++++++++++++++++++ codis/pkg/topom/topom_api.go | 11 ++++++++--- codis/pkg/topom/topom_group.go | 15 +++++++++++---- 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/codis/go.mod b/codis/go.mod index e4af7493af..0dccd51976 100644 --- a/codis/go.mod +++ b/codis/go.mod @@ -8,6 +8,7 @@ replace google.golang.org/grpc => google.golang.org/grpc v1.29.0 require ( github.com/BurntSushi/toml v0.3.1 + github.com/aws/aws-sdk-go v1.30.12 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 github.com/emirpasic/gods v1.18.1 github.com/garyburd/redigo v1.6.4 @@ -18,6 +19,7 @@ require ( github.com/martini-contrib/render v0.0.0-20150707142108-ec18f8345a11 github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524 + github.com/stretchr/testify v1.8.0 go.etcd.io/etcd/client/v2 v2.305.7 golang.org/x/net v0.17.0 gopkg.in/alexcesaro/statsd.v2 v2.0.0 @@ -26,10 +28,14 @@ require ( require ( github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 // indirect github.com/coreos/go-semver v0.3.1 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect go.etcd.io/etcd/api/v3 v3.5.7 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.7 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/codis/go.sum b/codis/go.sum index f30f9e17be..50dedd373d 100644 --- a/codis/go.sum +++ b/codis/go.sum @@ -1,5 +1,7 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/aws/aws-sdk-go v1.30.12 h1:KrjyosZvkpJjcwMk0RNxMZewQ47v7+ZkbQDXjWsJMs8= +github.com/aws/aws-sdk-go v1.30.12/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 h1:sDMmm+q/3+BukdIpxwO365v/Rbspp2Nt5XntgQRXq8Q= github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM= github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= @@ -15,10 +17,16 @@ github.com/garyburd/redigo v1.6.4 h1:LFu2R3+ZOPgSMWMOL+saa/zXRjw0ID2G8FepO53BGlg github.com/garyburd/redigo v1.6.4/go.mod h1:rTb6epsqigu3kYKBnaF028A7Tf/Aw5s0cqA47doKKqw= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab h1:xveKWz2iaueeTaUgdetzel+U7exyigDYBryyVfV/rZk= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= 
+github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/influxdata/influxdb v1.11.0 h1:0X+ZsbcOWc6AEi5MHee9BYqXCKmz8IZsljrRYjmV8Qg= github.com/influxdata/influxdb v1.11.0/go.mod h1:V93tJcidY0Zh0LtSONZWnXXGDyt20dtVf+Ddp4EnhaA= +github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/martini-contrib/binding v0.0.0-20160701174519-05d3e151b6cf h1:6YSkbjZVghliN7zwJC/U3QQG+OVXOrij3qQ8sxfPIMg= @@ -34,6 +42,7 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c h1:rp5dCmg/yLR3mgFuSOe4oEnDDmGLROTvMragMUXpTQw= github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwUbLaax7L0S3Tw4hpejzu63ZrrQiUe6W0hcy0= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 h1:AJNDS0kP60X8wwWFvbLPwDuojxubj9pbfK7pjHw0vKg= @@ -41,18 +50,32 @@ github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414/go.mod h1:gi+0 github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524 h1:U+dpuWn15gFCqZkqhpUd5a85X1Oe1Tb+DeGF3nn6Bvs= github.com/spinlock/jemalloc-go v0.0.0-20201010032256-e81523fb8524/go.mod h1:A/ik9Cf2cSgEVcmTWlvTfCxyFgoL1UP/WbevsdDeguc= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= go.etcd.io/etcd/api/v3 v3.5.7 h1:sbcmosSVesNrWOJ58ZQFitHMdncusIifYcrBfwrlJSY= go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA= go.etcd.io/etcd/client/pkg/v3 v3.5.7 h1:y3kf5Gbp4e4q7egZdn5T7W9TSHUvkClN6u+Rq9mEOmg= go.etcd.io/etcd/client/pkg/v3 v3.5.7/go.mod h1:o0Abi1MK86iad3YrWhgUsbGx1pmTS+hrORWc2CamuhY= go.etcd.io/etcd/client/v2 v2.305.7 h1:AELPkjNR3/igjbO7CjyF1fPuVPjrblliiKj+Y6xSGOU= go.etcd.io/etcd/client/v2 v2.305.7/go.mod h1:GQGT5Z3TBuAQGvgPfhR7VPySu/SudxmEkRq9BgzFU6s= +golang.org/x/crypto 
v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= gopkg.in/alexcesaro/statsd.v2 v2.0.0 h1:FXkZSCZIH17vLCO5sO2UucTHsH9pc+17F6pl3JVCwMc= gopkg.in/alexcesaro/statsd.v2 v2.0.0/go.mod h1:i0ubccKGzBVNBpdGV5MocxyA/XlLUJzA7SLonnE4drU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/codis/pkg/topom/topom_api.go b/codis/pkg/topom/topom_api.go index 35fd1df445..585fe90e2f 100644 --- a/codis/pkg/topom/topom_api.go +++ b/codis/pkg/topom/topom_api.go @@ -4,6 +4,7 @@ package topom import ( + "encoding/base64" "encoding/json" "fmt" "io" @@ -12,12 +13,11 @@ import ( "strings" "time" - _ "net/http/pprof" - "github.com/go-martini/martini" "github.com/martini-contrib/binding" "github.com/martini-contrib/gzip" "github.com/martini-contrib/render" + _ "net/http/pprof" "pika/codis/v2/pkg/models" "pika/codis/v2/pkg/utils/errors" @@ -522,8 +522,13 @@ func (s *apiServer) UploadManifestToS3(req *http.Request) (int, string) { if err != nil { return rpc.ApiResponseError(err) } + + content, err := base64.StdEncoding.DecodeString(uploadReq.Content) + if err != nil { + return rpc.ApiResponseError(err) + } if err := s.topom.UploadManifestToS3(uploadReq.GroupId, uploadReq.TermId, uploadReq.S3Bucket, - uploadReq.S3Path, uploadReq.Content); err != nil { + uploadReq.S3Path, content); err != nil { return rpc.ApiResponseError(err) } else { return rpc.ApiResponseJson("OK") diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 612547c491..9836af1e3c 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -4,6 +4,8 @@ package topom import ( + "bytes" + "encoding/binary" "encoding/json" "os" "time" @@ -794,7 +796,7 @@ func (s *Topom) newSyncActionExecutor(addr string) (func() error, error) { }, nil } -func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename string, content string) error { +func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename string, content []byte) error { ctx, err := s.newContext() if err != nil { return err @@ -824,14 +826,19 @@ func (s *Topom) UploadManifestToS3(gid int, tid int, bucket string, filename str DisableEndpointHostPrefix: aws.Bool(true), }) - file, err := os.Create("./tmp") + file, err := os.Create("./upload-manifest") if err != nil { return errors.Errorf("Create manifest file err :[%s]", err) 
} defer file.Close() - _, err = file.WriteString(content) + buf := new(bytes.Buffer) + err = binary.Write(buf, binary.LittleEndian, content) if err != nil { - return errors.Errorf("Write manifest err :[%s]", err) + return errors.Errorf("Write binary manifest err :[%s]", err) + } + _, err = file.Write(buf.Bytes()) + if err != nil { + return errors.Errorf("Write manifest file err :[%s]", err) } uploader := s3manager.NewUploader(sess) From ecf0a9780e865221170fa9ebacc6aaf62ae2cd9b Mon Sep 17 00:00:00 2001 From: baixin Date: Tue, 14 May 2024 13:26:39 +0800 Subject: [PATCH 106/116] fix compile on mac bug --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 63e751728e..88bc1cb090 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -902,8 +902,7 @@ target_link_libraries(${PROJECT_NAME} librediscache.a ${LIBUNWIND_LIBRARY} ${JEMALLOC_LIBRARY} - ssl - crypto) +) option(USE_SSL "Enable SSL support" OFF) add_custom_target( From 29ff12518ac75c9e388698bbfaa2df3577c748f7 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Thu, 16 May 2024 16:58:05 +0800 Subject: [PATCH 107/116] change num_shard_bits default value --- CMakeLists.txt | 4 ++-- src/pika_server.cc | 4 ++-- src/storage/src/redis.cc | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 88bc1cb090..cac4658146 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -590,9 +590,9 @@ ExternalProject_Add(rocksdb ${LIBJEMALLOC_NAME} URL #temporary for debug, skip download from github - http://10.224.129.40:8000/archive/rocksdb_cloud.tar.gz + http://10.224.129.40:8000/sst_cache_on_write/rocksdb_cloud.tar.gz URL_HASH - MD5=f97ae648ad0924310e50cefbb1376e82 + MD5=bc68c57bd6df21403550f704774163a5 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/pika_server.cc b/src/pika_server.cc index 4dada1d0de..3a2cfc0b76 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1481,8 +1481,8 @@ void PikaServer::InitStorageOptions() { cloud_fs_opts.src_bucket.SetRegion(g_pika_conf->cloud_src_bucket_region()); cloud_fs_opts.dest_bucket.SetBucketName(g_pika_conf->cloud_dest_bucket_suffix(), g_pika_conf->cloud_dest_bucket_prefix()); cloud_fs_opts.dest_bucket.SetRegion(g_pika_conf->cloud_dest_bucket_region()); - cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + //cloud_fs_opts.upload_meta_func = std::bind(&PikaServer::UploadMetaToSentinel, this, + //std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); #endif } diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index c099a0107e..9c2faacce9 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -123,7 +123,7 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& storage_options.cloud_fs_options.resync_on_open = true; storage_options.cloud_fs_options.resync_manifest_on_open = true; storage_options.cloud_fs_options.skip_dbid_verification = true; - storage_options.cloud_fs_options.sst_file_cache = rocksdb::NewLRUCache(storage_options_.sst_cache_size_); + storage_options.cloud_fs_options.sst_file_cache = rocksdb::NewLRUCache(storage_options_.sst_cache_size_, 1/*num_shard_bits*/); storage_options.options.replication_log_listener = log_listener_; is_master_.store(tmp_storage_options.cloud_fs_options.is_master); if (!tmp_storage_options.cloud_fs_options.is_master) { From 1c02afe7383ae6e1a2ecfdfdb4d83a36583827bd Mon Sep 17 
00:00:00 2001 From: wangshaoyi Date: Fri, 31 May 2024 15:42:40 +0800 Subject: [PATCH 108/116] add stall metrics --- src/storage/src/redis.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 514fed5ab4..87c14ffc51 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -15,6 +15,7 @@ #include "pstd/include/pstd_wal.h" #else #include "rocksdb/db.h" +#include "rocksdb/listener.h" #endif #include "rocksdb/slice.h" #include "rocksdb/status.h" @@ -34,6 +35,8 @@ #define SPOP_COMPACT_THRESHOLD_COUNT 500 #define SPOP_COMPACT_THRESHOLD_DURATION (1000 * 1000) // 1000ms +#define STRINGFYENUM(x) #x + namespace storage { using Status = rocksdb::Status; using Slice = rocksdb::Slice; @@ -508,5 +511,15 @@ class LogListener : public rocksdb::ReplicationLogListener { std::shared_ptr wal_writer_ = nullptr; }; +class RocksDBEventListener : public rocksdb::EventListener { + RocksDBEventListener() {} + ~RocksDBEventListener() {} + virtual void OnStallConditionsChanged(const WriteStallInfo& info) override { + LOG(INFO) << "column_family name: " << info.cf_name + << " change from stall condition: " << STRINGFYENUM(info.prev) + << " to stall condition: " << STRINGFYENUM(info.cur); + } +}; + } // namespace storage #endif // SRC_REDIS_H_ From fdc005d8c8baba0d48c210df560bac2c0f207ebc Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 31 May 2024 15:53:34 +0800 Subject: [PATCH 109/116] fix --- CMakeLists.txt | 7 ++++--- src/storage/CMakeLists.txt | 2 +- src/storage/src/redis.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cac4658146..3609f7bb51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -590,9 +590,10 @@ ExternalProject_Add(rocksdb ${LIBJEMALLOC_NAME} URL #temporary for debug, skip download from github - http://10.224.129.40:8000/sst_cache_on_write/rocksdb_cloud.tar.gz + #http://10.224.129.40:8000/sst_cache_on_write/rocksdb_cloud.tar.gz + /home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH - MD5=bc68c57bd6df21403550f704774163a5 + MD5=97d0f43bb293d4f6afb271ba3d2df467 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND @@ -848,7 +849,7 @@ target_link_directories(${PROJECT_NAME} PUBLIC ${INSTALL_LIBDIR}) if (USE_S3) - find_package(AWSSDK REQUIRED COMPONENTS s3 transfer kinesis) + find_package(AWSSDK REQUIRED COMPONENTS s3-crt transfer kinesis) include_directories(${AWS_INCLUDE_DIR}) target_link_libraries(${PROJECT_NAME} ${AWSSDK_LINK_LIBRARIES}) endif() diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index e12cae9b7d..35db197c09 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -4,7 +4,7 @@ set (CMAKE_CXX_STANDARD 17) project (storage) # Other CMake modules -add_subdirectory(tests) +#add_subdirectory(tests) # add_subdirectory(examples) # add_subdirectory(benchmark) diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 87c14ffc51..2bce4a1f33 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -514,7 +514,7 @@ class LogListener : public rocksdb::ReplicationLogListener { class RocksDBEventListener : public rocksdb::EventListener { RocksDBEventListener() {} ~RocksDBEventListener() {} - virtual void OnStallConditionsChanged(const WriteStallInfo& info) override { + virtual void OnStallConditionsChanged(const rocksdb::WriteStallInfo& info) override { LOG(INFO) << "column_family name: " << info.cf_name << " change from stall condition: " << STRINGFYENUM(info.prev) << " to stall condition: " << 
STRINGFYENUM(info.cur); From e60d75b3dba934a3d0670fb9f356b4dcc1adcc57 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 31 May 2024 16:19:31 +0800 Subject: [PATCH 110/116] add benchmark_client metrics --- CMakeLists.txt | 18 +++ tools/benchmark_client/CMakeLists.txt | 4 + tools/benchmark_client/benchmark_client.cc | 142 +++++++++++++++++++-- 3 files changed, 150 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3609f7bb51..792728c3e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -577,6 +577,24 @@ set(PROTOBUF_LIBRARY ${INSTALL_LIBDIR}/${LIB_PROTOBUF}) set(PROTOBUF_PROTOC ${STAGED_INSTALL_PREFIX}/bin/protoc) set(USE_S3 1) +ExternalProject_Add(prometheus_cpp + URL + https://github.com/jupp0r/prometheus-cpp/releases/download/v1.2.4/prometheus-cpp-with-submodules.tar.gz + CMAKE_ARGS + -DBUILD_SHARED_LIBS=ON + -DENABLE_PUSH=OFF + -DENABLE_COMPRESSION=OFF + -DCMAKE_INSTALL_LIBDIR=${INSTALL_LIBDIR} + -DCMAKE_INSTALL_INCLUDEDIR=${INSTALL_INCLUDEDIR} + BUILD_ALWAYS + 1 + BUILD_COMMAND + make -j${CPU_CORE} +) + +set(PROMETHEUS_CPP_CORE_LIB ${INSTALL_LIBDIR}/libprometheus-cpp-core.so) +set(PROMETHEUS_CPP_PULL_LIB ${INSTALL_LIBDIR}/libprometheus-cpp-pull.so) + if (USE_S3) ExternalProject_Add(rocksdb DEPENDS diff --git a/tools/benchmark_client/CMakeLists.txt b/tools/benchmark_client/CMakeLists.txt index 8cdd683174..7640908d68 100644 --- a/tools/benchmark_client/CMakeLists.txt +++ b/tools/benchmark_client/CMakeLists.txt @@ -13,6 +13,10 @@ add_executable(benchmark_client ${BASE_OBJS}) target_include_directories(benchmark_client PRIVATE ${INSTALL_INCLUDEDIR} PRIVATE ${PROJECT_SOURCE_DIR} ${ROCKSDB_SOURCE_DIR} ${GLOG_INCLUDE_DIR}) target_link_libraries(benchmark_client pthread) +target_link_libraries(benchmark_client curl) +target_link_libraries(benchmark_client z) +target_link_libraries(benchmark_client ${PROMETHEUS_CPP_CORE_LIB}) +target_link_libraries(benchmark_client ${PROMETHEUS_CPP_PULL_LIB}) target_link_libraries(benchmark_client ${ROCKSDB_LIBRARY}) target_link_libraries(benchmark_client ${GLOG_LIBRARY}) target_link_libraries(benchmark_client ${SNAPPY_LIBRARY}) diff --git a/tools/benchmark_client/benchmark_client.cc b/tools/benchmark_client/benchmark_client.cc index 2dbd2bb34e..7a244f020c 100644 --- a/tools/benchmark_client/benchmark_client.cc +++ b/tools/benchmark_client/benchmark_client.cc @@ -7,6 +7,7 @@ #include #include #include +#include "unistd.h" #include #include #include @@ -16,10 +17,21 @@ #include "monitoring/histogram.h" #include "hiredis/hiredis.h" +#include "prometheus/client_metric.h" +#include "prometheus/histogram.h" +#include "prometheus/family.h" +#include "prometheus/exposer.h" +#include "prometheus/registry.h" + #include "pstd/include/pstd_status.h" #include "pstd/include/pstd_string.h" #include "pstd/include/env.h" +std::function Observer; +std::function Increment; + +using namespace prometheus; + DEFINE_string(command, "generate", "command to execute, eg: generate/get/set/zadd"); DEFINE_bool(pipeline, false, "whether to enable pipeline"); DEFINE_string(host, "127.0.0.1", "target server's host"); @@ -33,6 +45,7 @@ DEFINE_int32(thread_num, 10, "concurrent thread num"); DEFINE_string(dbs, "0", "dbs name, eg: 0,1,2"); DEFINE_int32(element_count, 1, "elements number in hash/list/set/zset"); DEFINE_bool(compare_value, false, "whether compare result or not"); +DEFINE_string(exporter_addr, "0.0.0.0:9999", "metrics exporter listen addr"); using std::default_random_engine; using pstd::Status; @@ -118,10 +131,10 @@ bool CompareValue(const 
std::string& expect, const std::string& actual) { } void PrepareKeys(int suffix, std::vector* keys) { - keys->resize(FLAGS_count); + keys->resize(FLAGS_count * FLAGS_element_count); std::string filename = "benchmark_keyfile_" + std::to_string(suffix); FILE* fp = fopen(filename.c_str(), "r"); - for (int idx = 0; idx < FLAGS_count; ++idx) { + for (int idx = 0; idx < FLAGS_count * FLAGS_element_count; ++idx) { char* key = new char[FLAGS_key_size + 2]; fgets(key, FLAGS_key_size + 2, fp); key[FLAGS_key_size] = '\0'; @@ -282,6 +295,61 @@ redisContext* Prepare(ThreadArg* arg) { return c; } +void FreeAndReconnect(redisContext*& c, ThreadArg* arg) { + LOG(INFO) << "request timeout, reconnect"; + redisFree(c); + c = nullptr; + while (!c) { + c = Prepare(arg); + } +} + +Status RunBatchGetCommand(redisContext*& c, ThreadArg* arg) { + std::vector keys; + PrepareKeys(arg->idx, &keys); + + for (int idx = 0; idx < FLAGS_count; ++idx) { + if (idx % 10000 == 0) { + LOG(INFO) << "finish " << idx << " mget"; + } + + std::vector get_argv(FLAGS_element_count + 1); + std::vector get_argvlen(FLAGS_element_count + 1); + get_argv[0] = "mget"; + get_argvlen[0] = 4; + for (int i = 0; i < FLAGS_element_count; ++i) { + get_argv[i + 1] = keys[idx * FLAGS_element_count+ i].c_str(); + get_argvlen[i + 1] = keys[idx * FLAGS_element_count+ i].size(); + } + + int retry_times = 0; + while (true) { + redisReply* res = nullptr; + uint64_t begin = pstd::NowMicros(); + res = reinterpret_cast( + redisCommandArgv(c, get_argv.size(), &(get_argv[0]), &(get_argvlen[0]))); + Increment(1); + Observer((pstd::NowMicros() - begin) / 1000.0); + + // nullptr res, reconnect + if (!res) { + FreeAndReconnect(c, arg); + continue; + } + + // success + if (res->type == REDIS_REPLY_ARRAY) { + freeReplyObject(res); + break; + } + + LOG(ERROR) << "mget failed"; + freeReplyObject(res); + } + } + return Status::OK(); +} + Status RunGetCommand(redisContext*& c, ThreadArg* arg) { redisReply* res = nullptr; std::vector keys; @@ -543,30 +611,39 @@ Status RunSetCommand(redisContext*& c, ThreadArg* arg) { std::vector keys; PrepareKeys(arg->idx, &keys); - for (int idx = 0; idx < FLAGS_count; ++idx) { + for (int idx = 0; idx < FLAGS_count * FLAGS_element_count; ++idx) { if (idx % 10000 == 0) { LOG(INFO) << "finish " << idx << " request"; } - const char* set_argv[3]; - size_t set_argvlen[3]; + const char* set_argv[4]; + size_t set_argvlen[4]; std::string value; std::string key = keys[idx]; GenerateValue(key, FLAGS_value_size, &value); + std::string expire_seconds = "86400"; - set_argv[0] = "set"; - set_argvlen[0] = 3; + set_argv[0] = "setex"; + set_argvlen[0] = 5; set_argv[1] = key.c_str(); set_argvlen[1] = key.size(); - set_argv[2] = value.c_str(); - set_argvlen[2] = value.size(); + set_argv[2] = expire_seconds.c_str(); + set_argvlen[2] = expire_seconds.size(); + set_argv[3] = value.c_str(); + set_argvlen[3] = value.size(); uint64_t begin = pstd::NowMicros(); res = reinterpret_cast( - redisCommandArgv(c, 3, reinterpret_cast(set_argv), + redisCommandArgv(c, 4, reinterpret_cast(set_argv), reinterpret_cast(set_argvlen))); - hist->Add(pstd::NowMicros() - begin); + uint64_t now = pstd::NowMicros(); + if (now - begin > 10 * 1000) { + LOG(ERROR) << "setex costs " << (now - begin) / 1000 << " ms"; + } + Observer((now - begin) / 1000.0); + hist->Add(now - begin); + Increment(1); if (!res) { LOG(INFO) << FLAGS_command << " timeout, key: " << key; @@ -578,7 +655,7 @@ Status RunSetCommand(redisContext*& c, ThreadArg* arg) { } } else if (res->type != REDIS_REPLY_STATUS) { 
LOG(INFO) << FLAGS_command << " invalid type: " << res->type - << " key: " << key; + << " key: " << key << " response str: " << res->str; arg->stat.error_cnt++; } else { arg->stat.success_cnt++; @@ -808,7 +885,9 @@ void* ThreadMain(void* arg) { } Status s; - if (FLAGS_command == "get") { + if (FLAGS_command == "mget") { + s = RunBatchGetCommand(c, ta); + } else if (FLAGS_command == "get") { s = RunGetCommand(c, ta); } else if (FLAGS_command == "set") { s = RunSetCommand(c, ta); @@ -844,6 +923,41 @@ int main(int argc, char* argv[]) { if (tables.empty()) { exit(-1); } + char host_name[255]; + if (gethostname(host_name, sizeof(host_name)) == -1) { + std::cout << "get hostname failed, exit"; + exit(1); + } + std::string bind_addr = FLAGS_exporter_addr; + Exposer exposer{bind_addr}; + auto registry = std::make_shared(); + exposer.RegisterCollectable(registry, "/metrics"); + + auto& counter_family = BuildCounter() + .Name("request_count") + .Help("How many is the api called") + .Labels({{"hostname", host_name}, {"command", FLAGS_command}}) + .Register(*registry); + + auto& api_counter = counter_family.Add( + {{"prometheus_test_counter", "test_counter"}, {"yet_another_label", "value"}}); + Increment = [&api_counter](double cost) { + api_counter.Increment(cost); + }; + + auto& histogram_family = BuildHistogram() + .Name("request_time") + .Help("analyze the time of request duraiton with histogram") + .Labels({{"hostname", host_name}, {"command", FLAGS_command}}) + .Register(*registry); + auto& task_histogram = histogram_family.Add({{"prometheus_test_histogram", "test_histogram"}, + {"yet_another_lable", "value"}}, Histogram::BucketBoundaries{1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 17, 20, 24, 29, 34, 40, 48, 57, 68, 81, 96, 114, 135, 160, 190, 226, 268, 318, + 378, 449, 533, 633, 752, 894, 1062, 1262, 1500, 1782, 2117, 2516, 2990, 3553, 4222, 5017, 5961, + 7083, 8416, 10000}); + Observer = [&task_histogram](double cost) { + task_histogram.Observe(cost); + }; FLAGS_logtostdout = true; FLAGS_minloglevel = 0; @@ -885,4 +999,4 @@ int main(int argc, char* argv[]) { std::cout << "Timeout Count: " << stat.timeout_cnt << " Error Count: " << stat.error_cnt << std::endl; std::cout << "stats: " << hist->ToString() << std::endl; return 0; -} +} \ No newline at end of file From bbfd9b2062fedf511300211d3c136cefbb6009d0 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 31 May 2024 17:09:52 +0800 Subject: [PATCH 111/116] add listener --- src/storage/src/redis.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 2bce4a1f33..054d08b479 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -519,6 +519,23 @@ class RocksDBEventListener : public rocksdb::EventListener { << " change from stall condition: " << STRINGFYENUM(info.prev) << " to stall condition: " << STRINGFYENUM(info.cur); } + void OnCompactionCompleted(rocksdb::DB* /*db*/, const rocksdb::CompactionJobInfo& info) override { + LOG(INFO) << " column_family name: " << info.cf_name + << " thread_id: " << info.thread_id + << " job_id: " << info.job_id + << " input level: " << info.base_input_level + << " output level: " << info.output_level + << " elapsed time: " << info.stats.elapsed_micros / 1000 << " ms" + << " total_input_bytes: " << (info.stats.total_input_bytes >> 20) << " MB"; + } + void OnFlushCompleted(rocksdb::DB* /*db*/, + const rocksdb::FlushJobInfo& info) override { + LOG(INFO) << " column_family name: " << info.cf_name + << " thread_id: " << info.thread_id + 
<< " job_id: " << info.job_id + << " triggered_writes_slowdown: " << (info.triggered_writes_slowdown ? "true" : "false") + << " triggered_writes_stop: " << (info.triggered_writes_stop ? "true" : "false"); + } }; } // namespace storage From b0a8c645430310097c45f06153c4626197c57653 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Sun, 2 Jun 2024 17:58:15 +0800 Subject: [PATCH 112/116] add metrics for pikacloud --- CMakeLists.txt | 2 +- src/storage/src/redis.cc | 4 +++- src/storage/src/redis.h | 29 ++++++++++++++++++++++------- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 792728c3e1..f557ed9fd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -611,7 +611,7 @@ ExternalProject_Add(rocksdb #http://10.224.129.40:8000/sst_cache_on_write/rocksdb_cloud.tar.gz /home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH - MD5=97d0f43bb293d4f6afb271ba3d2df467 + MD5=0a406cde16985e470d284fce045157cf DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 9c2faacce9..9e33d3e841 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -123,8 +123,9 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& storage_options.cloud_fs_options.resync_on_open = true; storage_options.cloud_fs_options.resync_manifest_on_open = true; storage_options.cloud_fs_options.skip_dbid_verification = true; - storage_options.cloud_fs_options.sst_file_cache = rocksdb::NewLRUCache(storage_options_.sst_cache_size_, 1/*num_shard_bits*/); + storage_options.cloud_fs_options.sst_file_cache = rocksdb::NewLRUCache(storage_options_.sst_cache_size_, 0/*num_shard_bits*/); storage_options.options.replication_log_listener = log_listener_; + is_master_.store(tmp_storage_options.cloud_fs_options.is_master); if (!tmp_storage_options.cloud_fs_options.is_master) { storage_options.options.disable_auto_flush = true; @@ -142,6 +143,7 @@ Status Redis::Open(const StorageOptions& tmp_storage_options, const std::string& rocksdb::Options db_ops(storage_options.options); db_ops.create_missing_column_families = true; + db_ops.listeners.emplace_back(new RocksDBEventListener(index_)); // db_ops.env = env_; // string column-family options diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index 054d08b479..cb98b4a04c 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -35,7 +35,16 @@ #define SPOP_COMPACT_THRESHOLD_COUNT 500 #define SPOP_COMPACT_THRESHOLD_DURATION (1000 * 1000) // 1000ms -#define STRINGFYENUM(x) #x +static inline std::string StallEnumToString(rocksdb::WriteStallCondition cond) { + switch (cond) { + case rocksdb::WriteStallCondition::kDelayed: + return "delayed"; + case rocksdb::WriteStallCondition::kStopped: + return "stopped"; + case rocksdb::WriteStallCondition::kNormal: + return "normal"; + } +} namespace storage { using Status = rocksdb::Status; @@ -512,15 +521,18 @@ class LogListener : public rocksdb::ReplicationLogListener { }; class RocksDBEventListener : public rocksdb::EventListener { - RocksDBEventListener() {} +public: + RocksDBEventListener(int index) : index_(index) {} ~RocksDBEventListener() {} virtual void OnStallConditionsChanged(const rocksdb::WriteStallInfo& info) override { - LOG(INFO) << "column_family name: " << info.cf_name - << " change from stall condition: " << STRINGFYENUM(info.prev) - << " to stall condition: " << STRINGFYENUM(info.cur); + LOG(INFO) << "rocksdb id: " << index_ + << "column_family name: " << info.cf_name + << " change from stall 
condition: " << StallEnumToString(info.condition.prev) + << " to stall condition: " << StallEnumToString(info.condition.cur); } void OnCompactionCompleted(rocksdb::DB* /*db*/, const rocksdb::CompactionJobInfo& info) override { - LOG(INFO) << " column_family name: " << info.cf_name + LOG(INFO) << "rocksdb id: " << index_ + << " column_family name: " << info.cf_name << " thread_id: " << info.thread_id << " job_id: " << info.job_id << " input level: " << info.base_input_level @@ -530,12 +542,15 @@ class RocksDBEventListener : public rocksdb::EventListener { } void OnFlushCompleted(rocksdb::DB* /*db*/, const rocksdb::FlushJobInfo& info) override { - LOG(INFO) << " column_family name: " << info.cf_name + LOG(INFO) << "rocksdb id: " << index_ + << " column_family name: " << info.cf_name << " thread_id: " << info.thread_id << " job_id: " << info.job_id << " triggered_writes_slowdown: " << (info.triggered_writes_slowdown ? "true" : "false") << " triggered_writes_stop: " << (info.triggered_writes_stop ? "true" : "false"); } +private: + int index_ = 0; }; } // namespace storage From f9f430d7f3a8818736b4b4dc082ac7f5094f122c Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 4 Jun 2024 14:04:50 +0800 Subject: [PATCH 113/116] specify level0 compaction parameter --- CMakeLists.txt | 2 +- src/pika_server.cc | 3 +++ src/storage/src/redis.h | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f557ed9fd3..62244529b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -611,7 +611,7 @@ ExternalProject_Add(rocksdb #http://10.224.129.40:8000/sst_cache_on_write/rocksdb_cloud.tar.gz /home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH - MD5=0a406cde16985e470d284fce045157cf + MD5=bca45663712bf56d6d713074b3663ee1 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/pika_server.cc b/src/pika_server.cc index 3a2cfc0b76..83a452bda6 100644 --- a/src/pika_server.cc +++ b/src/pika_server.cc @@ -1401,6 +1401,9 @@ void PikaServer::InitStorageOptions() { storage_options_.options.max_background_jobs = g_pika_conf->max_background_jobs(); storage_options_.options.max_open_files = g_pika_conf->max_cache_files(); storage_options_.options.max_bytes_for_level_multiplier = g_pika_conf->max_bytes_for_level_multiplier(); + storage_options_.options.level0_file_num_compaction_trigger = 2; + storage_options_.options.level0_slowdown_writes_trigger = 8; + storage_options_.options.level0_stop_writes_trigger = 16; storage_options_.options.optimize_filters_for_hits = g_pika_conf->optimize_filters_for_hits(); storage_options_.options.level_compaction_dynamic_level_bytes = g_pika_conf->level_compaction_dynamic_level_bytes(); diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index cb98b4a04c..c293f57939 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -525,13 +525,13 @@ class RocksDBEventListener : public rocksdb::EventListener { RocksDBEventListener(int index) : index_(index) {} ~RocksDBEventListener() {} virtual void OnStallConditionsChanged(const rocksdb::WriteStallInfo& info) override { - LOG(INFO) << "rocksdb id: " << index_ + LOG(INFO) << "stall condition changed, rocksdb id: " << index_ << "column_family name: " << info.cf_name << " change from stall condition: " << StallEnumToString(info.condition.prev) << " to stall condition: " << StallEnumToString(info.condition.cur); } void OnCompactionCompleted(rocksdb::DB* /*db*/, const rocksdb::CompactionJobInfo& info) override { - LOG(INFO) << "rocksdb id: " << index_ + LOG(INFO) << "compaction 
completed, rocksdb id: " << index_ << " column_family name: " << info.cf_name << " thread_id: " << info.thread_id << " job_id: " << info.job_id @@ -542,7 +542,7 @@ class RocksDBEventListener : public rocksdb::EventListener { } void OnFlushCompleted(rocksdb::DB* /*db*/, const rocksdb::FlushJobInfo& info) override { - LOG(INFO) << "rocksdb id: " << index_ + LOG(INFO) << "flush completed, rocksdb id: " << index_ << " column_family name: " << info.cf_name << " thread_id: " << info.thread_id << " job_id: " << info.job_id From 55bef511078c9687b1d8da0554422aede590a917 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Sun, 9 Jun 2024 10:45:36 +0800 Subject: [PATCH 114/116] wait pending objects in writing binlog --- CMakeLists.txt | 6 +++--- src/storage/src/redis.cc | 3 +++ src/storage/src/redis.h | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 62244529b8..3e8236dbd8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -608,10 +608,10 @@ ExternalProject_Add(rocksdb ${LIBJEMALLOC_NAME} URL #temporary for debug, skip download from github - #http://10.224.129.40:8000/sst_cache_on_write/rocksdb_cloud.tar.gz - /home/wangshaoyi/work/rocksdb_cloud.tar.gz + http://10.224.129.40:8000/async_upload/rocksdb_cloud.tar.gz + #/home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH - MD5=bca45663712bf56d6d713074b3663ee1 + MD5=31c2188019b0d9ebc11d4df42ce885f2 DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc index 9e33d3e841..bf64ef9aff 100644 --- a/src/storage/src/redis.cc +++ b/src/storage/src/redis.cc @@ -766,6 +766,9 @@ std::string LogListener::OnReplicationLogRecord(rocksdb::ReplicationLogRecord re if (!redis_inst->IsMaster()) { return "0"; } + if (record.type != rocksdb::ReplicationLogRecord::kMemtableWrite) { + redis_inst->cfs_->WaitPendingObjects(); + } auto s = wal_writer_->Put(record.contents, db_id, redis_inst->GetIndex(), record.type); diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h index c293f57939..43f2ec6a45 100644 --- a/src/storage/src/redis.h +++ b/src/storage/src/redis.h @@ -53,6 +53,7 @@ using Slice = rocksdb::Slice; class LogListener; class Redis { public: + friend class LogListener; Redis(Storage* storage, int32_t index, std::shared_ptr wal_writer = nullptr); virtual ~Redis(); From 249e2bc2b5f6ea52ecdb80b4d5bd342cec5f9284 Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Tue, 11 Jun 2024 19:25:57 +0800 Subject: [PATCH 115/116] enable metrics statistic for get command --- tools/benchmark_client/benchmark_client.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/benchmark_client/benchmark_client.cc b/tools/benchmark_client/benchmark_client.cc index 7a244f020c..6c27f0e126 100644 --- a/tools/benchmark_client/benchmark_client.cc +++ b/tools/benchmark_client/benchmark_client.cc @@ -373,7 +373,10 @@ Status RunGetCommand(redisContext*& c, ThreadArg* arg) { res = reinterpret_cast( redisCommandArgv(c, 2, reinterpret_cast(argv), reinterpret_cast(argvlen))); - hist->Add(pstd::NowMicros() - begin); + uint64_t now = pstd::NowMicros(); + Observer((now - begin) / 1000.0); + hist->Add(now - begin); + Increment(1); if (!res) { LOG(INFO) << FLAGS_command << " timeout, key: " << key; From f4cdda6674d039c01340e2bd812c59195f0c638f Mon Sep 17 00:00:00 2001 From: wangshaoyi Date: Fri, 14 Jun 2024 14:02:30 +0800 Subject: [PATCH 116/116] remove unused code --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 
3e8236dbd8..69db0069a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -609,7 +609,6 @@ ExternalProject_Add(rocksdb URL #temporary for debug, skip download from github http://10.224.129.40:8000/async_upload/rocksdb_cloud.tar.gz - #/home/wangshaoyi/work/rocksdb_cloud.tar.gz URL_HASH MD5=31c2188019b0d9ebc11d4df42ce885f2 DOWNLOAD_NO_PROGRESS