Skip to content
This repository has been archived by the owner on Jan 3, 2024. It is now read-only.

Commit

Permalink
rgw/sfs: Change retry utility into busy retry util
Browse files Browse the repository at this point in the history
Rename to RetrySQLiteBusy. Change behavior to:
 - rethrow non-busy errors immediately. We no longer retry errors that
    we should not like constraint violations
 - abort on critical errors (e.g db corruption)

Signed-off-by: Marcel Lauhoff <[email protected]>
  • Loading branch information
Marcel Lauhoff committed Oct 13, 2023
1 parent 6bf9ed3 commit 4622f24
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 23 deletions.
20 changes: 13 additions & 7 deletions src/rgw/driver/sfs/sqlite/retry.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace rgw::sal::sfs::sqlite {
// after retrying. Catches all non-critical exceptions and makes them
// available via failed_error(). Critical exceptions are passed on.
template <typename Return>
class RetrySQLite {
class RetrySQLiteBusy {
public:
using Func = std::function<Return(void)>;

Expand All @@ -42,11 +42,11 @@ class RetrySQLite {
std::error_code m_failed_error{};

public:
RetrySQLite(Func&& fn) : m_fn(std::forward<Func>(fn)) {}
RetrySQLite(RetrySQLite&&) = delete;
RetrySQLite(const RetrySQLite&) = delete;
RetrySQLite& operator=(const RetrySQLite&) = delete;
RetrySQLite& operator=(RetrySQLite&&) = delete;
RetrySQLiteBusy(Func&& fn) : m_fn(std::forward<Func>(fn)) {}
RetrySQLiteBusy(RetrySQLiteBusy&&) = delete;
RetrySQLiteBusy(const RetrySQLiteBusy&) = delete;
RetrySQLiteBusy& operator=(const RetrySQLiteBusy&) = delete;
RetrySQLiteBusy& operator=(RetrySQLiteBusy&&) = delete;

/// run runs fn with up to m_max_retries retries. It may throw
/// critical-exceptions. Non-critical errors are made available via
Expand All @@ -66,7 +66,13 @@ class RetrySQLite {
} catch (const std::system_error& ex) {
m_failed_error = ex.code();
if (critical_error(ex.code().value())) {
// Rethrow, expect a higher layer to shut us down
ceph_abort_msgf(
"Critical SQLite error %d. Shutting down.", ex.code().value()
);
}
if (!busy_error(ex.code().value())) {
// Rethrow, expect a higher layer to handle (e.g constraint
// violations), reply internal server error or shut us down
throw ex;
}
std::this_thread::sleep_for(10ms * retry);
Expand Down
2 changes: 1 addition & 1 deletion src/rgw/driver/sfs/sqlite/sqlite_buckets.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ std::optional<DBDeletedObjectItems> SQLiteBuckets::delete_bucket_transact(
const std::string& bucket_id, uint max_objects, bool& bucket_deleted
) const {
auto storage = conn->get_storage();
RetrySQLite<DBDeletedObjectItems> retry([&]() {
RetrySQLiteBusy<DBDeletedObjectItems> retry([&]() {
bucket_deleted = false;
DBDeletedObjectItems ret_values;
storage.begin_transaction();
Expand Down
6 changes: 3 additions & 3 deletions src/rgw/driver/sfs/sqlite/sqlite_multipart.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ std::optional<DBMultipartPart> SQLiteMultipart::create_or_reset_part(
) const {
auto storage = conn->get_storage();

RetrySQLite<std::optional<DBMultipartPart>> retry([&]() {
RetrySQLiteBusy<std::optional<DBMultipartPart>> retry([&]() {
auto transaction = storage.transaction_guard();
std::optional<DBMultipartPart> entry = std::nullopt;
auto cnt = storage.count<DBMultipart>(where(
Expand Down Expand Up @@ -466,7 +466,7 @@ SQLiteMultipart::remove_multiparts_by_bucket_id_transact(
) const {
DBDeletedMultipartItems ret_parts;
auto storage = conn->get_storage();
RetrySQLite<DBDeletedMultipartItems> retry([&]() {
RetrySQLiteBusy<DBDeletedMultipartItems> retry([&]() {
auto transaction = storage.transaction_guard();
// get first the list of parts to be deleted up to max_items
ret_parts = storage.select(
Expand Down Expand Up @@ -532,7 +532,7 @@ SQLiteMultipart::remove_done_or_aborted_multiparts_transact(uint max_items
) const {
DBDeletedMultipartItems ret_parts;
auto storage = conn->get_storage();
RetrySQLite<DBDeletedMultipartItems> retry([&]() {
RetrySQLiteBusy<DBDeletedMultipartItems> retry([&]() {
auto transaction = storage.transaction_guard();
// get first the list of parts to be deleted up to max_items
ret_parts = storage.select(
Expand Down
6 changes: 3 additions & 3 deletions src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ bool SQLiteVersionedObjects::
const DBVersionedObject& object, std::vector<ObjectState> allowed_states
) const {
auto storage = conn->get_storage();
RetrySQLite<bool> retry([&]() {
RetrySQLiteBusy<bool> retry([&]() {
auto transaction = storage.transaction_guard();
storage.update_all(
set(c(&DBVersionedObject::object_id) = object.object_id,
Expand Down Expand Up @@ -449,7 +449,7 @@ SQLiteVersionedObjects::create_new_versioned_object_transact(
const std::string& version_id
) const {
auto storage = conn->get_storage();
RetrySQLite<DBVersionedObject> retry([&]() {
RetrySQLiteBusy<DBVersionedObject> retry([&]() {
auto transaction = storage.transaction_guard();
auto objs = storage.select(
columns(&DBObject::uuid),
Expand Down Expand Up @@ -491,7 +491,7 @@ SQLiteVersionedObjects::remove_deleted_versions_transact(uint max_objects
) const {
DBDeletedObjectItems ret_objs;
auto storage = conn->get_storage();
RetrySQLite<DBDeletedObjectItems> retry([&]() {
RetrySQLiteBusy<DBDeletedObjectItems> retry([&]() {
auto transaction = storage.transaction_guard();
// get first the list of objects to be deleted up to max_objects
// order by size so when we delete the versions data we are more efficient
Expand Down
19 changes: 10 additions & 9 deletions src/test/rgw/sfs/test_rgw_sfs_retry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "common/ceph_context.h"
#include "driver/sfs/sqlite/retry.h"
#include "driver/sfs/sqlite/sqlite_orm.h"
#include "gtest/gtest.h"
#include "rgw/rgw_sal_sfs.h"
#include "rgw_common.h"

Expand All @@ -25,10 +26,12 @@ class TestSFSRetrySQLite : public ::testing::Test {
void TearDown() override {}
};

class TestSFSRetrySQLiteDeathTest : public TestSFSRetrySQLite {};

TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) {
auto exception =
std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()};
RetrySQLite<int> uut([&]() {
RetrySQLiteBusy<int> uut([&]() {
throw exception;
return 0;
});
Expand All @@ -38,21 +41,19 @@ TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) {
EXPECT_GT(uut.retries(), 0);
}

TEST_F(TestSFSRetrySQLite, crit_throws_without_retry) {
TEST_F(TestSFSRetrySQLiteDeathTest, crit_aborts) {
GTEST_FLAG_SET(death_test_style, "threadsafe");
auto exception = std::system_error{
SQLITE_CORRUPT, sqlite_orm::get_sqlite_error_category()};
RetrySQLite<int> uut([&]() {
RetrySQLiteBusy<int> uut([&]() {
throw exception;
return 0;
});
EXPECT_THROW(uut.run(), decltype(exception));
EXPECT_FALSE(uut.successful());
EXPECT_EQ(uut.failed_error(), exception.code());
EXPECT_EQ(uut.retries(), 0);
ASSERT_DEATH(uut.run(), "Critical SQLite error");
}

TEST_F(TestSFSRetrySQLite, simple_return_succeeds_immediately) {
RetrySQLite<int> uut([&]() { return 42; });
RetrySQLiteBusy<int> uut([&]() { return 42; });
EXPECT_EQ(uut.run(), 42);
EXPECT_TRUE(uut.successful());
EXPECT_EQ(uut.retries(), 0);
Expand All @@ -62,7 +63,7 @@ TEST_F(TestSFSRetrySQLite, retry_second_time_success) {
auto exception =
std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()};
bool first = true;
RetrySQLite<int> uut([&]() {
RetrySQLiteBusy<int> uut([&]() {
if (first) {
first = false;
throw exception;
Expand Down

0 comments on commit 4622f24

Please sign in to comment.