Skip to content
This repository has been archived by the owner on Jan 3, 2024. It is now read-only.

Retry Util -> Busy Retry Util #226

Merged
merged 2 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/rgw/driver/sfs/sqlite/errors.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,16 @@ bool critical_error(int ec) {
}
}

bool busy_error(int ec) {
switch (ec) {
case SQLITE_BUSY:
case SQLITE_BUSY_RECOVERY:
case SQLITE_BUSY_SNAPSHOT:
case SQLITE_LOCKED:
return true;
default:
return false;
}
}

} // namespace rgw::sal::sfs::sqlite
3 changes: 2 additions & 1 deletion src/rgw/driver/sfs/sqlite/errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@
namespace rgw::sal::sfs::sqlite {

bool critical_error(int ec);
bool busy_error(int ec);

}
} // namespace rgw::sal::sfs::sqlite
20 changes: 13 additions & 7 deletions src/rgw/driver/sfs/sqlite/retry.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace rgw::sal::sfs::sqlite {
// after retrying. Catches all non-critical exceptions and makes them
// available via failed_error(). Critical exceptions are passed on.
template <typename Return>
class RetrySQLite {
class RetrySQLiteBusy {
public:
using Func = std::function<Return(void)>;

Expand All @@ -42,11 +42,11 @@ class RetrySQLite {
std::error_code m_failed_error{};

public:
RetrySQLite(Func&& fn) : m_fn(std::forward<Func>(fn)) {}
RetrySQLite(RetrySQLite&&) = delete;
RetrySQLite(const RetrySQLite&) = delete;
RetrySQLite& operator=(const RetrySQLite&) = delete;
RetrySQLite& operator=(RetrySQLite&&) = delete;
RetrySQLiteBusy(Func&& fn) : m_fn(std::forward<Func>(fn)) {}
RetrySQLiteBusy(RetrySQLiteBusy&&) = delete;
RetrySQLiteBusy(const RetrySQLiteBusy&) = delete;
RetrySQLiteBusy& operator=(const RetrySQLiteBusy&) = delete;
RetrySQLiteBusy& operator=(RetrySQLiteBusy&&) = delete;

/// run runs fn with up to m_max_retries retries. It may throw
/// critical-exceptions. Non-critical errors are made available via
Expand All @@ -66,7 +66,13 @@ class RetrySQLite {
} catch (const std::system_error& ex) {
m_failed_error = ex.code();
if (critical_error(ex.code().value())) {
// Rethrow, expect a higher layer to shut us down
ceph_abort_msgf(
"Critical SQLite error %d. Shutting down.", ex.code().value()
);
}
if (!busy_error(ex.code().value())) {
// Rethrow, expect a higher layer to handle (e.g constraint
// violations), reply internal server error or shut us down
throw ex;
}
std::this_thread::sleep_for(10ms * retry);
Expand Down
2 changes: 1 addition & 1 deletion src/rgw/driver/sfs/sqlite/sqlite_buckets.cc
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ std::optional<DBDeletedObjectItems> SQLiteBuckets::delete_bucket_transact(
const std::string& bucket_id, uint max_objects, bool& bucket_deleted
) const {
auto storage = conn->get_storage();
RetrySQLite<DBDeletedObjectItems> retry([&]() {
RetrySQLiteBusy<DBDeletedObjectItems> retry([&]() {
bucket_deleted = false;
DBDeletedObjectItems ret_values;
storage.begin_transaction();
Expand Down
6 changes: 3 additions & 3 deletions src/rgw/driver/sfs/sqlite/sqlite_multipart.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ std::optional<DBMultipartPart> SQLiteMultipart::create_or_reset_part(
) const {
auto storage = conn->get_storage();

RetrySQLite<std::optional<DBMultipartPart>> retry([&]() {
RetrySQLiteBusy<std::optional<DBMultipartPart>> retry([&]() {
auto transaction = storage.transaction_guard();
std::optional<DBMultipartPart> entry = std::nullopt;
auto cnt = storage.count<DBMultipart>(where(
Expand Down Expand Up @@ -466,7 +466,7 @@ SQLiteMultipart::remove_multiparts_by_bucket_id_transact(
) const {
DBDeletedMultipartItems ret_parts;
auto storage = conn->get_storage();
RetrySQLite<DBDeletedMultipartItems> retry([&]() {
RetrySQLiteBusy<DBDeletedMultipartItems> retry([&]() {
auto transaction = storage.transaction_guard();
// get first the list of parts to be deleted up to max_items
ret_parts = storage.select(
Expand Down Expand Up @@ -532,7 +532,7 @@ SQLiteMultipart::remove_done_or_aborted_multiparts_transact(uint max_items
) const {
DBDeletedMultipartItems ret_parts;
auto storage = conn->get_storage();
RetrySQLite<DBDeletedMultipartItems> retry([&]() {
RetrySQLiteBusy<DBDeletedMultipartItems> retry([&]() {
auto transaction = storage.transaction_guard();
// get first the list of parts to be deleted up to max_items
ret_parts = storage.select(
Expand Down
6 changes: 3 additions & 3 deletions src/rgw/driver/sfs/sqlite/sqlite_versioned_objects.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ bool SQLiteVersionedObjects::
const DBVersionedObject& object, std::vector<ObjectState> allowed_states
) const {
auto storage = conn->get_storage();
RetrySQLite<bool> retry([&]() {
RetrySQLiteBusy<bool> retry([&]() {
auto transaction = storage.transaction_guard();
storage.update_all(
set(c(&DBVersionedObject::object_id) = object.object_id,
Expand Down Expand Up @@ -449,7 +449,7 @@ SQLiteVersionedObjects::create_new_versioned_object_transact(
const std::string& version_id
) const {
auto storage = conn->get_storage();
RetrySQLite<DBVersionedObject> retry([&]() {
RetrySQLiteBusy<DBVersionedObject> retry([&]() {
auto transaction = storage.transaction_guard();
auto objs = storage.select(
columns(&DBObject::uuid),
Expand Down Expand Up @@ -491,7 +491,7 @@ SQLiteVersionedObjects::remove_deleted_versions_transact(uint max_objects
) const {
DBDeletedObjectItems ret_objs;
auto storage = conn->get_storage();
RetrySQLite<DBDeletedObjectItems> retry([&]() {
RetrySQLiteBusy<DBDeletedObjectItems> retry([&]() {
auto transaction = storage.transaction_guard();
// get first the list of objects to be deleted up to max_objects
// order by size so when we delete the versions data we are more efficient
Expand Down
19 changes: 10 additions & 9 deletions src/test/rgw/sfs/test_rgw_sfs_retry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "common/ceph_context.h"
#include "driver/sfs/sqlite/retry.h"
#include "driver/sfs/sqlite/sqlite_orm.h"
#include "gtest/gtest.h"
#include "rgw/rgw_sal_sfs.h"
#include "rgw_common.h"

Expand All @@ -25,10 +26,12 @@ class TestSFSRetrySQLite : public ::testing::Test {
void TearDown() override {}
};

class TestSFSRetrySQLiteDeathTest : public TestSFSRetrySQLite {};

TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) {
auto exception =
std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()};
RetrySQLite<int> uut([&]() {
RetrySQLiteBusy<int> uut([&]() {
throw exception;
return 0;
});
Expand All @@ -38,21 +41,19 @@ TEST_F(TestSFSRetrySQLite, retry_non_crit_till_failure) {
EXPECT_GT(uut.retries(), 0);
}

TEST_F(TestSFSRetrySQLite, crit_throws_without_retry) {
TEST_F(TestSFSRetrySQLiteDeathTest, crit_aborts) {
GTEST_FLAG_SET(death_test_style, "threadsafe");
auto exception = std::system_error{
SQLITE_CORRUPT, sqlite_orm::get_sqlite_error_category()};
RetrySQLite<int> uut([&]() {
RetrySQLiteBusy<int> uut([&]() {
throw exception;
return 0;
});
EXPECT_THROW(uut.run(), decltype(exception));
EXPECT_FALSE(uut.successful());
EXPECT_EQ(uut.failed_error(), exception.code());
EXPECT_EQ(uut.retries(), 0);
ASSERT_DEATH(uut.run(), "Critical SQLite error");
}

TEST_F(TestSFSRetrySQLite, simple_return_succeeds_immediately) {
RetrySQLite<int> uut([&]() { return 42; });
RetrySQLiteBusy<int> uut([&]() { return 42; });
EXPECT_EQ(uut.run(), 42);
EXPECT_TRUE(uut.successful());
EXPECT_EQ(uut.retries(), 0);
Expand All @@ -62,7 +63,7 @@ TEST_F(TestSFSRetrySQLite, retry_second_time_success) {
auto exception =
std::system_error{SQLITE_BUSY, sqlite_orm::get_sqlite_error_category()};
bool first = true;
RetrySQLite<int> uut([&]() {
RetrySQLiteBusy<int> uut([&]() {
if (first) {
first = false;
throw exception;
Expand Down