Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding notion of a recovery owner for network recovery #6705

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
07b152f
Update
gaurav137 Dec 11, 2024
7cd6011
Update
gaurav137 Dec 11, 2024
580670c
Update
gaurav137 Dec 11, 2024
4df08bd
Update
gaurav137 Dec 11, 2024
491aed2
Update
gaurav137 Dec 12, 2024
41e16fd
Update
gaurav137 Dec 12, 2024
90cbdfb
Update
gaurav137 Dec 12, 2024
1c1bda9
Update
gaurav137 Dec 12, 2024
2f7ff7f
Update
gaurav137 Dec 13, 2024
7d0fe02
working
gaurav137 Dec 13, 2024
607b8a4
Update
gaurav137 Dec 13, 2024
986138a
Update
gaurav137 Dec 13, 2024
0ae745c
Update
gaurav137 Dec 13, 2024
33b736b
Update
gaurav137 Dec 13, 2024
96838b7
Update
gaurav137 Dec 13, 2024
7104982
Update
gaurav137 Dec 13, 2024
7639bf9
Update
gaurav137 Dec 13, 2024
8bd2d95
Update
gaurav137 Dec 13, 2024
e5021f1
Update
gaurav137 Dec 13, 2024
0dcc258
Test failure fixes
gaurav137 Dec 13, 2024
e0bc7a7
schema test fix
gaurav137 Dec 13, 2024
b28b452
Update samples/constitutions/default/actions.js
gaurav137 Dec 13, 2024
8dc3f06
picking main
gaurav137 Dec 13, 2024
01df49a
Update
gaurav137 Dec 13, 2024
7546e41
Update
gaurav137 Dec 15, 2024
754adc3
formatting fixes
gaurav137 Dec 16, 2024
123387b
Update
gaurav137 Dec 16, 2024
1228c1d
Update
gaurav137 Dec 18, 2024
12d965d
Merge branch 'main' into dev/gsinha/recovery-owner
achamayou Jan 13, 2025
32aa899
Compilation fixes post picking latest changes from main
gaurav137 Jan 14, 2025
8c931b4
Merge branch 'main' into dev/gsinha/recovery-owner
gaurav137 Jan 14, 2025
ff6ba59
Refactoring LedgerSecretWrappingKey
gaurav137 Jan 17, 2025
476636f
Merge branch 'dev/gsinha/recovery-owner' of https://github.com/gaurav…
gaurav137 Jan 17, 2025
a2f79a9
Merge branch 'main' into dev/gsinha/recovery-owner
gaurav137 Jan 17, 2025
cab53eb
clang-format fixes
gaurav137 Jan 17, 2025
d5ffb66
Renaming methods to remove ambiguity
gaurav137 Jan 20, 2025
5f56142
Merge branch 'main' into dev/gsinha/recovery-owner
gaurav137 Jan 22, 2025
02e7df9
Taking comments
gaurav137 Jan 22, 2025
8416064
Merge branch 'dev/gsinha/recovery-owner' of https://github.com/gaurav…
gaurav137 Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .prettierrc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
trailingComma: "all"
gaurav137 marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 5 additions & 0 deletions doc/host_config_schema/cchost_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,11 @@
"data_json_file": {
"type": ["string", "null"],
"description": "Path to member data file (JSON)"
},
"recovery_owner": {
"type": "boolean",
"default": false,
"description": "Whether the member acts as a recovery owner and gets assigned the full recovery share"
}
},
"required": ["certificate_file"],
Expand Down
26 changes: 20 additions & 6 deletions include/ccf/service/tables/members.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,26 +36,34 @@ namespace ccf
std::optional<ccf::crypto::Pem> encryption_pub_key = std::nullopt;
nlohmann::json member_data = nullptr;

/** If set then the member is to receive a full share ("super-share")
allowing it to single-handedly recover the network without
requiring any other recovery member to submit their shares. */
std::optional<bool> recovery_owner = std::nullopt;

NewMember() {}

NewMember(
const ccf::crypto::Pem& cert_,
const std::optional<ccf::crypto::Pem>& encryption_pub_key_ = std::nullopt,
const nlohmann::json& member_data_ = nullptr) :
const nlohmann::json& member_data_ = nullptr,
const std::optional<bool>& recovery_owner_ = std::nullopt) :
cert(cert_),
encryption_pub_key(encryption_pub_key_),
member_data(member_data_)
member_data(member_data_),
recovery_owner(recovery_owner_)
{}

bool operator==(const NewMember& rhs) const
{
return cert == rhs.cert && encryption_pub_key == rhs.encryption_pub_key &&
member_data == rhs.member_data;
member_data == rhs.member_data && recovery_owner == rhs.recovery_owner;
}
};
DECLARE_JSON_TYPE_WITH_OPTIONAL_FIELDS(NewMember)
DECLARE_JSON_REQUIRED_FIELDS(NewMember, cert)
DECLARE_JSON_OPTIONAL_FIELDS(NewMember, encryption_pub_key, member_data)
DECLARE_JSON_OPTIONAL_FIELDS(
NewMember, encryption_pub_key, member_data, recovery_owner)

struct MemberDetails
{
Expand All @@ -65,14 +73,20 @@ namespace ccf
members for example. */
nlohmann::json member_data = nullptr;

/** If set then the member is to receive a full share ("super-share")
allowing it to single-handedly recover the network without
requiring any other recovery member to submit their shares. */
std::optional<bool> recovery_owner = std::nullopt;

bool operator==(const MemberDetails& rhs) const
{
return status == rhs.status && member_data == rhs.member_data;
return status == rhs.status && member_data == rhs.member_data &&
recovery_owner == rhs.recovery_owner;
}
};
DECLARE_JSON_TYPE_WITH_OPTIONAL_FIELDS(MemberDetails)
DECLARE_JSON_REQUIRED_FIELDS(MemberDetails, status)
DECLARE_JSON_OPTIONAL_FIELDS(MemberDetails, member_data)
DECLARE_JSON_OPTIONAL_FIELDS(MemberDetails, member_data, recovery_owner)

using MemberInfo = ServiceMap<MemberId, MemberDetails>;

Expand Down
12 changes: 12 additions & 0 deletions samples/constitutions/default/actions.js
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,17 @@ const actions = new Map([
function (args) {
checkX509CertBundle(args.cert, "cert");
checkType(args.member_data, "object?", "member_data");
checkType(args.recovery_owner, "boolean?", "recovery_owner");

if (
args.encryption_pub_key == null &&
gaurav137 marked this conversation as resolved.
Show resolved Hide resolved
args.recovery_owner !== null &&
args.recovery_owner !== undefined
) {
throw new Error(
"Cannot specify a recovery_owner value when encryption_pub_key is not specified",
);
}
// Also check that public encryption key is well formed, if it exists

// Check if member exists
Expand Down Expand Up @@ -388,6 +399,7 @@ const actions = new Map([

let member_info = {};
member_info.member_data = args.member_data;
member_info.recovery_owner = args.recovery_owner;
member_info.status = "Accepted";
ccf.kv["public:ccf.gov.members.info"].set(
rawMemberId,
Expand Down
6 changes: 5 additions & 1 deletion src/host/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,18 @@ namespace host
std::string certificate_file;
std::optional<std::string> encryption_public_key_file = std::nullopt;
std::optional<std::string> data_json_file = std::nullopt;
std::optional<bool> recovery_owner = std::nullopt;

bool operator==(const ParsedMemberInfo& other) const = default;
};

DECLARE_JSON_TYPE_WITH_OPTIONAL_FIELDS(ParsedMemberInfo);
DECLARE_JSON_REQUIRED_FIELDS(ParsedMemberInfo, certificate_file);
DECLARE_JSON_OPTIONAL_FIELDS(
ParsedMemberInfo, encryption_public_key_file, data_json_file);
ParsedMemberInfo,
encryption_public_key_file,
data_json_file,
recovery_owner);

struct CCHostConfig : public ccf::CCFConfig
{
Expand Down
34 changes: 27 additions & 7 deletions src/host/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,15 +215,32 @@ int main(int argc, char** argv)
"On start, ledger directory should not exist ({})",
config.ledger.directory));
}
// Count members with public encryption key as only these members will be
// handed a recovery share.
// Note that it is acceptable to start a network without any member having
// a recovery share. The service will check that at least one recovery
// member is added before the service can be opened.

for (auto const& m : config.command.start.members)
{
if (
!m.encryption_public_key_file.has_value() &&
m.recovery_owner.has_value())
{
throw std::logic_error(fmt::format(
"No public encryption key has been specified but recovery owner "
"value has been set for a member"));
}
}

// Count members with public encryption key who are not recovery
// owners as only these members will be handed a recovery share
// that accrues towards the recovery threshold.
// Note that it is acceptable to start a network without any member
// having a recovery share. The service will check that at least one
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we support opening deliberately unrecoverable services, and although I am not aware of current use cases, they have come up as potential use cases in the past, so I think we want to leave that open as a possibility.

Copy link
Author

@gaurav137 gaurav137 Dec 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@achamayou not sure which lines you wanted me to change here. I added (a) a check to ensure that if recovery_owner has a value then enc_pub_key must also have a value, else throw, and (b) logic to compute members_with_pubk_count while skipping the recovery_owner members.
(a) is like a configuration issue while (b) is only ensuring the correctness of the existing check that the count of recovery members and supplied or calculated default recovery threshold values are sane else the logic already throws below.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"The service will check that at least one..."
^ I don't believe this is true now, and I don't want it to become true, because it precludes creating un-recoverable systems, which we think may be desirable in some cases.

This is something that an operator can quite trivially preclude by modifying the transition_service_to_open() transition if they wish to do so, there is no reason to hardcode it outside the constitution.

Copy link
Author

@gaurav137 gaurav137 Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@achamayou "The service will check that at least one..." — this check (that there must be at least 1 recovery member (aka participant) and that the recovery threshold cannot exceed that number) existed before this PR and continues to work today. So as of now you cannot open a service that has 0 recovery participants. I preserved the check in main.cpp using members_with_pubk_count and there are checks in internal table access::set_recovery_threshold, remove_member and open_service that continue to work as before. Having owners has not changed the checks around recovery threshold and recovery members (participants).

Copy link
Author

@gaurav137 gaurav137 Jan 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@achamayou, if I understood you correctly, we are saying that:

  1. Open a CCF network with > 0 recovery participants and 0 recovery owners: allowed
  2. Open a CCF network with 0 recovery participants and 0 recovery owners: allowed (new behavior)
  3. Open a CCF network with 0 recovery participants and > 0 recovery owners: allowed (new behavior)
  4. Open a CCF network with > 0 recovery participants and > 0 recovery owners: allowed
  5. Removing all recovery participants or recovery owners is allowed.

Allowing 0 recovery participants would have a ripple effect on the existing checks in internal table access::set_recovery_threshold, remove_member and open_service. All these methods would no longer prevent you from having 0 recovery members.

Just to reiterate, even without any recovery owner in play, we are saying that CCF now allows you to run a network with 0 recovery participants/owners. Since the current behavior is to prevent having 0 recovery members (i.e. role is participant), "I don't believe this is true now, and I don't want it to become true, because it precludes creating un-recoverable systems, which we think may be desirable in some cases." sounds like a new feature.

Copy link
Author

@gaurav137 gaurav137 Jan 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may have overthought this, I think you meant that having at least 1 recovery member that is either a participant or a recovery owner is ok for opening a network; we don't have to enforce that at least 1 member must only be a participant. If this is true then we'd have to handle networks that only have recovery owner(s) and thus the recovery threshold value remains 0 for such networks.

Copy link
Author

@gaurav137 gaurav137 Jan 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, given that recovery can be performed with either the set of recovery participants or via the owners, it sounds logical that a network can be opened with either just recovery participants or with just recovery owners.

So:

  1. Open a CCF network with > 0 recovery participants and 0 recovery owners: allowed
  2. Open a CCF network with 0 recovery participants and 0 recovery owners: not allowed
  3. Open a CCF network with 0 recovery participants and > 0 recovery owners: allowed (new behavior)
  4. Open a CCF network with > 0 recovery participants and > 0 recovery owners: allowed
  5. Removing all recovery participants and/or recovery owners as long as 1 recovery owner/participant remains is allowed.
  6. Removing all recovery participants and all recovery owners is not allowed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed:

  1. You are right, and I was confused about unrecoverable networks being possible. They are not, because of the combined threshold check, and threshold enforcement against the number of active members.
  2. The threshold check when there are active members stays the same
  3. The threshold check when there are no active members but there are owners (any number) is that threshold must be 1

// recovery member (who is not a recovery owner) is added before the
// service can be opened.
size_t members_with_pubk_count = 0;
for (auto const& m : config.command.start.members)
{
if (m.encryption_public_key_file.has_value())
if (
m.encryption_public_key_file.has_value() &&
(!m.recovery_owner.has_value() || !m.recovery_owner.value()))
{
members_with_pubk_count++;
}
Expand Down Expand Up @@ -603,12 +620,14 @@ int main(int argc, char** argv)
for (auto const& m : config.command.start.members)
{
std::optional<ccf::crypto::Pem> public_encryption_key = std::nullopt;
std::optional<bool> recovery_owner = std::nullopt;
if (
m.encryption_public_key_file.has_value() &&
!m.encryption_public_key_file.value().empty())
{
public_encryption_key = ccf::crypto::Pem(
files::slurp(m.encryption_public_key_file.value()));
recovery_owner = m.recovery_owner;
}

nlohmann::json md = nullptr;
Expand All @@ -620,7 +639,8 @@ int main(int argc, char** argv)
startup_config.start.members.emplace_back(
ccf::crypto::Pem(files::slurp(m.certificate_file)),
public_encryption_key,
md);
md,
recovery_owner);
}
startup_config.start.constitution = "";
for (const auto& constitution_path :
Expand Down
7 changes: 4 additions & 3 deletions src/node/gov/handlers/acks.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,12 @@ namespace ccf::gov::endpoints
return;
}

// If this is a newly-active recovery member in an open service,
// allocate them a recovery share immediately
// If this is a newly-active recovery member/owner in an open
// service, allocate them a recovery share immediately
if (
newly_active &&
InternalTablesAccess::is_recovery_member(ctx.tx, member_id))
InternalTablesAccess::is_recovery_member_or_owner(
ctx.tx, member_id))
{
auto service_status =
InternalTablesAccess::get_service_status(ctx.tx);
Expand Down
12 changes: 11 additions & 1 deletion src/node/gov/handlers/recovery.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,15 @@ namespace ccf::gov::endpoints
params["share"].template get<std::string>());

size_t submitted_shares_count = 0;
bool full_share_submitted = false;
gaurav137 marked this conversation as resolved.
Show resolved Hide resolved
try
{
submitted_shares_count = share_manager.submit_recovery_share(
ctx.tx, member_id, raw_recovery_share);

full_share_submitted =
share_manager.is_full_share(raw_recovery_share);
gaurav137 marked this conversation as resolved.
Show resolved Hide resolved

OPENSSL_cleanse(
raw_recovery_share.data(), raw_recovery_share.size());
}
Expand Down Expand Up @@ -164,8 +168,13 @@ namespace ccf::gov::endpoints
submitted_shares_count,
threshold);

if (submitted_shares_count >= threshold)
if (submitted_shares_count >= threshold || full_share_submitted)
{
if (full_share_submitted)
{
message += "\nFull recovery share successfully submitted";
}

message += "\nEnd of recovery procedure initiated";
GOV_INFO_FMT("{} - initiating recovery", message);

Expand Down Expand Up @@ -196,6 +205,7 @@ namespace ccf::gov::endpoints
response_body["message"] = message;
response_body["submittedCount"] = submitted_shares_count;
response_body["recoveryThreshold"] = threshold;
response_body["fullShareSubmitted"] = full_share_submitted;

ctx.rpc_ctx->set_response_json(response_body, HTTP_STATUS_OK);
return;
Expand Down
6 changes: 4 additions & 2 deletions src/node/rpc/member_frontend.h
Original file line number Diff line number Diff line change
Expand Up @@ -759,10 +759,12 @@ namespace ccf
auto member_info = members->get(member_id.value());
if (
service_status.value() == ServiceStatus::OPEN &&
InternalTablesAccess::is_recovery_member(ctx.tx, member_id.value()))
InternalTablesAccess::is_recovery_member_or_owner(
ctx.tx, member_id.value()))
{
// When the service is OPEN and the new active member is a recovery
// member, all recovery members are allocated new recovery shares
// member/owner, all recovery members are allocated new recovery
// shares
try
{
share_manager.shuffle_recovery_shares(ctx.tx);
Expand Down
Loading
Loading