Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-14909 pool: property for rdb svc_ops KVS entry max age #13639

Merged
merged 13 commits into from
Jan 30, 2024
Merged
8 changes: 8 additions & 0 deletions src/common/prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,14 @@ daos_prop_valid(daos_prop_t *prop, bool pool, bool input)
return false;
}
break;
case DAOS_PROP_PO_SVC_OPS_ENTRY_AGE:
val = prop->dpp_entries[i].dpe_val;
if ((val < DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_MIN) ||
(val > DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_MAX)) {
D_ERROR("invalid svc_ops_entry_age " DF_U64 ".\n", val);
return false;
}
break;
/* container-only properties */
case DAOS_PROP_CO_LAYOUT_TYPE:
val = prop->dpp_entries[i].dpe_val;
Expand Down
8 changes: 8 additions & 0 deletions src/common/tests_dmg_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,14 @@ dmg_pool_create(const char *dmg_config_file,
D_GOTO(out, rc = -DER_NOMEM);
}

entry = daos_prop_entry_get(prop, DAOS_PROP_PO_SVC_OPS_ENTRY_AGE);
if (entry != NULL) {
args = cmd_push_arg(args, &argcount, "--properties=svc_ops_entry_age:%zu ",
entry->dpe_val);
if (args == NULL)
D_GOTO(out, rc = -DER_NOMEM);
}

entry = daos_prop_entry_get(prop, DAOS_PROP_PO_SPACE_RB);
if (entry != NULL) {
args = cmd_push_arg(args, &argcount, "--properties=space_rb:%zu ",
Expand Down
7 changes: 5 additions & 2 deletions src/control/lib/daos/pool_cont_prop.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,9 @@ const (
//PoolPropertyPerfDomain is pool performance domain
PoolPropertyPerfDomain = C.DAOS_PROP_PO_PERF_DOMAIN
//PoolPropertyReintMode is pool reintegration mode
PoolPropertyReintMode = C.DAOS_PROP_PO_REINT_MODE
PoolPropertySvcOpsEnabled = C.DAOS_PROP_PO_SVC_OPS_ENABLED
PoolPropertyReintMode = C.DAOS_PROP_PO_REINT_MODE
PoolPropertySvcOpsEnabled = C.DAOS_PROP_PO_SVC_OPS_ENABLED
PoolPropertySvcOpsEntryAge = C.DAOS_PROP_PO_SVC_OPS_ENTRY_AGE
)

const (
Expand Down Expand Up @@ -186,6 +187,8 @@ const (
PoolSvcRedunFacMax = C.DAOS_PROP_PO_SVC_REDUN_FAC_MAX
// PoolSvcRedunFacDefault defines the default value of PoolPropertySvcRedunFac.
PoolSvcRedunFacDefault = C.DAOS_PROP_PO_SVC_REDUN_FAC_DEFAULT
PoolSvcOpsEntryAgeMin = C.DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_MIN
PoolSvcOpsEntryAgeMax = C.DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_MAX
)

const (
Expand Down
25 changes: 25 additions & 0 deletions src/control/lib/daos/pool_property.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,31 @@ func PoolProperties() PoolPropertyMap {
valueMarshaler: numericMarshaler,
},
},
"svc_ops_entry_age": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be a good idea to add some test cases for this to TestControl_PoolProperties. Looks like the svc_ops_enabled property could use some, too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a separate PR with some candidate unit test cases for these two new properties, #13676

Property: PoolProperty{
Number: PoolPropertySvcOpsEntryAge,
Description: "Metadata duplicate operations KVS max entry age, in seconds",
valueHandler: func(s string) (*PoolPropertyValue, error) {
oeErr := errors.Errorf("invalid svc_ops_entry_age %s (valid values: %d-%d)", s, PoolSvcOpsEntryAgeMin, PoolSvcOpsEntryAgeMax)
oeVal, err := strconv.ParseUint(s, 10, 32)
if err != nil {
return nil, oeErr
}
if oeVal < PoolSvcOpsEntryAgeMin || oeVal > PoolSvcOpsEntryAgeMax {
return nil, errors.Wrap(oeErr, "value supplied is out of range")
}
return &PoolPropertyValue{oeVal}, nil
},
valueStringer: func(v *PoolPropertyValue) string {
n, err := v.GetNumber()
if err != nil {
return "not set"
}
return fmt.Sprintf("%d", n)
},
valueMarshaler: numericMarshaler,
},
},
"label": {
Property: PoolProperty{
Number: PoolPropertyLabel,
Expand Down
5 changes: 3 additions & 2 deletions src/include/daos/pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
#define DAOS_PO_QUERY_PROP_CHECKPOINT_THRESH (1ULL << (PROP_BIT_START + 23))
#define DAOS_PO_QUERY_PROP_REINT_MODE (1ULL << (PROP_BIT_START + 24))
#define DAOS_PO_QUERY_PROP_SVC_OPS_ENABLED (1ULL << (PROP_BIT_START + 25))
#define DAOS_PO_QUERY_PROP_BIT_END 41
#define DAOS_PO_QUERY_PROP_SVC_OPS_ENTRY_AGE (1ULL << (PROP_BIT_START + 26))
#define DAOS_PO_QUERY_PROP_BIT_END 42

#define DAOS_PO_QUERY_PROP_ALL \
(DAOS_PO_QUERY_PROP_LABEL | DAOS_PO_QUERY_PROP_SPACE_RB | DAOS_PO_QUERY_PROP_SELF_HEAL | \
Expand All @@ -65,7 +66,7 @@
DAOS_PO_QUERY_PROP_OBJ_VERSION | DAOS_PO_QUERY_PROP_PERF_DOMAIN | \
DAOS_PO_QUERY_PROP_CHECKPOINT_MODE | DAOS_PO_QUERY_PROP_CHECKPOINT_FREQ | \
DAOS_PO_QUERY_PROP_CHECKPOINT_THRESH | DAOS_PO_QUERY_PROP_REINT_MODE | \
DAOS_PO_QUERY_PROP_SVC_OPS_ENABLED)
DAOS_PO_QUERY_PROP_SVC_OPS_ENABLED | DAOS_PO_QUERY_PROP_SVC_OPS_ENTRY_AGE)

/*
* Version 1 corresponds to 2.2 (aggregation optimizations)
Expand Down
17 changes: 11 additions & 6 deletions src/include/daos_prop.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ enum daos_pool_props {
DAOS_PROP_PO_REINT_MODE,
/** Metadata duplicate operations detection enabled (1) or disabled (0) */
DAOS_PROP_PO_SVC_OPS_ENABLED,
/** Metadata duplicate operations SVC_OPS KVS max entry age (seconds), default 300 */
DAOS_PROP_PO_SVC_OPS_ENTRY_AGE,
DAOS_PROP_PO_MAX,
};

Expand Down Expand Up @@ -246,13 +248,16 @@ enum {
};

#define DAOS_PROP_PO_CHECKPOINT_MODE_DEFAULT DAOS_CHECKPOINT_TIMED
#define DAOS_PROP_PO_CHECKPOINT_FREQ_DEFAULT 5 /* 5 seconds */
#define DAOS_PROP_PO_CHECKPOINT_FREQ_MIN 1 /* 1 second */
#define DAOS_PROP_PO_CHECKPOINT_FREQ_DEFAULT 5 /* 5 seconds */
#define DAOS_PROP_PO_CHECKPOINT_FREQ_MIN 1 /* 1 second */
#define DAOS_PROP_PO_CHECKPOINT_FREQ_MAX (1 << 20) /* 1 million seconds */
#define DAOS_PROP_PO_CHECKPOINT_THRESH_DEFAULT 50 /* 50 % WAL capacity */
#define DAOS_PROP_PO_CHECKPOINT_THRESH_MAX 75 /* 75 % WAL capacity */
#define DAOS_PROP_PO_CHECKPOINT_THRESH_MIN 10 /* 10 % WAL capacity */
#define DAOS_PROP_PO_SVC_OPS_ENABLED_DEFAULT 1 /* true: enabled by default */
#define DAOS_PROP_PO_CHECKPOINT_THRESH_DEFAULT 50 /* 50 % WAL capacity */
#define DAOS_PROP_PO_CHECKPOINT_THRESH_MAX 75 /* 75 % WAL capacity */
#define DAOS_PROP_PO_CHECKPOINT_THRESH_MIN 10 /* 10 % WAL capacity */
#define DAOS_PROP_PO_SVC_OPS_ENABLED_DEFAULT 1 /* true: enabled by default */
#define DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_DEFAULT 300 /* 300 seconds */
#define DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_MIN 150 /* 150 seconds */
#define DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_MAX 600 /* 600 seconds */

/** self healing strategy bits */
#define DAOS_SELF_HEAL_AUTO_EXCLUDE (1U << 0)
Expand Down
3 changes: 3 additions & 0 deletions src/pool/rpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,9 @@ pool_query_bits(daos_pool_info_t *po_info, daos_prop_t *prop)
case DAOS_PROP_PO_SVC_OPS_ENABLED:
bits |= DAOS_PO_QUERY_PROP_SVC_OPS_ENABLED;
break;
case DAOS_PROP_PO_SVC_OPS_ENTRY_AGE:
bits |= DAOS_PO_QUERY_PROP_SVC_OPS_ENTRY_AGE;
break;
default:
D_ERROR("ignore bad dpt_type %d.\n", entry->dpe_type);
break;
Expand Down
1 change: 1 addition & 0 deletions src/pool/srv_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ struct pool_iv_prop {
uint32_t pip_perf_domain;
uint32_t pip_reint_mode;
uint32_t pip_svc_ops_enabled;
uint32_t pip_svc_ops_entry_age;
char pip_iv_buf[0];
};

Expand Down
6 changes: 6 additions & 0 deletions src/pool/srv_iv.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ pool_iv_prop_l2g(daos_prop_t *prop, struct pool_iv_prop *iv_prop)
case DAOS_PROP_PO_SVC_OPS_ENABLED:
iv_prop->pip_svc_ops_enabled = prop_entry->dpe_val;
break;
case DAOS_PROP_PO_SVC_OPS_ENTRY_AGE:
iv_prop->pip_svc_ops_entry_age = prop_entry->dpe_val;
break;
default:
D_ASSERTF(0, "bad dpe_type %d\n", prop_entry->dpe_type);
break;
Expand Down Expand Up @@ -366,6 +369,9 @@ pool_iv_prop_g2l(struct pool_iv_prop *iv_prop, daos_prop_t *prop)
case DAOS_PROP_PO_SVC_OPS_ENABLED:
prop_entry->dpe_val = iv_prop->pip_svc_ops_enabled;
break;
case DAOS_PROP_PO_SVC_OPS_ENTRY_AGE:
prop_entry->dpe_val = iv_prop->pip_svc_ops_entry_age;
break;
default:
D_ASSERTF(0, "bad dpe_type %d\n", prop_entry->dpe_type);
break;
Expand Down
4 changes: 4 additions & 0 deletions src/pool/srv_layout.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ struct daos_prop_entry pool_prop_entries_default[DAOS_PROP_PO_NUM] = {
{
.dpe_type = DAOS_PROP_PO_SVC_OPS_ENABLED,
.dpe_val = DAOS_PROP_PO_SVC_OPS_ENABLED_DEFAULT,
},
{
.dpe_type = DAOS_PROP_PO_SVC_OPS_ENTRY_AGE,
.dpe_val = DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_DEFAULT,
}};

daos_prop_t pool_prop_default = {
Expand Down
69 changes: 42 additions & 27 deletions src/pool/srv_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,7 @@
#define DAOS_POOL_GLOBAL_VERSION_WITH_SVC_OPS_KVS 3

#define PS_OPS_PER_SEC 4096
#define DEFAULT_PS_OPS_ENTRY_AGE_SEC 300ULL
#define MIN_PS_OPS_ENTRY_AGE_SEC 150
#define MAX_PS_OPS_ENTRY_AGE_SEC 600
#define DAOS_MD_OPS_AGE_SEC_ENV "DAOS_MD_OPS_AGE_SEC"

/*
* Return the corresponding VOS pool DF version or 0 if pool_global_version is
* not supported.
Expand Down Expand Up @@ -374,6 +371,7 @@ pool_prop_default_copy(daos_prop_t *prop_def, daos_prop_t *prop)
case DAOS_PROP_PO_SVC_REDUN_FAC:
case DAOS_PROP_PO_PERF_DOMAIN:
case DAOS_PROP_PO_SVC_OPS_ENABLED:
case DAOS_PROP_PO_SVC_OPS_ENTRY_AGE:
entry_def->dpe_val = entry->dpe_val;
break;
case DAOS_PROP_PO_POLICY:
Expand Down Expand Up @@ -682,6 +680,13 @@ pool_prop_write(struct rdb_tx *tx, const rdb_path_t *kvs, daos_prop_t *prop)
if (rc)
return rc;
break;
case DAOS_PROP_PO_SVC_OPS_ENTRY_AGE:
val32 = entry->dpe_val;
d_iov_set(&value, &val32, sizeof(val32));
rc = rdb_tx_update(tx, kvs, &ds_pool_prop_svc_ops_age, &value);
if (rc)
return rc;
break;
default:
D_ERROR("bad dpe_type %d.\n", entry->dpe_type);
return -DER_INVAL;
Expand All @@ -695,25 +700,6 @@ pool_prop_write(struct rdb_tx *tx, const rdb_path_t *kvs, daos_prop_t *prop)
return rc;
}

/**
 * Return the maximum age, in seconds, of entries kept in the svc_ops KVS.
 *
 * The value is taken from the DAOS_MD_OPS_AGE_SEC environment variable. When
 * the variable is unset, is not a plain decimal integer, or falls outside
 * MIN_PS_OPS_ENTRY_AGE_SEC..MAX_PS_OPS_ENTRY_AGE_SEC, the default
 * DEFAULT_PS_OPS_ENTRY_AGE_SEC is returned (and, for a bad value, an error is
 * logged).
 *
 * \return	entry age in seconds
 */
static uint32_t
get_svc_ops_age(void)
{
	const uint32_t ops_age_default = DEFAULT_PS_OPS_ENTRY_AGE_SEC;
	char          *v;
	char          *end;
	long           n;

	v = getenv(DAOS_MD_OPS_AGE_SEC_ENV); /* in seconds */
	if (v == NULL)
		return ops_age_default;

	/*
	 * strtol() rather than atoi(): atoi() cannot report malformed input and
	 * has undefined behavior on overflow. On overflow strtol() saturates to
	 * LONG_MIN/LONG_MAX, which the range check below rejects.
	 */
	n = strtol(v, &end, 10);
	if (end == v || *end != '\0') {
		D_ERROR("metadata ps ops age '%s' is not a valid integer; using %u sec\n", v,
			ops_age_default);
		return ops_age_default;
	}
	if ((n < MIN_PS_OPS_ENTRY_AGE_SEC) || (n > MAX_PS_OPS_ENTRY_AGE_SEC)) {
		D_ERROR("metadata ps ops age %ld out of range %d..%d; using %u sec\n", n,
			MIN_PS_OPS_ENTRY_AGE_SEC, MAX_PS_OPS_ENTRY_AGE_SEC, ops_age_default);
		return ops_age_default;
	}
	return (uint32_t)n;
}

static int
init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, const char *group,
const d_rank_list_t *ranks, daos_prop_t *prop, uint32_t ndomains,
Expand All @@ -729,7 +715,7 @@ init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, co
uint32_t upgrade_global_version = DAOS_POOL_GLOBAL_VERSION;
uint32_t svc_ops_enabled = 1;
/* max number of entries in svc_ops KVS: equivalent of max age (sec) x PS_OPS_PER_SEC */
uint32_t svc_ops_age;
uint32_t svc_ops_age = DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_DEFAULT;
uint32_t svc_ops_max;
uint32_t svc_ops_num;
uint64_t rdb_size;
Expand Down Expand Up @@ -841,10 +827,12 @@ init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, co
goto out_map_buf;
}

/* Maximum number of RPCs that may be kept in svc_ops.
* Default: number of RPCs equivalent to PS_OPS_PER_SEC x DEFAULT_PS_OPS_ENTRY_AGE_SEC.
/* Maximum number of RPCs that may be kept in svc_ops, derived from the
* DAOS_PROP_PO_SVC_OPS_ENTRY_AGE property (PS_OPS_PER_SEC x entry age in seconds).
* Default: PS_OPS_PER_SEC x DAOS_PROP_PO_SVC_OPS_ENTRY_AGE_DEFAULT.
*/
svc_ops_age = get_svc_ops_age();
entry = daos_prop_entry_get(prop, DAOS_PROP_PO_SVC_OPS_ENTRY_AGE);
if (entry)
svc_ops_age = entry->dpe_val;
svc_ops_max = PS_OPS_PER_SEC * svc_ops_age;
svc_ops_num = 0;
d_iov_set(&value, &svc_ops_age, sizeof(svc_ops_age));
Expand Down Expand Up @@ -2775,6 +2763,27 @@ pool_prop_read(struct rdb_tx *tx, const struct pool_svc *svc, uint64_t bits,
idx++;
}

if (bits & DAOS_PO_QUERY_PROP_SVC_OPS_ENTRY_AGE) {
d_iov_set(&value, &val32, sizeof(val32));
rc = rdb_tx_lookup(tx, &svc->ps_root, &ds_pool_prop_svc_ops_age, &value);
if (rc == -DER_NONEXIST && global_ver < DAOS_POOL_GLOBAL_VERSION_WITH_SVC_OPS_KVS) {
/* needs to be upgraded */
rc = 0;
val32 = 0;
prop->dpp_entries[idx].dpe_flags |= DAOS_PROP_ENTRY_NOT_SET;
} else if (rc != 0) {
DL_ERROR(rc, DF_UUID ": DAOS_PROP_PO_SVC_OPS_ENTRY_AGE missing from pool",
DP_UUID(svc->ps_uuid));
D_GOTO(out_prop, rc);
}
if (rc != 0)
D_GOTO(out_prop, rc);
D_ASSERT(idx < nr);
prop->dpp_entries[idx].dpe_type = DAOS_PROP_PO_SVC_OPS_ENTRY_AGE;
prop->dpp_entries[idx].dpe_val = val32;
idx++;
}

*prop_out = prop;
return 0;

Expand Down Expand Up @@ -4518,6 +4527,7 @@ ds_pool_query_handler(crt_rpc_t *rpc, int handler_version)
case DAOS_PROP_PO_CHECKPOINT_THRESH:
case DAOS_PROP_PO_REINT_MODE:
case DAOS_PROP_PO_SVC_OPS_ENABLED:
case DAOS_PROP_PO_SVC_OPS_ENTRY_AGE:
if (entry->dpe_val != iv_entry->dpe_val) {
D_ERROR("type %d mismatch "DF_U64" - "
DF_U64".\n", entry->dpe_type,
Expand Down Expand Up @@ -5981,6 +5991,11 @@ ds_pool_svc_set_prop(uuid_t pool_uuid, d_rank_list_t *ranks, daos_prop_t *prop)
D_GOTO(out, rc = -DER_NO_PERM);
}

if (daos_prop_entry_get(prop, DAOS_PROP_PO_SVC_OPS_ENTRY_AGE)) {
D_ERROR("Can't set pool svc_ops_entry_age on existing pool.\n");
D_GOTO(out, rc = -DER_NO_PERM);
}

/* Disallow to begin with; will support in the future. */
if (daos_prop_entry_get(prop, DAOS_PROP_PO_SVC_REDUN_FAC)) {
D_ERROR(DF_UUID ": cannot set pool service redundancy factor on existing pool\n",
Expand Down
15 changes: 12 additions & 3 deletions src/tests/suite/daos_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,7 @@ pool_properties(void **state)
daos_prop_t *prop_query;
struct daos_prop_entry *entry;
daos_pool_info_t info = {0};
const uint64_t svc_ops_age = 180;
int rc;
char *expected_owner;
char *expected_group;
Expand All @@ -510,7 +511,7 @@ pool_properties(void **state)
SMALL_POOL_SIZE, 0, NULL);
assert_rc_equal(rc, 0);

prop = daos_prop_alloc(4);
prop = daos_prop_alloc(5);
/* label - set arg->pool_label to use daos_pool_connect() */
prop->dpp_entries[0].dpe_type = DAOS_PROP_PO_LABEL;
D_STRNDUP_S(prop->dpp_entries[0].dpe_str, label);
Expand All @@ -524,8 +525,11 @@ pool_properties(void **state)
prop->dpp_entries[2].dpe_type = DAOS_PROP_PO_SVC_OPS_ENABLED;
prop->dpp_entries[2].dpe_val = 0; /* disabled */

prop->dpp_entries[3].dpe_type = DAOS_PROP_PO_SPACE_RB;
prop->dpp_entries[3].dpe_val = space_rb;
prop->dpp_entries[3].dpe_type = DAOS_PROP_PO_SVC_OPS_ENTRY_AGE;
prop->dpp_entries[3].dpe_val = svc_ops_age; /* seconds */

prop->dpp_entries[4].dpe_type = DAOS_PROP_PO_SPACE_RB;
prop->dpp_entries[4].dpe_val = space_rb;

while (!rc && arg->setup_state != SETUP_POOL_CONNECT)
rc = test_setup_next_step((void **)&arg, NULL, prop, NULL);
Expand Down Expand Up @@ -559,6 +563,11 @@ pool_properties(void **state)
fail_msg("svc_ops_enabled verification failed.\n");
}

entry = daos_prop_entry_get(prop_query, DAOS_PROP_PO_SVC_OPS_ENTRY_AGE);
if (entry == NULL || (entry->dpe_val != svc_ops_age)) {
fail_msg("svc_ops_entry_age verification failed.\n");
}

entry = daos_prop_entry_get(prop_query, DAOS_PROP_PO_SPACE_RB);
if (entry == NULL || entry->dpe_val != space_rb) {
fail_msg("space_rb verification failed.\n");
Expand Down
Loading