-
Notifications
You must be signed in to change notification settings - Fork 306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-14013 rebuild: add a basic UT for incremental reint #15782
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3938,9 +3938,11 @@ static int | |
obj_tree_lookup_cont(daos_handle_t toh, uuid_t co_uuid, daos_handle_t *cont_toh) | ||
{ | ||
struct tree_cache_root *cont_root = NULL; | ||
d_iov_t key_iov; | ||
d_iov_t tmp_iov; | ||
int rc; | ||
d_iov_t key_iov; | ||
d_iov_t tmp_iov; | ||
daos_handle_t migrated_toh; | ||
struct umem_attr uma; | ||
int rc; | ||
|
||
D_ASSERT(daos_handle_is_valid(toh)); | ||
|
||
|
@@ -3957,9 +3959,17 @@ obj_tree_lookup_cont(daos_handle_t toh, uuid_t co_uuid, daos_handle_t *cont_toh) | |
return rc; | ||
} | ||
|
||
memset(&uma, 0, sizeof(uma)); | ||
uma.uma_id = UMEM_CLASS_VMEM; | ||
cont_root = tmp_iov.iov_buf; | ||
*cont_toh = cont_root->tcr_root_hdl; | ||
return 0; | ||
D_ASSERT(daos_handle_is_valid(cont_root->tcr_root_hdl)); | ||
rc = dbtree_open_inplace(&cont_root->tcr_btr_root, &uma, &migrated_toh); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the above code at L3952 the container lookup succeed, it means the container must with OID being migrated, then the container's migrated obj tree should have already been created. |
||
if (rc == 0) | ||
*cont_toh = migrated_toh; | ||
else | ||
DL_ERROR(rc, DF_UUID" failed to open cont migrated tree", DP_UUID(co_uuid)); | ||
|
||
return rc; | ||
} | ||
|
||
static int | ||
|
@@ -4040,10 +4050,11 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, | |
vos_iter_param_t param = { 0 }; | ||
struct vos_iter_anchors anchor = { 0 }; | ||
daos_handle_t toh = arg->ria_migrated_tree_hdl; | ||
daos_handle_t cont_toh = { 0 }; | ||
struct ds_cont_child *cont_child = NULL; | ||
int rc; | ||
|
||
rc = obj_tree_lookup_cont(toh, entry->ie_couuid, &arg->ria_cont_toh); | ||
rc = obj_tree_lookup_cont(toh, entry->ie_couuid, &cont_toh); | ||
if (rc) { | ||
if (rc == -DER_NONEXIST) { | ||
D_DEBUG(DB_TRACE, DF_RB": cont "DF_UUID" non-exist in migrate tree, " | ||
|
@@ -4063,7 +4074,7 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, | |
goto out; | ||
} | ||
|
||
D_ASSERT(daos_handle_is_valid(arg->ria_cont_toh)); | ||
D_ASSERT(daos_handle_is_valid(cont_toh)); | ||
|
||
rc = ds_cont_child_lookup(tls->mpt_pool_uuid, entry->ie_couuid, &cont_child); | ||
if (rc == -DER_NONEXIST || rc == -DER_SHUTDOWN) { | ||
|
@@ -4083,6 +4094,7 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, | |
param.ip_epr.epr_hi = DAOS_EPOCH_MAX; | ||
param.ip_flags = VOS_IT_FOR_MIGRATION; | ||
uuid_copy(arg->ria_co_uuid, entry->ie_couuid); | ||
arg->ria_cont_toh = cont_toh; | ||
rc = vos_iterate(¶m, VOS_ITER_OBJ, false, &anchor, | ||
reint_post_obj_iter_cb, NULL, arg, NULL); | ||
if (rc) | ||
|
@@ -4091,6 +4103,8 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, | |
ds_cont_child_put(cont_child); | ||
|
||
out: | ||
if (daos_handle_is_valid(cont_toh)) | ||
dbtree_close(cont_toh); | ||
if (--arg->ria_yield_cnt <= 0) { | ||
D_DEBUG(DB_REBUILD, DF_RB " rebuild yield: %d\n", DP_RB_MPT(tls), rc); | ||
arg->ria_yield_cnt = REINT_ITER_YIELD_CNT; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/** | ||
* (C) Copyright 2016-2024 Intel Corporation. | ||
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
@@ -1563,6 +1564,42 @@ rebuild_cont_destroy_and_reintegrate(void **state) | |
reintegrate_single_pool_rank(arg, 5, true); | ||
} | ||
|
||
static void | ||
rebuild_incr_reint_basic(void **state) | ||
{ | ||
test_arg_t *arg = *state; | ||
daos_obj_id_t oids[OBJ_NR]; | ||
daos_obj_id_t update_oids[OBJ_NR]; | ||
int rc; | ||
int i; | ||
|
||
if (!test_runable(arg, 6)) | ||
return; | ||
|
||
rc = daos_pool_set_prop(arg->pool.pool_uuid, "reintegration", "incremental"); | ||
assert_rc_equal(rc, 0); | ||
for (i = 0; i < OBJ_NR; i++) { | ||
oids[i] = daos_test_oid_gen(arg->coh, DAOS_OC_R3S_SPEC_RANK, 0, 0, arg->myrank); | ||
oids[i] = dts_oid_set_rank(oids[i], 5); | ||
} | ||
|
||
dt_no_punch = true; | ||
rebuild_io(arg, oids, OBJ_NR); | ||
arg->no_rebuild = 0; | ||
rebuild_single_pool_rank(arg, 5, true); | ||
|
||
for (i = 0; i < OBJ_NR; i++) | ||
update_oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3GX, 0, 0, arg->myrank); | ||
rebuild_io(arg, update_oids, OBJ_NR); | ||
|
||
reintegrate_single_pool_rank(arg, 5, true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: what is the expectation for the global minimum stable epoch of the container when the reintegration happens. Is it expected that none of the oids data in engine rank 5 storage will be wiped during this reintegration? Or, is it expected that maybe some of rank 5 data will be wiped as part of the reintegration, because maybe it falls later than the global minimum stable epoch? |
||
rebuild_io_verify(arg, oids, OBJ_NR); | ||
rebuild_io_verify(arg, update_oids, OBJ_NR); | ||
|
||
rc = daos_pool_set_prop(arg->pool.pool_uuid, "reintegration", "data_sync"); | ||
assert_rc_equal(rc, 0); | ||
dt_no_punch = false; | ||
} | ||
/** create a new pool/container for each test */ | ||
static const struct CMUnitTest rebuild_tests[] = { | ||
{"REBUILD0: drop rebuild scan reply", | ||
|
@@ -1655,6 +1692,9 @@ static const struct CMUnitTest rebuild_tests[] = { | |
{"REBUILD35: destroy container then reintegrate", | ||
rebuild_cont_destroy_and_reintegrate, rebuild_sub_6nodes_rf1_setup, | ||
rebuild_sub_teardown}, | ||
{"REBUILD36: basic incremental reintegration", | ||
rebuild_incr_reint_basic, rebuild_sub_6nodes_rf1_setup, | ||
rebuild_sub_teardown}, | ||
}; | ||
|
||
/* TODO: Enable aggregation once stable view rebuild is done. */ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/** | ||
* (C) Copyright 2016-2024 Intel Corporation. | ||
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
@@ -436,6 +437,11 @@ rebuild_io_obj_internal(struct ioreq *req, bool validate, int index) | |
int k; | ||
int l; | ||
|
||
if (dt_no_punch) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this intended to also skip the record punch on line 483 below under the condition (l == rec_punch_idx) ? It seems this logic will not prevent it. Maybe it is ultimately not too important if that punch_single() call is made. Just curious/asking. |
||
akey_punch_idx = -1; | ||
dkey_punch_idx = -1; | ||
} | ||
|
||
D_ALLOC(large_key, LARGE_KEY_SIZE); | ||
if (large_key == NULL) | ||
return -DER_NOMEM; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/** | ||
* (C) Copyright 2018-2023 Intel Corporation. | ||
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
@@ -32,6 +33,10 @@ int dt_obj_class; | |
int dt_redun_lvl; | ||
int dt_redun_fac; | ||
|
||
/** pool incremental reintegration */ | ||
int dt_incr_reint; | ||
bool dt_no_punch; /* will remove later */ | ||
|
||
/* Create or import a single pool with option to store info in arg->pool | ||
* or an alternate caller-specified test_pool structure. | ||
* ipool (optional): import pool: store info for an existing pool to arg->pool. | ||
|
@@ -349,6 +354,8 @@ test_setup(void **state, unsigned int step, bool multi_rank, | |
daos_prop_t co_props = {0}; | ||
struct daos_prop_entry dpp_entry[6] = {0}; | ||
struct daos_prop_entry *entry; | ||
daos_prop_t po_props = {0}; | ||
struct daos_prop_entry po_entry[1] = {0}; | ||
|
||
/* feed a seed for pseudo-random number generator */ | ||
gettimeofday(&now, NULL); | ||
|
@@ -395,6 +402,18 @@ test_setup(void **state, unsigned int step, bool multi_rank, | |
arg->pool.destroyed = false; | ||
} | ||
|
||
/** Look at variables set by test arguments and setup pool props */ | ||
if (dt_incr_reint) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. dt_incr_reint isn't set anywhere that I can see. Is this meant for future use when there might be a specific setup function for incremental reintegration test cases? |
||
print_message("\n-------\n" | ||
"Incremental reintegration enabled in test!" | ||
"\n-------\n"); | ||
entry = &po_entry[po_props.dpp_nr]; | ||
entry->dpe_type = DAOS_PROP_PO_REINT_MODE; | ||
entry->dpe_val = DAOS_REINT_MODE_INCREMENTAL; | ||
|
||
po_props.dpp_nr++; | ||
} | ||
|
||
/** Look at variables set by test arguments and setup container props */ | ||
if (dt_csum_type) { | ||
print_message("\n-------\n" | ||
|
@@ -445,11 +464,13 @@ test_setup(void **state, unsigned int step, bool multi_rank, | |
co_props.dpp_nr++; | ||
} | ||
|
||
if (po_props.dpp_nr > 0) | ||
po_props.dpp_entries = po_entry; | ||
if (co_props.dpp_nr > 0) | ||
co_props.dpp_entries = dpp_entry; | ||
|
||
while (!rc && step != arg->setup_state) | ||
rc = test_setup_next_step(state, pool, NULL, &co_props); | ||
rc = test_setup_next_step(state, pool, &po_props, &co_props); | ||
|
||
if (rc) { | ||
D_FREE(arg); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some confused, since it is already opened, why need to open again?