Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-14013 rebuild: add a basic UT for incremental reint #15782

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions src/object/srv_obj_migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -3938,9 +3938,11 @@ static int
obj_tree_lookup_cont(daos_handle_t toh, uuid_t co_uuid, daos_handle_t *cont_toh)
{
struct tree_cache_root *cont_root = NULL;
d_iov_t key_iov;
d_iov_t tmp_iov;
int rc;
d_iov_t key_iov;
d_iov_t tmp_iov;
daos_handle_t migrated_toh;
struct umem_attr uma;
int rc;

D_ASSERT(daos_handle_is_valid(toh));

Expand All @@ -3957,9 +3959,17 @@ obj_tree_lookup_cont(daos_handle_t toh, uuid_t co_uuid, daos_handle_t *cont_toh)
return rc;
}

memset(&uma, 0, sizeof(uma));
uma.uma_id = UMEM_CLASS_VMEM;
cont_root = tmp_iov.iov_buf;
*cont_toh = cont_root->tcr_root_hdl;
return 0;
D_ASSERT(daos_handle_is_valid(cont_root->tcr_root_hdl));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some confused, since it is already opened, why need to open again?

rc = dbtree_open_inplace(&cont_root->tcr_btr_root, &uma, &migrated_toh);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to check if (daos_handle_is_inval(&cont_root->tcr_root_hdl)) before open?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the above code at L3952 the container lookup succeed, it means the container must with OID being migrated, then the container's migrated obj tree should have already been created.
Can add an assertion the handle is valid although, I'll add it.

if (rc == 0)
*cont_toh = migrated_toh;
else
DL_ERROR(rc, DF_UUID" failed to open cont migrated tree", DP_UUID(co_uuid));

return rc;
}

static int
Expand Down Expand Up @@ -4040,10 +4050,11 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry,
vos_iter_param_t param = { 0 };
struct vos_iter_anchors anchor = { 0 };
daos_handle_t toh = arg->ria_migrated_tree_hdl;
daos_handle_t cont_toh = { 0 };
struct ds_cont_child *cont_child = NULL;
int rc;

rc = obj_tree_lookup_cont(toh, entry->ie_couuid, &arg->ria_cont_toh);
rc = obj_tree_lookup_cont(toh, entry->ie_couuid, &cont_toh);
if (rc) {
if (rc == -DER_NONEXIST) {
D_DEBUG(DB_TRACE, DF_RB": cont "DF_UUID" non-exist in migrate tree, "
Expand All @@ -4063,7 +4074,7 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry,
goto out;
}

D_ASSERT(daos_handle_is_valid(arg->ria_cont_toh));
D_ASSERT(daos_handle_is_valid(cont_toh));

rc = ds_cont_child_lookup(tls->mpt_pool_uuid, entry->ie_couuid, &cont_child);
if (rc == -DER_NONEXIST || rc == -DER_SHUTDOWN) {
Expand All @@ -4083,6 +4094,7 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry,
param.ip_epr.epr_hi = DAOS_EPOCH_MAX;
param.ip_flags = VOS_IT_FOR_MIGRATION;
uuid_copy(arg->ria_co_uuid, entry->ie_couuid);
arg->ria_cont_toh = cont_toh;
rc = vos_iterate(&param, VOS_ITER_OBJ, false, &anchor,
reint_post_obj_iter_cb, NULL, arg, NULL);
if (rc)
Expand All @@ -4091,6 +4103,8 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry,
ds_cont_child_put(cont_child);

out:
if (daos_handle_is_valid(cont_toh))
dbtree_close(cont_toh);
if (--arg->ria_yield_cnt <= 0) {
D_DEBUG(DB_REBUILD, DF_RB " rebuild yield: %d\n", DP_RB_MPT(tls), rc);
arg->ria_yield_cnt = REINT_ITER_YIELD_CNT;
Expand Down
17 changes: 17 additions & 0 deletions src/tests/ftest/daos_test/rebuild.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'''
(C) Copyright 2018-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
'''
Expand Down Expand Up @@ -365,3 +366,19 @@ def test_rebuild_35(self):
:avocado: tags=DaosCoreTestRebuild,daos_test,daos_core_test_rebuild,test_rebuild_35
"""
self.run_subtest()

def test_rebuild_36(self):
"""Jira ID: DAOS-14013

Test Description:
Run daos_test -r -s5 -u subtests=36

Use cases:
Core tests for daos_test rebuild

:avocado: tags=all,daily_regression
:avocado: tags=hw,medium
:avocado: tags=unittest,rebuild
:avocado: tags=DaosCoreTestRebuild,daos_test,daos_core_test_rebuild,test_rebuild_36
"""
self.run_subtest()
4 changes: 4 additions & 0 deletions src/tests/ftest/daos_test/rebuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ timeouts:
test_rebuild_33: 200
test_rebuild_34: 200
test_rebuild_35: 180
test_rebuild_36: 200
pool:
nvme_size: 0G

Expand Down Expand Up @@ -81,6 +82,7 @@ daos_tests:
test_rebuild_33: DAOS_Rebuild_33
test_rebuild_34: DAOS_Rebuild_34
test_rebuild_35: DAOS_Rebuild_35
test_rebuild_36: DAOS_Rebuild_36
daos_test:
test_rebuild_0to10: r
test_rebuild_12to15: r
Expand All @@ -104,6 +106,7 @@ daos_tests:
test_rebuild_33: r
test_rebuild_34: r
test_rebuild_35: r
test_rebuild_36: r
args:
test_rebuild_0to10: -s3 -u subtests="0-10"
test_rebuild_12to15: -s3 -u subtests="12-15"
Expand All @@ -127,6 +130,7 @@ daos_tests:
test_rebuild_33: -s5 -u subtests="33"
test_rebuild_34: -s5 -u subtests="34"
test_rebuild_35: -s5 -u subtests="35"
test_rebuild_36: -s5 -u subtests="36"
stopped_ranks:
test_rebuild_26: ["random"]
test_rebuild_27: ["random"]
Expand Down
40 changes: 40 additions & 0 deletions src/tests/suite/daos_rebuild.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -1563,6 +1564,42 @@ rebuild_cont_destroy_and_reintegrate(void **state)
reintegrate_single_pool_rank(arg, 5, true);
}

static void
rebuild_incr_reint_basic(void **state)
{
test_arg_t *arg = *state;
daos_obj_id_t oids[OBJ_NR];
daos_obj_id_t update_oids[OBJ_NR];
int rc;
int i;

if (!test_runable(arg, 6))
return;

rc = daos_pool_set_prop(arg->pool.pool_uuid, "reintegration", "incremental");
assert_rc_equal(rc, 0);
for (i = 0; i < OBJ_NR; i++) {
oids[i] = daos_test_oid_gen(arg->coh, DAOS_OC_R3S_SPEC_RANK, 0, 0, arg->myrank);
oids[i] = dts_oid_set_rank(oids[i], 5);
}

dt_no_punch = true;
rebuild_io(arg, oids, OBJ_NR);
arg->no_rebuild = 0;
rebuild_single_pool_rank(arg, 5, true);

for (i = 0; i < OBJ_NR; i++)
update_oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3GX, 0, 0, arg->myrank);
rebuild_io(arg, update_oids, OBJ_NR);

reintegrate_single_pool_rank(arg, 5, true);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: what is the expectation for the global minimum stable epoch of the container when the reintegration happens. Is it expected that none of the oids data in engine rank 5 storage will be wiped during this reintegration? Or, is it expected that maybe some of rank 5 data will be wiped as part of the reintegration, because maybe it falls later than the global minimum stable epoch?

rebuild_io_verify(arg, oids, OBJ_NR);
rebuild_io_verify(arg, update_oids, OBJ_NR);

rc = daos_pool_set_prop(arg->pool.pool_uuid, "reintegration", "data_sync");
assert_rc_equal(rc, 0);
dt_no_punch = false;
}
/** create a new pool/container for each test */
static const struct CMUnitTest rebuild_tests[] = {
{"REBUILD0: drop rebuild scan reply",
Expand Down Expand Up @@ -1655,6 +1692,9 @@ static const struct CMUnitTest rebuild_tests[] = {
{"REBUILD35: destroy container then reintegrate",
rebuild_cont_destroy_and_reintegrate, rebuild_sub_6nodes_rf1_setup,
rebuild_sub_teardown},
{"REBUILD36: basic incremental reintegration",
rebuild_incr_reint_basic, rebuild_sub_6nodes_rf1_setup,
rebuild_sub_teardown},
};

/* TODO: Enable aggregation once stable view rebuild is done. */
Expand Down
6 changes: 6 additions & 0 deletions src/tests/suite/daos_rebuild_common.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -436,6 +437,11 @@ rebuild_io_obj_internal(struct ioreq *req, bool validate, int index)
int k;
int l;

if (dt_no_punch) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this intended to also skip the record punch on line 483 below under the condition (l == rec_punch_idx) ? It seems this logic will not prevent it. Maybe it is ultimately not too important if that punch_single() call is made. Just curious/asking.

akey_punch_idx = -1;
dkey_punch_idx = -1;
}

D_ALLOC(large_key, LARGE_KEY_SIZE);
if (large_key == NULL)
return -DER_NOMEM;
Expand Down
5 changes: 5 additions & 0 deletions src/tests/suite/daos_test.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -49,6 +50,10 @@
/** Server crt group ID */
extern const char *server_group;

/** pool incremental reintegration */
extern int dt_incr_reint;
extern bool dt_no_punch;

/** Pool service replicas */
extern unsigned int svc_nreplicas;
extern const char *dmg_config_file;
Expand Down
23 changes: 22 additions & 1 deletion src/tests/suite/daos_test_common.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2018-2023 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -32,6 +33,10 @@ int dt_obj_class;
int dt_redun_lvl;
int dt_redun_fac;

/** pool incremental reintegration */
int dt_incr_reint;
bool dt_no_punch; /* will remove later */

/* Create or import a single pool with option to store info in arg->pool
* or an alternate caller-specified test_pool structure.
* ipool (optional): import pool: store info for an existing pool to arg->pool.
Expand Down Expand Up @@ -349,6 +354,8 @@ test_setup(void **state, unsigned int step, bool multi_rank,
daos_prop_t co_props = {0};
struct daos_prop_entry dpp_entry[6] = {0};
struct daos_prop_entry *entry;
daos_prop_t po_props = {0};
struct daos_prop_entry po_entry[1] = {0};

/* feed a seed for pseudo-random number generator */
gettimeofday(&now, NULL);
Expand Down Expand Up @@ -395,6 +402,18 @@ test_setup(void **state, unsigned int step, bool multi_rank,
arg->pool.destroyed = false;
}

/** Look at variables set by test arguments and setup pool props */
if (dt_incr_reint) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dt_incr_reint isn't set anywhere that I can see. Is this meant for future use when there might be a specific setup function for incremental reintegration test cases?

print_message("\n-------\n"
"Incremental reintegration enabled in test!"
"\n-------\n");
entry = &po_entry[po_props.dpp_nr];
entry->dpe_type = DAOS_PROP_PO_REINT_MODE;
entry->dpe_val = DAOS_REINT_MODE_INCREMENTAL;

po_props.dpp_nr++;
}

/** Look at variables set by test arguments and setup container props */
if (dt_csum_type) {
print_message("\n-------\n"
Expand Down Expand Up @@ -445,11 +464,13 @@ test_setup(void **state, unsigned int step, bool multi_rank,
co_props.dpp_nr++;
}

if (po_props.dpp_nr > 0)
po_props.dpp_entries = po_entry;
if (co_props.dpp_nr > 0)
co_props.dpp_entries = dpp_entry;

while (!rc && step != arg->setup_state)
rc = test_setup_next_step(state, pool, NULL, &co_props);
rc = test_setup_next_step(state, pool, &po_props, &co_props);

if (rc) {
D_FREE(arg);
Expand Down
Loading