From cbddbb5790753ab8962000b92b94272201f9e3c4 Mon Sep 17 00:00:00 2001 From: Chris Davis Date: Wed, 29 Jan 2025 06:07:46 +0000 Subject: [PATCH] DAOS-17001 rebuild: when self_heal is set to delay_rebuild, do not rebuild on exclude delay_rebuild mode should delay the rebuild in all scenarios, with no exception for target exclusion. Also downgraded the log message emitted on shard update failure from error to warning. Shard update failures are expected during a failure, and the message was logged too frequently. Testing: ran `dmg pool exclude default-pool --rank 0 --target-idx 4` while a write/read workflow was running against a cluster. Signed-off-by: Chris Davis --- src/object/cli_obj.c | 3 ++- src/pool/srv_pool.c | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/object/cli_obj.c b/src/object/cli_obj.c index 75d661d0665..629a5e5e120 100644 --- a/src/object/cli_obj.c +++ b/src/object/cli_obj.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -5816,7 +5817,7 @@ dc_obj_update(tse_task_t *task, struct dtx_epoch *epoch, uint32_t map_ver, rc = obj_update_shards_get(obj, args, map_ver, obj_auxi, &shard, &shard_cnt); if (rc != 0) { - D_ERROR(DF_OID" get update shards failure %d\n", DP_OID(obj->cob_md.omd_id), rc); + D_WARN(DF_OID " get update shards failure %d\n", DP_OID(obj->cob_md.omd_id), rc); D_GOTO(out_task, rc); } diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 143d3984f46..295f1cc199a 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -1,6 +1,7 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7343,7 +7344,7 @@ pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, entry = daos_prop_entry_get(&prop, DAOS_PROP_PO_SELF_HEAL); D_ASSERT(entry != NULL); - if (!(entry->dpe_val & (DAOS_SELF_HEAL_AUTO_REBUILD | DAOS_SELF_HEAL_DELAY_REBUILD))) { + if (!(entry->dpe_val & DAOS_SELF_HEAL_AUTO_REBUILD)) { D_DEBUG(DB_MD, "self healing is disabled\n"); D_GOTO(out, rc); } @@ -7360,9 +7361,7 @@ pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, D_GOTO(out, rc); } - if ((entry->dpe_val & DAOS_SELF_HEAL_DELAY_REBUILD) && exclude_rank) - delay = -1; - else if (daos_fail_check(DAOS_REBUILD_DELAY)) + if (daos_fail_check(DAOS_REBUILD_DELAY)) delay = 5; D_DEBUG(DB_MD, "map ver %u/%u\n", map_version ? *map_version : -1,