diff --git a/.github/dependabot.yml b/.github/dependabot.yml index b50a280da9e..ad76bca70f4 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,6 +5,8 @@ updates: - /utils/ansible/ftest/ - /utils/cq/ - / + ignore: + - dependency-name: "avocado-framework*" schedule: interval: daily groups: @@ -12,6 +14,19 @@ updates: patterns: - "*" - package-ecosystem: github-actions + target-branch: master + directory: / + schedule: + interval: weekly + groups: + gha-versions: + patterns: + - "*" + assignees: + - daos-stack/actions-watchers + + - package-ecosystem: github-actions + target-branch: release/2.6 directory: / schedule: interval: weekly diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 3cfa514ee80..578a3e16f9c 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -25,7 +25,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: '3' - name: Install extra python packages @@ -86,7 +86,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: '3' - name: Add parser @@ -139,7 +139,7 @@ jobs: uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: '3.11' - name: Install python packages @@ -194,7 +194,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python environment - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: '3' - name: Install extra python packages diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 0432e9a1e89..d17fd8faa63 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -71,6 +71,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 # v3.28.5 + uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 # v3.28.9 with: sarif_file: results.sarif diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 22d5b656b66..0101053dfd4 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -58,7 +58,7 @@ jobs: trivy-config: 'utils/trivy/trivy.yaml' - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@f6091c0113d1dcf9b98e269ee48e8a7e51b7bdd4 # v3.28.5 + uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 # v3.28.9 with: sarif_file: 'trivy-results.sarif' diff --git a/debian/changelog b/debian/changelog index d21293f8b20..ec161b2ce74 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,14 @@ -daos (2.7.101-4) unstable; urgency=medium +daos (2.7.101-6) unstable; urgency=medium [ Cedric Koch-Hofer] * Add support of the libasan - -- Cedric Koch-Hofer Mon, 20 Jan 2025 14:12:00 -0700 + -- Cedric Koch-Hofer Wed, 12 Feb 2025 14:12:00 -0700 + +daos (2.7.101-5) unstable; urgency=medium + [ Jan Michalski ] + * Add ddb_ut and dtx_ut to the server-tests package + + -- Jan Michalski Wed, 22 Jan 2025 14:31:00 +0000 daos (2.7.101-3) unstable; urgency=medium [ Jeff Olivier ] diff --git a/debian/daos-server-tests.install b/debian/daos-server-tests.install index ae15938b219..56fb0ad8742 100644 --- a/debian/daos-server-tests.install +++ b/debian/daos-server-tests.install @@ -1,4 +1,5 @@ usr/bin/dtx_tests +usr/bin/dtx_ut usr/bin/jump_pl_map usr/bin/ring_pl_map usr/bin/evt_ctl @@ -9,6 +10,7 @@ usr/bin/bio_ut usr/bin/vea_ut usr/bin/vos_tests usr/bin/ddb_tests +usr/bin/ddb_ut usr/bin/vea_stress usr/bin/vos_perf usr/bin/obj_ctl diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 43e92918966..34b1105397a 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -132,13 +132,14 @@ class MissingSystemLibs(Exception): component -- component that has missing targets """ - def __init__(self, component): + def __init__(self, component, prog): super().__init__() self.component = component + self.prog = prog def __str__(self): """Exception string""" - return f'{self.component} has unmet dependencies required for build' + return f"{self.component} requires {self.prog} for build" class DownloadRequired(Exception): @@ -654,7 +655,7 @@ def _setup_compiler(self): if self.__check_only: continue config.Finish() - raise MissingSystemLibs(prog) + raise MissingSystemLibs(compiler, prog) args = {name: prog} self.__env.Replace(**args) @@ -1480,7 +1481,7 @@ def build(self, env, needed_libs): if build_dep: if self._has_missing_system_deps(self.prereqs.system_env): - raise MissingSystemLibs(self.name) + raise MissingSystemLibs(self.name, self.required_progs) self.get() diff --git a/src/cart/README.env b/src/cart/README.env index 6bad14ad41e..8a934574cf4 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -213,3 +213,7 @@ This file lists the environment variables used in CaRT. . CRT_CXI_INIT_RETRY Retry count for HG_Init_opt2() when initializing the CXI provider (default = 3). + + D_PROGRESS_BUSY + Force busy polling when progressing, preventing from sleeping when waiting for + new messages. 
diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index f1dd6a3f2c3..71caf39c34a 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -845,7 +845,7 @@ crt_hg_class_init(crt_provider_t provider, int ctx_idx, bool primary, int iface_ init_info.na_init_info.auth_key = prov_data->cpg_na_config.noc_auth_key; - if (crt_provider_is_block_mode(provider)) + if (crt_provider_is_block_mode(provider) && !prov_data->cpg_progress_busy) init_info.na_init_info.progress_mode = 0; else init_info.na_init_info.progress_mode = NA_NO_BLOCK; diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index d66d99cd65c..56b24503482 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -94,6 +94,8 @@ dump_opt(crt_init_options_t *opt) D_INFO("auth_key is set\n"); if (opt->cio_thread_mode_single) D_INFO("thread mode single is set\n"); + if (opt->cio_progress_busy) + D_INFO("progress busy mode is set\n"); } static int @@ -199,6 +201,14 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, bool p prov_data->cpg_max_unexp_size = max_unexpect_size; prov_data->cpg_primary = primary; + if (opt && opt->cio_progress_busy) { + prov_data->cpg_progress_busy = opt->cio_progress_busy; + } else { + bool progress_busy = false; + crt_env_get(D_PROGRESS_BUSY, &progress_busy); + prov_data->cpg_progress_busy = progress_busy; + } + for (i = 0; i < CRT_SRV_CONTEXT_NUM; i++) prov_data->cpg_used_idx[i] = false; diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index d35148c2bfe..0e5cdae571c 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -56,38 +56,39 @@ enum crt_traffic_class { CRT_TRAFFIC_CLASSES }; struct crt_prov_gdata { /** NA plugin type */ - int cpg_provider; + int cpg_provider; - struct crt_na_config cpg_na_config; + struct crt_na_config cpg_na_config; /** Context0 URI */ - char cpg_addr[CRT_ADDR_STR_MAX_LEN]; + char cpg_addr[CRT_ADDR_STR_MAX_LEN]; /** CaRT contexts list */ - d_list_t cpg_ctx_list; + d_list_t cpg_ctx_list; /** actual number of items in CaRT contexts list */ - int cpg_ctx_num; + int cpg_ctx_num; /** maximum number of contexts user wants to create */ - uint32_t cpg_ctx_max_num; + uint32_t cpg_ctx_max_num; /** free-list of indices */ - bool cpg_used_idx[CRT_SRV_CONTEXT_NUM]; + bool cpg_used_idx[CRT_SRV_CONTEXT_NUM]; /** Hints to mercury/ofi for max expected/unexp sizes */ - uint32_t cpg_max_exp_size; - uint32_t cpg_max_unexp_size; + uint32_t cpg_max_exp_size; + uint32_t cpg_max_unexp_size; /** Number of remote tags */ - uint32_t cpg_num_remote_tags; - uint32_t cpg_last_remote_tag; + uint32_t cpg_num_remote_tags; + uint32_t cpg_last_remote_tag; /** Set of flags */ - unsigned int cpg_sep_mode : 1, - cpg_primary : 1, - cpg_contig_ports : 1, - cpg_inited : 1; + bool cpg_sep_mode; + bool cpg_primary; + bool cpg_contig_ports; + bool cpg_inited; + bool cpg_progress_busy; /** Mutext to protect fields above */ - pthread_mutex_t cpg_mutex; + pthread_mutex_t cpg_mutex; }; #define MAX_NUM_SECONDARY_PROVS 2 @@ -230,6 +231,7 @@ struct crt_event_cb_priv { ENV_STR(D_PORT) \ ENV(D_PORT_AUTO_ADJUST) \ ENV(D_THREAD_MODE_SINGLE) \ + ENV(D_PROGRESS_BUSY) \ ENV(D_POST_INCR) \ ENV(D_POST_INIT) \ ENV(D_MRECV_BUF) \ diff --git a/src/client/api/init.c b/src/client/api/init.c index 4c6ab9dd186..de45e51b4c5 100644 --- a/src/client/api/init.c +++ b/src/client/api/init.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -270,6 +271,7 @@ daos_init(void) if (rc != 0) D_GOTO(out_obj, rc); #endif + daos_array_env_init(); module_initialized++; D_GOTO(unlock, rc = 0); diff --git a/src/client/array/dc_array.c b/src/client/array/dc_array.c index 06201d267db..f089579b9bf 100644 --- a/src/client/array/dc_array.c +++ b/src/client/array/dc_array.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -69,6 +70,24 @@ struct io_params { char akey_val; }; +unsigned int array_list_io_limit; + +void +daos_array_env_init() +{ + array_list_io_limit = DAOS_ARRAY_LIST_IO_LIMIT; + d_getenv_uint("DAOS_ARRAY_LIST_IO_LIMIT", &array_list_io_limit); + if (array_list_io_limit == 0) { + array_list_io_limit = UINT_MAX; + } + if (array_list_io_limit > DAOS_ARRAY_LIST_IO_LIMIT) { + D_WARN("Setting a high limit for list io descriptors (%u) is not recommended\n", + array_list_io_limit); + } else { + D_DEBUG(DB_TRACE, "ARRAY List IO limit = %u\n", array_list_io_limit); + } +} + static void array_free(struct d_hlink *hlink) { @@ -1436,6 +1455,38 @@ dc_array_io(daos_handle_t array_oh, daos_handle_t th, D_GOTO(err_task, rc = -DER_INVAL); } + /* + * If we are above the limit, check for small recx size. Just a best effort check for + * extreme cases to reject. + */ + if (rg_iod->arr_nr > array_list_io_limit) { + daos_size_t i; + daos_size_t tiny_count = 0; + + /* quick shortcut check */ + for (i = 0; i < rg_iod->arr_nr; i = i * 2) { + if (rg_iod->arr_rgs[i].rg_len > DAOS_ARRAY_RG_LEN_THD) + break; + if (i == 0) + i++; + } + + /** Full check if quick check fails */ + if (i >= rg_iod->arr_nr) { + for (i = 0; i < rg_iod->arr_nr; i++) { + if (rg_iod->arr_rgs[i].rg_len <= DAOS_ARRAY_RG_LEN_THD) + tiny_count++; + if (tiny_count > array_list_io_limit) + break; + } + if (tiny_count > array_list_io_limit) { + D_ERROR("List io supports a max of %u offsets (using %zu)", + array_list_io_limit, rg_iod->arr_nr); + D_GOTO(err_task, rc = -DER_NOTSUPPORTED); + } + } + } + array = array_hdl2ptr(array_oh); if (array == NULL) { D_ERROR("Invalid array handle: "DF_RC"\n", DP_RC(-DER_NO_HDL)); diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 0d98cae5aa2..ff41943abc9 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -6129,14 +6130,18 @@ futimens(int fd, const struct timespec times[2]) static int new_fcntl(int fd, int cmd, ...) { - int fd_directed, param, OrgFunc = 1; + int fd_directed, OrgFunc = 1; int next_dirfd, next_fd, rc; + void *param; va_list arg; va_start(arg, cmd); - param = va_arg(arg, int); + param = va_arg(arg, void *); va_end(arg); + if (!d_hook_enabled) + return libc_fcntl(fd, cmd, param); + if (fd < FD_FILE_BASE && d_compatible_mode) return libc_fcntl(fd, cmd, param); @@ -6155,9 +6160,6 @@ new_fcntl(int fd, int cmd, ...) case F_ADD_SEALS: fd_directed = d_get_fd_redirected(fd); - if (!d_hook_enabled) - return libc_fcntl(fd, cmd, param); - if (cmd == F_GETFL) { if (fd_directed >= FD_DIR_BASE) return dir_list[fd_directed - FD_DIR_BASE]->open_flag; @@ -6206,12 +6208,15 @@ new_fcntl(int fd, int cmd, ...) 
case F_OFD_GETLK: case F_GETOWN_EX: case F_SETOWN_EX: - if (!d_hook_enabled) + fd_directed = d_get_fd_redirected(fd); + if (fd_directed >= FD_FILE_BASE) { + errno = ENOTSUP; + return (-1); + } else { return libc_fcntl(fd, cmd, param); - - return libc_fcntl(fd, cmd, param); + } default: - return libc_fcntl(fd, cmd); + return libc_fcntl(fd, cmd, param); } } diff --git a/src/common/tests/btree.c b/src/common/tests/btree.c index 69a595d6109..c60c0bdebc8 100644 --- a/src/common/tests/btree.c +++ b/src/common/tests/btree.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -62,9 +63,10 @@ struct ik_rec { umem_off_t ir_val_off; }; -static char **test_group_args; -static int test_group_start; -static int test_group_stop; +struct btr_test_state { + int argc; + char **argv; +}; #define IK_TREE_CLASS 100 #define POOL_NAME "/mnt/daos/btree-test" @@ -834,7 +836,6 @@ ik_btr_perf(void **state) D_FREE(arr); } - static void ik_btr_drain(void **state) { @@ -884,52 +885,98 @@ ik_btr_drain(void **state) D_FREE(arr); } -static struct option btr_ops[] = { - { "create", required_argument, NULL, 'C' }, - { "destroy", no_argument, NULL, 'D' }, - { "drain", no_argument, NULL, 'e' }, - { "open", no_argument, NULL, 'o' }, - { "close", no_argument, NULL, 'c' }, - { "update", required_argument, NULL, 'u' }, - { "find", required_argument, NULL, 'f' }, - { "dyn_tree", no_argument, NULL, 't' }, - { "delete", required_argument, NULL, 'd' }, - { "del_retain", required_argument, NULL, 'r' }, - { "query", no_argument, NULL, 'q' }, - { "iterate", required_argument, NULL, 'i' }, - { "batch", required_argument, NULL, 'b' }, - { "perf", required_argument, NULL, 'p' }, - { NULL, 0, NULL, 0 }, -}; - -static int -use_pmem() { +static void +ik_btr_memory_register(char *pmem_flag) +{ + int rc = 0; + if (pmem_flag && *pmem_flag != 'p') { + fail_msg("Invalid value of -M parameter: %c\n", *pmem_flag); + } + if (pmem_flag) { + D_PRINT("Using pmem\n"); + rc = utest_pmem_create(POOL_NAME, POOL_SIZE, sizeof(*ik_root), NULL, &ik_utx); + if (rc) { + fail_msg("Cannot setup pmem: %d\n", rc); + } + } else { + D_PRINT("Using vmem\n"); + rc = utest_vmem_create(sizeof(*ik_root), &ik_utx); + if (rc) { + fail_msg("Cannot setup vmem: %d\n", rc); + } + } - int rc; + ik_root = utest_utx2root(ik_utx); + ik_uma = utest_utx2uma(ik_utx); +} - D_PRINT("Using pmem\n"); - rc = utest_pmem_create(POOL_NAME, POOL_SIZE, - sizeof(*ik_root), NULL, - &ik_utx); - D_ASSERT(rc == 0); - return rc; +static void +ik_btr_class_register(char *dynamic_flag) +{ + if (dynamic_flag && *dynamic_flag != 'd') { + fail_msg("Invalid value of -R parameter: %c\n", *dynamic_flag); + } + if (dynamic_flag) { + D_PRINT("Using dynamic tree order\n"); + } + int rc = dbtree_class_register(IK_TREE_CLASS, + (dynamic_flag ? 
BTR_FEAT_DYNAMIC_ROOT : 0) | + BTR_FEAT_EMBED_FIRST | BTR_FEAT_UINT_KEY, + &ik_ops); + if (rc) { + fail_msg("Cannot register memory class: %d\n", rc); + } } +static struct option btr_ops[] = { + {"start-test", required_argument, NULL, 'S'}, + {"reg-class", optional_argument, NULL, 'R'}, + {"reg-memory", optional_argument, NULL, 'M'}, + {"create", required_argument, NULL, 'C'}, + {"destroy", no_argument, NULL, 'D'}, + {"drain", no_argument, NULL, 'e'}, + {"open", no_argument, NULL, 'o'}, + {"close", no_argument, NULL, 'c'}, + {"update", required_argument, NULL, 'u'}, + {"find", required_argument, NULL, 'f'}, + {"delete", required_argument, NULL, 'd'}, + {"del_retain", required_argument, NULL, 'r'}, + {"query", no_argument, NULL, 'q'}, + {"iterate", required_argument, NULL, 'i'}, + {"batch", required_argument, NULL, 'b'}, + {"perf", required_argument, NULL, 'p'}, + {NULL, 0, NULL, 0}, +}; + +#define BTR_SHORTOPTS "+S:R::M::C:Deocqu:f:d:r:qi:b:p:" + +/** + * Execute test based on the given sequence of steps. + * -S/--start-test option is ignored + * -R and -M options must be provided before -C option. + * Each argument of the -R and -M options must be specified without a space between them (e.g. -Rd). + */ static void ts_group(void **state) { + struct btr_test_state *test_state = (struct btr_test_state *)*state; + int opt = 0; + void **st = NULL; - int opt = 0; - void **st = NULL; - - while ((opt = getopt_long(test_group_stop-test_group_start+1, - test_group_args+test_group_start, - "tmC:Deocqu:d:r:f:i:b:p:", - btr_ops, + while ((opt = getopt_long(test_state->argc, test_state->argv, BTR_SHORTOPTS, btr_ops, NULL)) != -1) { tst_fn_val.optval = optarg; tst_fn_val.input = true; switch (opt) { + case 'S': + /* not part of the test sequence */ + break; + case 'R': + ik_btr_class_register(tst_fn_val.optval); + break; + case 'M': + ik_btr_memory_register(tst_fn_val.optval); + break; case 'C': ik_btr_open_create(st); break; @@ -978,27 +1025,18 @@ ts_group(void **state) { ik_btr_perf(st); break; default: - D_PRINT("Unsupported command %c\n", opt); - case 'm': - case 't': - /* handled previously */ - break; + fail_msg("Unsupported command %c\n", opt); } } } static int -run_cmd_line_test(char *test_name, char **args, int start_idx, int stop_idx) +run_cmd_line_test(char *test_name, struct btr_test_state *initial_state) { - const struct CMUnitTest btree_test[] = { - {test_name, ts_group, NULL, NULL}, + {test_name, ts_group, NULL, NULL, initial_state}, }; - test_group_args = args; - test_group_start = start_idx; - test_group_stop = stop_idx; - return cmocka_run_group_tests_name(test_name, btree_test, NULL, @@ -1011,82 +1049,57 @@ main(int argc, char **argv) struct timeval tv; int rc = 0; int opt; - int dynamic_flag = 0; - int start_idx; - char *test_name; - int stop_idx; + char *test_name = NULL; + struct btr_test_state initial_state = {.argc = argc, .argv = argv}; d_register_alt_assert(mock_assert); - gettimeofday(&tv, NULL); - srand(tv.tv_usec); - - ik_toh = DAOS_HDL_INVAL; - ik_root_off = UMOFF_NULL; - - rc = daos_debug_init(DAOS_LOG_DEFAULT); - if (rc != 0) - return rc; - if (argc == 1) { - print_message("Invalid format.\n"); + print_message("No parameters provided.\n"); return -1; } - stop_idx = argc-1; - if (strcmp(argv[1], "--start-test") == 0) { - start_idx = 2; - test_name = argv[2]; - if (strcmp(argv[3], "-t") == 0) { - D_PRINT("Using dynamic tree order\n"); - dynamic_flag = BTR_FEAT_DYNAMIC_ROOT; - if (strcmp(argv[4], "-m") == 0) - rc = use_pmem(); - } else if (strcmp(argv[3], "-m") == 0) { - rc 
= use_pmem(); - if (strcmp(argv[4], "-t") == 0) { - D_PRINT("Using dynamic tree order\n"); - dynamic_flag = BTR_FEAT_DYNAMIC_ROOT; - } - } - } else { - start_idx = 0; - test_name = "Btree testing tool"; - optind = 0; - /* Check for -m option first */ - while ((opt = getopt_long(argc, argv, "tmC:Deocqu:d:r:f:i:b:p:", - btr_ops, NULL)) != -1) { - if (opt == 'm') { - rc = use_pmem(); - break; - } - if (opt == 't') { - D_PRINT("Using dynamic tree order\n"); - dynamic_flag = BTR_FEAT_DYNAMIC_ROOT; - } + /* Check for --start-test parameter and verify that all parameters are in place */ + while ((opt = getopt_long(argc, argv, BTR_SHORTOPTS, btr_ops, NULL)) != -1) { + if (opt == 'S') { + test_name = optarg; + } else if (opt == '?') { + break; } } + if (opt == '?') { + /* invalid option - error message printed on stderr already */ + return -1; + } else if (argc != optind) { + fail_msg("Cannot interpret parameter: \"%s\" at optind: %d.\n", argv[optind], + optind); + } - rc = dbtree_class_register( - IK_TREE_CLASS, dynamic_flag | BTR_FEAT_EMBED_FIRST | BTR_FEAT_UINT_KEY, &ik_ops); - D_ASSERT(rc == 0); + /* getopt_long start over */ + optind = 1; - if (ik_utx == NULL) { - D_PRINT("Using vmem\n"); - rc = utest_vmem_create(sizeof(*ik_root), &ik_utx); - D_ASSERT(rc == 0); + if (test_name == NULL) { + test_name = "Btree testing tool"; } - ik_root = utest_utx2root(ik_utx); - ik_uma = utest_utx2uma(ik_utx); + gettimeofday(&tv, NULL); + srand(tv.tv_usec); - /* start over */ - optind = 0; - rc = run_cmd_line_test(test_name, argv, start_idx, stop_idx); + ik_toh = DAOS_HDL_INVAL; + ik_root_off = UMOFF_NULL; + + rc = daos_debug_init(DAOS_LOG_DEFAULT); + if (rc != 0) { + fail_msg("daos_debug_init() failed: %d\n", rc); + } + rc = run_cmd_line_test(test_name, &initial_state); daos_debug_fini(); - rc += utest_utx_destroy(ik_utx); - if (rc != 0) - printf("Error: %d\n", rc); + if (ik_utx) { + rc += utest_utx_destroy(ik_utx); + } + if (rc != 0) { + fail_msg("Error: %d\n", rc); + } - return rc; + return 0; } diff --git a/src/common/tests/btree.sh b/src/common/tests/btree.sh index 03a35d47f9e..1bc85b21ccb 100755 --- a/src/common/tests/btree.sh +++ b/src/common/tests/btree.sh @@ -31,6 +31,7 @@ Usage: btree.sh [OPTIONS] -s [num] Run with num keys dyn Run with dynamic root ukey Use integer keys + emb Use embedded value perf Run performance tests direct Use direct string key EOF @@ -39,6 +40,7 @@ EOF PERF="" UINT="" +DYN="" test_conf_pre="" while [ $# -gt 0 ]; do case "$1" in @@ -53,7 +55,7 @@ while [ $# -gt 0 ]; do test_conf_pre="${test_conf_pre} keys=${BAT_NUM}" ;; dyn) - DYN="-t" + DYN="d" shift test_conf_pre="${test_conf_pre} dyn" ;; @@ -107,9 +109,9 @@ run_test() echo "B+tree functional test..." DAOS_DEBUG="$DDEBUG" \ - eval "${VCMD}" "$BTR" --start-test \ - "btree functional ${test_conf_pre} ${test_conf} iterate=${IDIR}" \ - "${DYN}" "${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ + eval "${VCMD}" "$BTR" \ + --start-test "'btree functional ${test_conf_pre} ${test_conf} iterate=${IDIR}'" \ + -R"${DYN}" -M"${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ -c \ -o \ -u "$RECORDS" \ @@ -128,8 +130,8 @@ run_test() echo "B+tree batch operations test..." eval "${VCMD}" "$BTR" \ - --start-test "btree batch operations ${test_conf_pre} ${test_conf}" \ - "${DYN}" "${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ + --start-test "'btree batch operations ${test_conf_pre} ${test_conf}'" \ + -R"${DYN}" -M"${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ -c \ -o \ -b "$BAT_NUM" \ @@ -137,15 +139,15 @@ run_test() echo "B+tree drain test..." 
eval "${VCMD}" "$BTR" \ - --start-test "btree drain ${test_conf_pre} ${test_conf}" \ - "${DYN}" "${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ + --start-test "'btree drain ${test_conf_pre} ${test_conf}'" \ + -R"${DYN}" -M"${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ -e -D else echo "B+tree performance test..." eval "${VCMD}" "$BTR" \ - --start-test "btree performance ${test_conf_pre} ${test_conf}" \ - "${DYN}" "${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ + --start-test "'btree performance ${test_conf_pre} ${test_conf}'" \ + -R"${DYN}" -M"${PMEM}" -C "${UINT}${IPL}o:$ORDER" \ -p "$BAT_NUM" \ -D fi @@ -153,7 +155,7 @@ run_test() for IPL in "i," ""; do for IDIR in "f" "b"; do - for PMEM in "-m" ""; do + for PMEM in "p" ""; do run_test done done diff --git a/src/common/tests/btree_direct.c b/src/common/tests/btree_direct.c index 8fb3fa8b6fd..d399bbd1681 100644 --- a/src/common/tests/btree_direct.c +++ b/src/common/tests/btree_direct.c @@ -1,12 +1,12 @@ /** * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ #define D_LOGFAC DD_FAC(tests) #include -#include #include #include #include @@ -25,10 +25,6 @@ #include #include "utest_common.h" -static char **test_group_args; -static int test_group_start; -static int test_group_stop; - enum sk_btr_opc { BTR_OPC_UPDATE, BTR_OPC_LOOKUP, @@ -53,6 +49,11 @@ struct sk_rec { char sr_key[0]; }; +struct btr_test_state { + int argc; + char **argv; +}; + #define SK_TREE_CLASS 100 #define POOL_NAME "/mnt/daos/btree-direct-test" #define POOL_SIZE ((1024 * 1024 * 1024ULL)) @@ -285,15 +286,15 @@ sk_rec_stat(struct btr_instance *tins, struct btr_record *rec, } static btr_ops_t sk_ops = { - .to_key_cmp = sk_key_cmp, - .to_key_encode = sk_key_encode, - .to_key_decode = sk_key_decode, - .to_rec_alloc = sk_rec_alloc, - .to_rec_free = sk_rec_free, - .to_rec_fetch = sk_rec_fetch, - .to_rec_update = sk_rec_update, - .to_rec_string = sk_rec_string, - .to_rec_stat = sk_rec_stat, + .to_key_encode = sk_key_encode, + .to_key_decode = sk_key_decode, + .to_key_cmp = sk_key_cmp, + .to_rec_alloc = sk_rec_alloc, + .to_rec_free = sk_rec_free, + .to_rec_fetch = sk_rec_fetch, + .to_rec_update = sk_rec_update, + .to_rec_string = sk_rec_string, + .to_rec_stat = sk_rec_stat, }; #define SK_SEP ',' @@ -1026,37 +1027,87 @@ sk_btr_perf(void **state) D_FREE(kv); } +static void +sk_btr_memory_register(char *pmem_flag) +{ + int rc = 0; + if (pmem_flag && *pmem_flag != 'p') { + fail_msg("Invalid value of -M parameter: %c\n", *pmem_flag); + } + if (pmem_flag) { + D_PRINT("Using pmem\n"); + rc = utest_pmem_create(POOL_NAME, POOL_SIZE, sizeof(*sk_root), NULL, &sk_utx); + if (rc) { + fail_msg("Cannot setup pmem: %d\n", rc); + } + } else { + D_PRINT("Using vmem\n"); + rc = utest_vmem_create(sizeof(*sk_root), &sk_utx); + if (rc) { + fail_msg("Cannot setup vmem: %d\n", rc); + } + } + + sk_root = utest_utx2root(sk_utx); + sk_uma = utest_utx2uma(sk_utx); +} +static void +sk_btr_class_register() +{ + D_PRINT("Using dynamic tree order\n"); + int rc = dbtree_class_register(SK_TREE_CLASS, BTR_FEAT_EMBED_FIRST | BTR_FEAT_DIRECT_KEY, + &sk_ops); + if (rc) { + fail_msg("Cannot register memory class: %d\n", rc); + } +} + static struct option btr_ops[] = { - { "create", required_argument, NULL, 'C' }, - { "destroy", no_argument, NULL, 'D' }, - { "open", no_argument, NULL, 'o' }, - { "close", no_argument, NULL, 'c' }, - { "update", required_argument, NULL, 'u' }, - { "find", required_argument, NULL, 'f' }, - { "delete", 
required_argument, NULL, 'd' }, - { "del_retain", required_argument, NULL, 'r' }, - { "query", no_argument, NULL, 'q' }, - { "iterate", required_argument, NULL, 'i' }, - { "batch", required_argument, NULL, 'b' }, - { "perf", required_argument, NULL, 'p' }, - { NULL, 0, NULL, 0 }, + {"start-test", required_argument, NULL, 'S'}, + {"reg-class", no_argument, NULL, 'R'}, + {"reg-memory", optional_argument, NULL, 'M'}, + {"create", required_argument, NULL, 'C'}, + {"destroy", no_argument, NULL, 'D'}, + {"drain", no_argument, NULL, 'e'}, + {"open", no_argument, NULL, 'o'}, + {"close", no_argument, NULL, 'c'}, + {"update", required_argument, NULL, 'u'}, + {"find", required_argument, NULL, 'f'}, + {"delete", required_argument, NULL, 'd'}, + {"del_retain", required_argument, NULL, 'r'}, + {"query", no_argument, NULL, 'q'}, + {"iterate", required_argument, NULL, 'i'}, + {"batch", required_argument, NULL, 'b'}, + {"perf", required_argument, NULL, 'p'}, + {NULL, 0, NULL, 0}, }; +#define BTR_SHORTOPTS "+S:RM::C:Deocqu:f:d:r:qi:b:p:" + static void ts_group(void **state) { - + struct btr_test_state *test_state = (struct btr_test_state *)*state; int opt = 0; void **st = NULL; D_PRINT("--------------------------------------\n"); - while ((opt = getopt_long(test_group_stop-test_group_start+1, - test_group_args+test_group_start, - "mC:Docqu:d:r:f:i:b:p:", - btr_ops, + while ((opt = getopt_long(test_state->argc, test_state->argv, BTR_SHORTOPTS, btr_ops, NULL)) != -1) { tst_fn_val.optval = optarg; tst_fn_val.input = true; + switch (opt) { + case 'S': + /* not part of the test sequence */ + break; + case 'R': + sk_btr_class_register(); + break; + + case 'M': + sk_btr_memory_register(tst_fn_val.optval); + break; + case 'C': sk_btr_open_create(st); break; @@ -1072,6 +1123,10 @@ ts_group(void **state) { tst_fn_val.input = false; sk_btr_close_destroy(st); break; + case 'e': + /* not supported by btree_direct, + but keep here for compatibility with btree.c */ + break; case 'q': sk_btr_query(st); break; @@ -1101,104 +1156,79 @@ ts_group(void **state) { sk_btr_perf(st); break; default: - D_PRINT("Unsupported command %c\n", opt); - case 'm': - /* already handled */ - break; + fail_msg("Unsupported command %c\n", opt); } D_PRINT("--------------------------------------\n"); } } static int -run_cmd_line_test(char *test_name, char **args, int start_idx, int stop_idx) +run_cmd_line_test(char *test_name, struct btr_test_state *initial_state) { const struct CMUnitTest btree_test[] = { - {test_name, ts_group, NULL, NULL}, + {test_name, ts_group, NULL, NULL, initial_state}, }; - test_group_args = args; - test_group_start = start_idx; - test_group_stop = stop_idx; - - return cmocka_run_group_tests_name(test_name, - btree_test, - NULL, - NULL); - + return cmocka_run_group_tests_name(test_name, btree_test, NULL, NULL); } int main(int argc, char **argv) { struct timeval tv; + int rc = 0; int opt; - int rc; - int start_idx; - char *test_name; - int stop_idx; + char *test_name = NULL; + struct btr_test_state initial_state = {.argc = argc, .argv = argv}; d_register_alt_assert(mock_assert); - gettimeofday(&tv, NULL); - srand(tv.tv_usec); - - sk_toh = DAOS_HDL_INVAL; - sk_root_off = UMOFF_NULL; - - rc = daos_debug_init(DAOS_LOG_DEFAULT); - if (rc != 0) - return rc; + if (argc == 1) { + print_message("No parameters provided.\n"); + return -1; + } - rc = dbtree_class_register(SK_TREE_CLASS, BTR_FEAT_EMBED_FIRST | BTR_FEAT_DIRECT_KEY, - &sk_ops); - D_ASSERT(rc == 0); - - stop_idx = argc-1; - if (strcmp(argv[1], "--start-test") == 0) { - 
start_idx = 2; - test_name = argv[2]; - if (strcmp(argv[3], "-m") == 0) { - D_PRINT("Using pmem\n"); - rc = utest_pmem_create(POOL_NAME, POOL_SIZE, - sizeof(*sk_root), NULL, &sk_utx); - D_ASSERT(rc == 0); - } - } else { - start_idx = 0; - test_name = "Btree testing tool"; - optind = 0; - /* Check for -m option first */ - while ((opt = getopt_long(argc, argv, "mC:Docqu:d:r:f:i:b:p:", - btr_ops, NULL)) != -1) { - if (opt == 'm') { - D_PRINT("Using pmem\n"); - rc = utest_pmem_create(POOL_NAME, POOL_SIZE, - sizeof(*sk_root), NULL, - &sk_utx); - D_ASSERT(rc == 0); - break; - } + /* Check for --start-test parameter and verify that all parameters are in place */ + while ((opt = getopt_long(argc, argv, BTR_SHORTOPTS, btr_ops, NULL)) != -1) { + if (opt == 'S') { + test_name = optarg; + } else if (opt == '?') { + break; } } + if (opt == '?') { + /* invalid option - error message printed on stderr already */ + return -1; + } else if (argc != optind) { + fail_msg("Cannot interpret parameter: \"%s\" at optind: %d.\n", argv[optind], + optind); + } - if (sk_utx == NULL) { - D_PRINT("Using vmem\n"); - rc = utest_vmem_create(sizeof(*sk_root), &sk_utx); - D_ASSERT(rc == 0); + /* getopt_long start over */ + optind = 1; + + if (test_name == NULL) { + test_name = "Btree testing tool"; } - sk_root = utest_utx2root(sk_utx); - sk_uma = utest_utx2uma(sk_utx); + gettimeofday(&tv, NULL); + srand(tv.tv_usec); - /* start over */ - optind = 0; - rc = run_cmd_line_test(test_name, argv, start_idx, stop_idx); + sk_toh = DAOS_HDL_INVAL; + sk_root_off = UMOFF_NULL; + rc = daos_debug_init(DAOS_LOG_DEFAULT); + if (rc != 0) { + fail_msg("daos_debug_init() failed: %d\n", rc); + } + rc = run_cmd_line_test(test_name, &initial_state); daos_debug_fini(); - rc += utest_utx_destroy(sk_utx); - if (rc != 0) - printf("Error: %d\n", rc); + if (sk_utx) { + rc += utest_utx_destroy(sk_utx); + } + if (rc != 0) { + fail_msg("Error: %d\n", rc); + } - return rc; + return 0; } diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 8d8a15cc820..f06d7f0a144 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1863,9 +1863,9 @@ ds_cont_tgt_refresh_track_eph(uuid_t pool_uuid, uuid_t cont_uuid, arg.tgt_status[i] = tgts[i].ta_comp.co_status; ds_pool_put(pool); - rc = ds_pool_task_collective(pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | - PO_COMP_ST_DOWNOUT, cont_refresh_track_eph_one, - &arg, DSS_ULT_FL_PERIODIC); + rc = ds_pool_thread_collective(pool_uuid, + PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, + cont_refresh_track_eph_one, &arg, DSS_ULT_FL_PERIODIC); out: if (arg.tgt_status != NULL && arg.tgt_status != arg.tgt_status_inline) diff --git a/src/control/cmd/daos/attribute.go b/src/control/cmd/daos/attribute.go index 9514dad70f0..677d2beaebe 100644 --- a/src/control/cmd/daos/attribute.go +++ b/src/control/cmd/daos/attribute.go @@ -1,5 +1,6 @@ // -// (C) Copyright 2018-2021 Intel Corporation. +// (C) Copyright 2018-2024 Intel Corporation. 
+// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -7,13 +8,17 @@ package main import ( + "context" "fmt" - "io" + "strings" "unsafe" "github.com/pkg/errors" - "github.com/daos-stack/daos/src/control/lib/txtfmt" + "github.com/daos-stack/daos/src/control/cmd/daos/pretty" + "github.com/daos-stack/daos/src/control/common/cmdutil" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/logging" ) /* @@ -21,75 +26,137 @@ import ( */ import "C" +type attrType int + +const ( + poolAttr attrType = iota + contAttr +) + +func (at attrType) String() string { + switch at { + case poolAttr: + return "pool" + case contAttr: + return "container" + default: + return "unknown" + } +} + type ( - attribute struct { - Name string `json:"name"` - Value []byte `json:"value,omitempty"` + attrCmd interface { + MustLogCtx() context.Context + cmdutil.JSONOutputter + logging.Logger } - attrList []*attribute + attrListerGetter interface { + ListAttributes(context.Context) ([]string, error) + GetAttributes(context.Context, ...string) (daos.AttributeList, error) + } + + attrSetter interface { + SetAttributes(context.Context, ...*daos.Attribute) error + } + + attrDeleter interface { + DeleteAttributes(context.Context, ...string) error + } ) -func (al attrList) asMap() map[string][]byte { - m := make(map[string][]byte) - for _, a := range al { - m[a.Name] = a.Value +func listAttributes(cmd attrCmd, alg attrListerGetter, at attrType, id string, verbose bool) error { + var attrs daos.AttributeList + if !verbose { + attrNames, err := alg.ListAttributes(cmd.MustLogCtx()) + if err != nil { + return errors.Wrapf(err, "failed to list attributes for %s %s", at, id) + } + attrs = attrListFromNames(attrNames) + } else { + var err error + attrs, err = alg.GetAttributes(cmd.MustLogCtx()) + if err != nil { + return errors.Wrapf(err, "failed to get attributes for %s %s", at, id) + } } - return m -} -func (al attrList) asList() []string { - names := make([]string, len(al)) - for i, a := range al { - names[i] = a.Name + if cmd.JSONOutputEnabled() { + if verbose { + return cmd.OutputJSON(attrs.AsMap(), nil) + } + return cmd.OutputJSON(attrs.AsList(), nil) } - return names + + var bld strings.Builder + title := fmt.Sprintf("Attributes for %s %s:", at, id) + pretty.PrintAttributes(&bld, title, attrs...) + + cmd.Info(bld.String()) + + return nil } -func printAttributes(out io.Writer, header string, attrs ...*attribute) { - fmt.Fprintf(out, "%s\n", header) +func getAttributes(cmd attrCmd, alg attrListerGetter, at attrType, id string, names ...string) error { + attrs, err := alg.GetAttributes(cmd.MustLogCtx(), names...) + if err != nil { + return errors.Wrapf(err, "failed to get attributes for %s %s", at, id) + } - if len(attrs) == 0 { - fmt.Fprintln(out, " No attributes found.") - return - } - - nameTitle := "Name" - valueTitle := "Value" - titles := []string{nameTitle} - - table := []txtfmt.TableRow{} - for _, attr := range attrs { - row := txtfmt.TableRow{} - row[nameTitle] = attr.Name - if len(attr.Value) != 0 { - row[valueTitle] = string(attr.Value) - if len(titles) == 1 { - titles = append(titles, valueTitle) - } + if cmd.JSONOutputEnabled() { + // Maintain compatibility with older behavior. + if len(names) == 1 && len(attrs) == 1 { + return cmd.OutputJSON(attrs[0], nil) } - table = append(table, row) + return cmd.OutputJSON(attrs, nil) } - tf := txtfmt.NewTableFormatter(titles...) 
- tf.InitWriter(out) - tf.Format(table) + var bld strings.Builder + title := fmt.Sprintf("Attributes for %s %s:", at, id) + pretty.PrintAttributes(&bld, title, attrs...) + + cmd.Info(bld.String()) + + return nil } -type attrType int +func setAttributes(cmd attrCmd, as attrSetter, at attrType, id string, attrMap map[string]string) error { + if len(attrMap) == 0 { + return errors.New("attribute name and value are required") + } -const ( - poolAttr attrType = iota - contAttr -) + attrs := make(daos.AttributeList, 0, len(attrMap)) + for key, val := range attrMap { + attrs = append(attrs, &daos.Attribute{ + Name: key, + Value: []byte(val), + }) + } -func listDaosAttributes(hdl C.daos_handle_t, at attrType, verbose bool) (attrList, error) { + if err := as.SetAttributes(cmd.MustLogCtx(), attrs...); err != nil { + return errors.Wrapf(err, "failed to set attributes on %s %s", at, id) + } + cmd.Infof("Attributes successfully set on %s %q", at, id) + + return nil +} + +func delAttributes(cmd attrCmd, ad attrDeleter, at attrType, id string, names ...string) error { + attrsString := strings.Join(names, ",") + if err := ad.DeleteAttributes(cmd.MustLogCtx(), names...); err != nil { + return errors.Wrapf(err, "failed to delete attributes %s on %s %s", attrsString, at, id) + } + cmd.Infof("Attribute(s) %s successfully deleted on %s %q", attrsString, at, id) + + return nil +} + +// NB: These will be removed in the next patch, which adds the container APIs. +func listDaosAttributes(hdl C.daos_handle_t, at attrType, verbose bool) (daos.AttributeList, error) { var rc C.int expectedSize, totalSize := C.size_t(0), C.size_t(0) switch at { - case poolAttr: - rc = C.daos_pool_list_attr(hdl, nil, &totalSize, nil) case contAttr: rc = C.daos_cont_list_attr(hdl, nil, &totalSize, nil) default: @@ -109,8 +176,6 @@ func listDaosAttributes(hdl C.daos_handle_t, at attrType, verbose bool) (attrLis defer C.free(buf) switch at { - case poolAttr: - rc = C.daos_pool_list_attr(hdl, (*C.char)(buf), &totalSize, nil) case contAttr: rc = C.daos_cont_list_attr(hdl, (*C.char)(buf), &totalSize, nil) default: @@ -130,9 +195,9 @@ func listDaosAttributes(hdl C.daos_handle_t, at attrType, verbose bool) (attrLis return getDaosAttributes(hdl, at, attrNames) } - attrs := make([]*attribute, len(attrNames)) + attrs := make(daos.AttributeList, len(attrNames)) for i, name := range attrNames { - attrs[i] = &attribute{Name: name} + attrs[i] = &daos.Attribute{Name: name} } return attrs, nil @@ -141,7 +206,7 @@ func listDaosAttributes(hdl C.daos_handle_t, at attrType, verbose bool) (attrLis // getDaosAttributes fetches the values for the given list of attribute names. // Uses the bulk attribute fetch API to minimize roundtrips. -func getDaosAttributes(hdl C.daos_handle_t, at attrType, names []string) (attrList, error) { +func getDaosAttributes(hdl C.daos_handle_t, at attrType, names []string) (daos.AttributeList, error) { if len(names) == 0 { attrList, err := listDaosAttributes(hdl, at, false) if err != nil { @@ -171,8 +236,6 @@ func getDaosAttributes(hdl C.daos_handle_t, at attrType, names []string) (attrLi attrSizes := make([]C.size_t, numAttr) var rc C.int switch at { - case poolAttr: - rc = C.daos_pool_get_attr(hdl, C.int(numAttr), &attrNames[0], nil, &attrSizes[0], nil) case contAttr: rc = C.daos_cont_get_attr(hdl, C.int(numAttr), &attrNames[0], nil, &attrSizes[0], nil) default: @@ -199,8 +262,6 @@ func getDaosAttributes(hdl C.daos_handle_t, at attrType, names []string) (attrLi // Do the actual fetch of all values in one go. 
switch at { - case poolAttr: - rc = C.daos_pool_get_attr(hdl, C.int(numAttr), &attrNames[0], &attrValues[0], &attrSizes[0], nil) case contAttr: rc = C.daos_cont_get_attr(hdl, C.int(numAttr), &attrNames[0], &attrValues[0], &attrSizes[0], nil) default: @@ -214,9 +275,9 @@ func getDaosAttributes(hdl C.daos_handle_t, at attrType, names []string) (attrLi // Note that we are copying the values into Go-managed byte slices // for safety and simplicity so that we can free the C memory as soon // as this function exits. - attrs := make([]*attribute, numAttr) + attrs := make(daos.AttributeList, numAttr) for i, name := range names { - attrs[i] = &attribute{ + attrs[i] = &daos.Attribute{ Name: name, Value: C.GoBytes(attrValues[i], C.int(attrSizes[i])), } @@ -228,7 +289,7 @@ func getDaosAttributes(hdl C.daos_handle_t, at attrType, names []string) (attrLi // getDaosAttribute fetches the value for the given attribute name. // NB: For operations involving multiple attributes, the getDaosAttributes() // function is preferred for efficiency. -func getDaosAttribute(hdl C.daos_handle_t, at attrType, name string) (*attribute, error) { +func getDaosAttribute(hdl C.daos_handle_t, at attrType, name string) (*daos.Attribute, error) { attrs, err := getDaosAttributes(hdl, at, []string{name}) if err != nil { return nil, err @@ -241,7 +302,7 @@ func getDaosAttribute(hdl C.daos_handle_t, at attrType, name string) (*attribute // setDaosAttributes sets the values for the given list of attribute names. // Uses the bulk attribute set API to minimize roundtrips. -func setDaosAttributes(hdl C.daos_handle_t, at attrType, attrs attrList) error { +func setDaosAttributes(hdl C.daos_handle_t, at attrType, attrs daos.AttributeList) error { if len(attrs) == 0 { return nil } @@ -277,8 +338,6 @@ func setDaosAttributes(hdl C.daos_handle_t, at attrType, attrs attrList) error { attrCount := C.int(len(attrs)) var rc C.int switch at { - case poolAttr: - rc = C.daos_pool_set_attr(hdl, attrCount, &attrNames[0], &valBufs[0], &valSizes[0], nil) case contAttr: rc = C.daos_cont_set_attr(hdl, attrCount, &attrNames[0], &valBufs[0], &valSizes[0], nil) default: @@ -291,12 +350,12 @@ func setDaosAttributes(hdl C.daos_handle_t, at attrType, attrs attrList) error { // setDaosAttribute sets the value for the given attribute name. // NB: For operations involving multiple attributes, the setDaosAttributes() // function is preferred for efficiency. -func setDaosAttribute(hdl C.daos_handle_t, at attrType, attr *attribute) error { +func setDaosAttribute(hdl C.daos_handle_t, at attrType, attr *daos.Attribute) error { if attr == nil { return errors.Errorf("nil %T", attr) } - return setDaosAttributes(hdl, at, attrList{attr}) + return setDaosAttributes(hdl, at, daos.AttributeList{attr}) } func delDaosAttribute(hdl C.daos_handle_t, at attrType, name string) error { @@ -305,8 +364,6 @@ func delDaosAttribute(hdl C.daos_handle_t, at attrType, name string) error { var rc C.int switch at { - case poolAttr: - rc = C.daos_pool_del_attr(hdl, 1, &attrName, nil) case contAttr: rc = C.daos_cont_del_attr(hdl, 1, &attrName, nil) default: diff --git a/src/control/cmd/daos/container.go b/src/control/cmd/daos/container.go index e69c78a370e..8d8980e8002 100644 --- a/src/control/cmd/daos/container.go +++ b/src/control/cmd/daos/container.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. 
+// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -19,6 +20,7 @@ import ( "github.com/jessevdk/go-flags" "github.com/pkg/errors" + "github.com/daos-stack/daos/src/control/cmd/daos/pretty" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/txtfmt" "github.com/daos-stack/daos/src/control/lib/ui" @@ -221,14 +223,14 @@ func queryContainer(poolUUID, contUUID uuid.UUID, poolHandle, contHandle C.daos_ return ci, nil } -func (cmd *containerBaseCmd) connectPool(flags C.uint, ap *C.struct_cmd_args_s) (func(), error) { +func (cmd *containerBaseCmd) connectPool(flags daos.PoolConnectFlag, ap *C.struct_cmd_args_s) (func(), error) { if err := cmd.poolBaseCmd.connectPool(flags); err != nil { return nil, err } if ap != nil { ap.pool = cmd.cPoolHandle - if err := copyUUID(&ap.p_uuid, cmd.poolUUID); err != nil { + if err := copyUUID(&ap.p_uuid, cmd.pool.UUID()); err != nil { cmd.disconnectPool() return nil, err } @@ -299,7 +301,7 @@ func (cmd *containerCreateCmd) Execute(_ []string) (err error) { cmd.poolBaseCmd.Args.Pool.UUID = pu } - disconnectPool, err := cmd.connectPool(C.DAOS_PC_RW, ap) + disconnectPool, err := cmd.connectPool(daos.PoolConnectFlagReadWrite, ap) if err != nil { return err } @@ -317,7 +319,7 @@ func (cmd *containerCreateCmd) Execute(_ []string) (err error) { defer cmd.closeContainer() var ci *daos.ContainerInfo - ci, err = queryContainer(cmd.poolUUID, cmd.contUUID, cmd.cPoolHandle, cmd.cContHandle) + ci, err = queryContainer(cmd.pool.UUID(), cmd.contUUID, cmd.cPoolHandle, cmd.cContHandle) if err != nil { if errors.Cause(err) != daos.NoPermission { return errors.Wrapf(err, "failed to query new container %s", contID) @@ -327,7 +329,7 @@ func (cmd *containerCreateCmd) Execute(_ []string) (err error) { cmd.Errorf("container %s was created, but query failed", contID) ci = new(daos.ContainerInfo) - ci.PoolUUID = cmd.poolUUID + ci.PoolUUID = cmd.pool.UUID() ci.Type = cmd.Type.String() ci.ContainerUUID = cmd.contUUID ci.ContainerLabel = cmd.Args.Label @@ -406,9 +408,9 @@ func (cmd *containerCreateCmd) contCreate() (string, error) { } if len(cmd.Attrs.ParsedProps) != 0 { - attrs := make(attrList, 0, len(cmd.Attrs.ParsedProps)) + attrs := make(daos.AttributeList, 0, len(cmd.Attrs.ParsedProps)) for key, val := range cmd.Attrs.ParsedProps { - attrs = append(attrs, &attribute{ + attrs = append(attrs, &daos.Attribute{ Name: key, Value: []byte(val), }) @@ -627,7 +629,7 @@ func (cmd *existingContainerCmd) resolveAndConnect(contFlags C.uint, ap *C.struc } var cleanupPool func() - cleanupPool, err = cmd.connectPool(C.DAOS_PC_RO, ap) + cleanupPool, err = cmd.connectPool(daos.PoolConnectFlagReadOnly, ap) if err != nil { return } @@ -651,7 +653,7 @@ func (cmd *existingContainerCmd) resolveAndConnect(contFlags C.uint, ap *C.struc }, nil } -func (cmd *existingContainerCmd) getAttr(name string) (*attribute, error) { +func (cmd *existingContainerCmd) getAttr(name string) (*daos.Attribute, error) { return getDaosAttribute(cmd.cContHandle, contAttr, name) } @@ -731,7 +733,7 @@ func printContainers(out io.Writer, contIDs []*ContainerID) { } func (cmd *containerListCmd) Execute(_ []string) error { - cleanup, err := cmd.resolveAndConnect(C.DAOS_PC_RO, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadOnly, nil) if err != nil { return err } @@ -990,7 +992,7 @@ func (cmd *containerQueryCmd) Execute(_ []string) error { } defer cleanup() - ci, err := queryContainer(cmd.poolUUID, cmd.contUUID, cmd.cPoolHandle, 
cmd.cContHandle) + ci, err := queryContainer(cmd.pool.UUID(), cmd.contUUID, cmd.cPoolHandle, cmd.cContHandle) if err != nil { return errors.Wrapf(err, "failed to query container %s", @@ -1123,14 +1125,14 @@ func (cmd *containerListAttrsCmd) Execute(args []string) error { if cmd.JSONOutputEnabled() { if cmd.Verbose { - return cmd.OutputJSON(attrs.asMap(), nil) + return cmd.OutputJSON(attrs.AsMap(), nil) } - return cmd.OutputJSON(attrs.asList(), nil) + return cmd.OutputJSON(attrs.AsList(), nil) } var bld strings.Builder title := fmt.Sprintf("Attributes for container %s:", cmd.ContainerID()) - printAttributes(&bld, title, attrs...) + pretty.PrintAttributes(&bld, title, attrs...) cmd.Info(bld.String()) @@ -1208,7 +1210,7 @@ func (cmd *containerGetAttrCmd) Execute(args []string) error { } defer cleanup() - var attrs attrList + var attrs daos.AttributeList if len(cmd.Args.Attrs.ParsedProps) == 0 { attrs, err = listDaosAttributes(cmd.cContHandle, contAttr, true) } else { @@ -1229,7 +1231,7 @@ func (cmd *containerGetAttrCmd) Execute(args []string) error { var bld strings.Builder title := fmt.Sprintf("Attributes for container %s:", cmd.ContainerID()) - printAttributes(&bld, title, attrs...) + pretty.PrintAttributes(&bld, title, attrs...) cmd.Info(bld.String()) @@ -1278,9 +1280,9 @@ func (cmd *containerSetAttrCmd) Execute(args []string) error { } defer cleanup() - attrs := make(attrList, 0, len(cmd.Args.Attrs.ParsedProps)) + attrs := make(daos.AttributeList, 0, len(cmd.Args.Attrs.ParsedProps)) for key, val := range cmd.Args.Attrs.ParsedProps { - attrs = append(attrs, &attribute{ + attrs = append(attrs, &daos.Attribute{ Name: key, Value: []byte(val), }) @@ -1472,7 +1474,7 @@ func (f *ContainerID) Complete(match string) (comps []flags.Completion) { } defer fini() - cleanup, err := pf.resolveAndConnect(C.DAOS_PC_RO, nil) + cleanup, err := pf.resolveAndConnect(daos.PoolConnectFlagReadOnly, nil) if err != nil { return } diff --git a/src/control/cmd/daos/health.go b/src/control/cmd/daos/health.go index 61f1d1df142..fa71a5b9a08 100644 --- a/src/control/cmd/daos/health.go +++ b/src/control/cmd/daos/health.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -9,6 +10,7 @@ package main import ( "fmt" "strings" + "unsafe" "github.com/google/uuid" @@ -16,11 +18,17 @@ import ( "github.com/daos-stack/daos/src/control/cmd/daos/pretty" "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/daos/api" "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/lib/ui" "github.com/daos-stack/daos/src/control/logging" ) +/* +#include "util.h" +*/ +import "C" + type healthCmds struct { Check healthCheckCmd `command:"check" description:"Perform DAOS system health checks"` NetTest netTestCmd `command:"net-test" description:"Perform non-destructive DAOS networking tests"` @@ -62,6 +70,8 @@ func collectBuildInfo(log logging.Logger, shi *daos.SystemHealthInfo) error { } func (cmd *healthCheckCmd) Execute([]string) error { + ctx := cmd.MustLogCtx() + // TODO (DAOS-10028): Move this logic into the daos package once the API is available. 
systemHealth := &daos.SystemHealthInfo{ ComponentBuildInfo: make(map[string]daos.ComponentBuild), @@ -72,7 +82,7 @@ func (cmd *healthCheckCmd) Execute([]string) error { return err } - sysInfo, err := cmd.apiProvider.GetSystemInfo(cmd.MustLogCtx()) + sysInfo, err := cmd.apiProvider.GetSystemInfo(ctx) if err != nil { cmd.Errorf("failed to query system information: %v", err) } @@ -80,7 +90,10 @@ func (cmd *healthCheckCmd) Execute([]string) error { cmd.Infof("Checking DAOS system: %s", systemHealth.SystemInfo.Name) - pools, err := getPoolList(cmd.Logger, cmd.SysName, true) + pools, err := api.GetPoolList(ctx, api.GetPoolListReq{ + SysName: cmd.SysName, + Query: true, + }) if err != nil { cmd.Errorf("failed to get pool list: %v", err) } @@ -88,13 +101,18 @@ func (cmd *healthCheckCmd) Execute([]string) error { for _, pool := range pools { systemHealth.Pools[pool.UUID] = pool - poolHdl, _, err := poolConnect(pool.UUID.String(), cmd.SysName, daos.PoolConnectFlagReadOnly, false) + pcResp, err := api.PoolConnect(ctx, api.PoolConnectReq{ + SysName: cmd.SysName, + ID: pool.UUID.String(), + Flags: daos.PoolConnectFlagReadOnly, + Query: false, + }) if err != nil { cmd.Errorf("failed to connect to pool %s: %v", pool.Label, err) continue } defer func() { - if err := poolDisconnectAPI(poolHdl); err != nil { + if err := pcResp.Connection.Disconnect(ctx); err != nil { cmd.Errorf("failed to disconnect from pool %s: %v", pool.Label, err) } }() @@ -104,7 +122,7 @@ func (cmd *healthCheckCmd) Execute([]string) error { if pool.DisabledTargets > 0 { queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) } - tpi, err := queryPool(poolHdl, queryMask) + tpi, err := pcResp.Connection.Query(ctx, queryMask) if err != nil { cmd.Errorf("failed to query pool %s: %v", pool.Label, err) continue @@ -113,6 +131,13 @@ func (cmd *healthCheckCmd) Execute([]string) error { pool.DisabledRanks = tpi.DisabledRanks pool.DeadRanks = tpi.DeadRanks + /* temporary, until we get the container API bindings */ + var poolHdl C.daos_handle_t + if err := pcResp.Connection.FillHandle(unsafe.Pointer(&poolHdl)); err != nil { + cmd.Errorf("failed to fill handle for pool %s: %v", pool.Label, err) + continue + } + poolConts, err := listContainers(poolHdl) if err != nil { cmd.Errorf("failed to list containers on pool %s: %v", pool.Label, err) diff --git a/src/control/cmd/daos/health_test.go b/src/control/cmd/daos/health_test.go index 95565b36bb2..a3a3f241018 100644 --- a/src/control/cmd/daos/health_test.go +++ b/src/control/cmd/daos/health_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -11,54 +12,87 @@ import ( "testing" "github.com/dustin/go-humanize" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" - "github.com/daos-stack/daos/src/control/common/cmdutil" "github.com/daos-stack/daos/src/control/common/test" "github.com/daos-stack/daos/src/control/lib/daos" "github.com/daos-stack/daos/src/control/lib/ranklist" - "github.com/daos-stack/daos/src/control/lib/ui" - "github.com/daos-stack/daos/src/control/logging" +) + +var ( + runSelfTestResult []*daos.SelfTestResult + runSelfTestErr error ) func RunSelfTest(ctx context.Context, cfg *daos.SelfTestConfig) ([]*daos.SelfTestResult, error) { - return []*daos.SelfTestResult{}, nil + return runSelfTestResult, runSelfTestErr } func TestDaos_netTestCmdExecute(t *testing.T) { - // Quickie smoke test for the UI -- will flesh out later. 
- var opts cliOptions - log, buf := logging.NewTestLogger(t.Name()) - defer test.ShowBufferOnFailure(t, buf) - args := []string{ - "health", "net-test", - "--ranks", "0-3", - "--tags", "4-9", - "--size", "20 MiB", - "--rep-count", "2222", - "--bytes", "--verbose", - } - expArgs := netTestCmd{} - expArgs.Ranks.Replace(ranklist.MustCreateRankSet("0-3")) - expArgs.Tags.Replace(ranklist.MustCreateRankSet("4-9")) - expArgs.XferSize.Bytes = 20 * humanize.MiByte - expArgs.RepCount = 2222 - expArgs.Verbose = true - expArgs.TpsBytes = true + baseArgs := test.JoinArgs(nil, "health", "net-test") - if err := parseOpts(args, &opts, log); err != nil { - t.Fatal(err) - } - cmpOpts := cmp.Options{ - cmpopts.IgnoreUnexported(netTestCmd{}), - cmp.Comparer(func(a, b ranklist.RankSet) bool { - return a.String() == b.String() - }), - cmp.Comparer(func(a, b ui.ByteSizeFlag) bool { - return a.String() == b.String() - }), - cmpopts.IgnoreTypes(cmdutil.LogCmd{}, cmdutil.JSONOutputCmd{}), + for name, tc := range map[string]struct { + args []string + expArgs netTestCmd + expErr error + }{ + "all set (long)": { + args: test.JoinArgs(baseArgs, + "--ranks", "0-3", + "--tags", "4-9", + "--size", "20 MiB", + "--rep-count", "2222", + "--max-inflight", "1234", + "--bytes", "--verbose", + ), + expArgs: func() netTestCmd { + cmd := netTestCmd{} + cmd.Ranks.Replace(ranklist.MustCreateRankSet("0-3")) + cmd.Tags.Replace(ranklist.MustCreateRankSet("4-9")) + cmd.XferSize.Bytes = 20 * humanize.MiByte + cmd.RepCount = 2222 + cmd.MaxInflight = 1234 + cmd.Verbose = true + cmd.TpsBytes = true + return cmd + }(), + }, + "all set (short)": { + args: test.JoinArgs(baseArgs, + "-r", "0-3", + "-t", "4-9", + "-s", "20 MiB", + "-c", "2222", + "-m", "1234", + "-y", "-v", + ), + expArgs: func() netTestCmd { + cmd := netTestCmd{} + cmd.Ranks.Replace(ranklist.MustCreateRankSet("0-3")) + cmd.Tags.Replace(ranklist.MustCreateRankSet("4-9")) + cmd.XferSize.Bytes = 20 * humanize.MiByte + cmd.RepCount = 2222 + cmd.MaxInflight = 1234 + cmd.Verbose = true + cmd.TpsBytes = true + return cmd + }(), + }, + "selftest fails": { + args: []string{"health", "net-test"}, + expErr: errors.New("whoops"), + }, + } { + t.Run(name, func(t *testing.T) { + if tc.expErr != nil { + prevErr := runSelfTestErr + t.Cleanup(func() { + runSelfTestErr = prevErr + }) + runSelfTestErr = tc.expErr + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Health.NetTest") + }) } - test.CmpAny(t, "health net-test args", expArgs, opts.Health.NetTest, cmpOpts...) } diff --git a/src/control/cmd/daos/pool.go b/src/control/cmd/daos/pool.go index 831a775db1a..8158d1edc98 100644 --- a/src/control/cmd/daos/pool.go +++ b/src/control/cmd/daos/pool.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. 
+// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -7,7 +8,6 @@ package main import ( - "fmt" "os" "strings" "unsafe" @@ -16,10 +16,10 @@ import ( "github.com/pkg/errors" "github.com/daos-stack/daos/src/control/cmd/daos/pretty" - "github.com/daos-stack/daos/src/control/common" "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/daos/api" + "github.com/daos-stack/daos/src/control/lib/ranklist" "github.com/daos-stack/daos/src/control/lib/ui" - "github.com/daos-stack/daos/src/control/logging" ) /* @@ -67,8 +67,10 @@ type PoolID struct { type poolBaseCmd struct { daosCmd - poolUUID uuid.UUID + pool *api.PoolHandle + // deprecated params -- gradually remove in favor of PoolHandle + poolUUID uuid.UUID cPoolHandle C.daos_handle_t Args struct { @@ -76,138 +78,61 @@ type poolBaseCmd struct { } `positional-args:"yes"` } -func (cmd *poolBaseCmd) poolUUIDPtr() *C.uchar { - if cmd.poolUUID == uuid.Nil { - cmd.Errorf("poolUUIDPtr(): nil UUID") - return nil - } - return (*C.uchar)(unsafe.Pointer(&cmd.poolUUID[0])) -} - func (cmd *poolBaseCmd) PoolID() ui.LabelOrUUIDFlag { return cmd.Args.Pool.LabelOrUUIDFlag } -// poolConnect is a convenience wrapper around poolConnectAPI. -func poolConnect(poolID, sysName string, flags uint, query bool) (C.daos_handle_t, *C.daos_pool_info_t, error) { - var cSysName *C.char - if sysName != "" { - cSysName = C.CString(sysName) - defer freeString(cSysName) - } - - cPoolID := C.CString(poolID) - defer freeString(cPoolID) - - var hdl C.daos_handle_t - var infoPtr *C.daos_pool_info_t - if query { - infoPtr = &C.daos_pool_info_t{ - pi_bits: C.ulong(daos.DefaultPoolQueryMask), - } +func (cmd *poolBaseCmd) connectPool(flags daos.PoolConnectFlag) error { + if cmd.PoolID().Empty() { + return errors.New("no pool UUID or label supplied") } - return hdl, infoPtr, poolConnectAPI(cPoolID, cSysName, C.uint(flags), &hdl, infoPtr) -} - -// poolConnectAPI is a lower-level wrapper around daos_pool_connect(). -func poolConnectAPI(poolID, sysName *C.char, flags C.uint, hdl *C.daos_handle_t, info *C.daos_pool_info_t) error { - return daosError(C.daos_pool_connect(poolID, sysName, flags, hdl, info, nil)) -} - -// poolDisconnectAPI is a convenience wrapper around daos_pool_disconnect(). -func poolDisconnectAPI(hdl C.daos_handle_t) error { - // Hack for NLT fault injection testing: If the rc - // is -DER_NOMEM, retry once in order to actually - // shut down and release resources. - rc := C.daos_pool_disconnect(hdl, nil) - if rc == -C.DER_NOMEM { - rc = C.daos_pool_disconnect(hdl, nil) - // DAOS-8866, daos_pool_disconnect() might have failed, but worked anyway. 
- if rc == -C.DER_NO_HDL { - rc = -C.DER_SUCCESS - } + req := api.PoolConnectReq{ + SysName: cmd.SysName, + ID: cmd.PoolID().String(), + Flags: flags, } - return daosError(rc) -} - -func (cmd *poolBaseCmd) connectPool(flags C.uint) error { - sysName := cmd.SysName - var cSysName *C.char - if sysName != "" { - cSysName := C.CString(sysName) - defer freeString(cSysName) + resp, err := PoolConnect(cmd.MustLogCtx(), req) + if err != nil { + return err } + cmd.pool = resp.Connection - switch { - case cmd.PoolID().HasLabel(): - var poolInfo C.daos_pool_info_t - cLabel := C.CString(cmd.PoolID().Label) - defer freeString(cLabel) - - cmd.Debugf("connecting to pool: %s", cmd.PoolID().Label) - if err := poolConnectAPI(cLabel, cSysName, flags, &cmd.cPoolHandle, &poolInfo); err != nil { - return err - } - var err error - cmd.poolUUID, err = uuidFromC(poolInfo.pi_uuid) - if err != nil { - cmd.disconnectPool() - return err - } - case cmd.PoolID().HasUUID(): - cmd.poolUUID = cmd.PoolID().UUID - cmd.Debugf("connecting to pool: %s", cmd.poolUUID) - cUUIDstr := C.CString(cmd.poolUUID.String()) - defer freeString(cUUIDstr) - if err := poolConnectAPI(cUUIDstr, cSysName, flags, &cmd.cPoolHandle, nil); err != nil { - return err - } - default: - return errors.New("no pool UUID or label supplied") + // Needed for backward compatibility with code that calls libdaos directly. + // Can be removed when everything is behind the API. + if err := cmd.pool.FillHandle(unsafe.Pointer(&cmd.cPoolHandle)); err != nil { + cmd.disconnectPool() + return err } return nil } func (cmd *poolBaseCmd) disconnectPool() { - cmd.Debugf("disconnecting pool %s", cmd.PoolID()) - if err := poolDisconnectAPI(cmd.cPoolHandle); err != nil { + if err := cmd.pool.Disconnect(cmd.MustLogCtx()); err != nil { cmd.Errorf("pool disconnect failed: %v", err) } } -func (cmd *poolBaseCmd) resolveAndConnect(flags C.uint, ap *C.struct_cmd_args_s) (func(), error) { +func (cmd *poolBaseCmd) resolveAndConnect(flags daos.PoolConnectFlag, ap *C.struct_cmd_args_s) (func(), error) { if err := cmd.connectPool(flags); err != nil { return nil, errors.Wrapf(err, "failed to connect to pool %s", cmd.PoolID()) } if ap != nil { - if err := copyUUID(&ap.p_uuid, cmd.poolUUID); err != nil { + if err := copyUUID(&ap.p_uuid, cmd.pool.UUID()); err != nil { return nil, err } ap.pool = cmd.cPoolHandle - switch { - case cmd.PoolID().HasLabel(): - pLabel := C.CString(cmd.PoolID().Label) - defer freeString(pLabel) - C.strncpy(&ap.pool_str[0], pLabel, C.DAOS_PROP_LABEL_MAX_LEN) - case cmd.PoolID().HasUUID(): - pUUIDstr := C.CString(cmd.poolUUID.String()) - defer freeString(pUUIDstr) - C.strncpy(&ap.pool_str[0], pUUIDstr, C.DAOS_PROP_LABEL_MAX_LEN) - } - } - return func() { - cmd.disconnectPool() - }, nil -} + pLabel := C.CString(cmd.pool.Label) + defer freeString(pLabel) + C.strncpy(&ap.pool_str[0], pLabel, C.DAOS_PROP_LABEL_MAX_LEN) + } -func (cmd *poolBaseCmd) getAttr(name string) (*attribute, error) { - return getDaosAttribute(cmd.cPoolHandle, poolAttr, name) + return cmd.disconnectPool, nil } type poolCmd struct { @@ -228,130 +153,6 @@ type poolQueryCmd struct { HealthOnly bool `short:"t" long:"health-only" description:"Only perform pool health related queries"` } -func convertPoolSpaceInfo(in *C.struct_daos_pool_space, mt C.uint) *daos.StorageUsageStats { - if in == nil { - return nil - } - - return &daos.StorageUsageStats{ - Total: uint64(in.ps_space.s_total[mt]), - Free: uint64(in.ps_space.s_free[mt]), - Min: uint64(in.ps_free_min[mt]), - Max: uint64(in.ps_free_max[mt]), - Mean: 
uint64(in.ps_free_mean[mt]), - MediaType: daos.StorageMediaType(mt), - } -} - -func convertPoolRebuildStatus(in *C.struct_daos_rebuild_status) *daos.PoolRebuildStatus { - if in == nil { - return nil - } - - out := &daos.PoolRebuildStatus{ - Status: int32(in.rs_errno), - } - if out.Status == 0 { - out.TotalObjects = uint64(in.rs_toberb_obj_nr) - out.Objects = uint64(in.rs_obj_nr) - out.Records = uint64(in.rs_rec_nr) - switch { - case in.rs_version == 0: - out.State = daos.PoolRebuildStateIdle - case C.get_rebuild_state(in) == C.DRS_COMPLETED: - out.State = daos.PoolRebuildStateDone - default: - out.State = daos.PoolRebuildStateBusy - } - } - - return out -} - -func convertPoolInfo(pinfo *C.daos_pool_info_t) (*daos.PoolInfo, error) { - poolInfo := new(daos.PoolInfo) - - poolInfo.QueryMask = daos.PoolQueryMask(pinfo.pi_bits) - poolInfo.UUID = uuid.Must(uuidFromC(pinfo.pi_uuid)) - poolInfo.TotalTargets = uint32(pinfo.pi_ntargets) - poolInfo.DisabledTargets = uint32(pinfo.pi_ndisabled) - poolInfo.ActiveTargets = uint32(pinfo.pi_space.ps_ntargets) - poolInfo.TotalEngines = uint32(pinfo.pi_nnodes) - poolInfo.ServiceLeader = uint32(pinfo.pi_leader) - poolInfo.Version = uint32(pinfo.pi_map_ver) - poolInfo.State = daos.PoolServiceStateReady - if poolInfo.DisabledTargets > 0 { - poolInfo.State = daos.PoolServiceStateDegraded - } - - poolInfo.Rebuild = convertPoolRebuildStatus(&pinfo.pi_rebuild_st) - if poolInfo.QueryMask.HasOption(daos.PoolQueryOptionSpace) { - poolInfo.TierStats = []*daos.StorageUsageStats{ - convertPoolSpaceInfo(&pinfo.pi_space, C.DAOS_MEDIA_SCM), - convertPoolSpaceInfo(&pinfo.pi_space, C.DAOS_MEDIA_NVME), - } - } - - return poolInfo, nil -} - -func queryPool(poolHdl C.daos_handle_t, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { - var enabledRanks *C.d_rank_list_t - var disabledRanks *C.d_rank_list_t - defer func() { - C.d_rank_list_free(enabledRanks) - C.d_rank_list_free(disabledRanks) - }() - - var rc C.int - cPoolInfo := C.daos_pool_info_t{ - pi_bits: C.uint64_t(queryMask), - } - if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) && queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - enaQm := queryMask - enaQm.ClearOptions(daos.PoolQueryOptionDisabledEngines) - cPoolInfo.pi_bits = C.uint64_t(enaQm) - rc = C.daos_pool_query(poolHdl, &enabledRanks, &cPoolInfo, nil, nil) - if err := daosError(rc); err != nil { - return nil, err - } - - /* second query to just get disabled ranks */ - rc = C.daos_pool_query(poolHdl, &disabledRanks, nil, nil, nil) - } else if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { - rc = C.daos_pool_query(poolHdl, &enabledRanks, &cPoolInfo, nil, nil) - } else if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { - rc = C.daos_pool_query(poolHdl, &disabledRanks, &cPoolInfo, nil, nil) - } else { - rc = C.daos_pool_query(poolHdl, nil, &cPoolInfo, nil, nil) - } - - if err := daosError(rc); err != nil { - return nil, err - } - - poolInfo, err := convertPoolInfo(&cPoolInfo) - if err != nil { - return nil, err - } - poolInfo.QueryMask = queryMask - - if enabledRanks != nil { - poolInfo.EnabledRanks, err = rankSetFromC(enabledRanks) - if err != nil { - return nil, err - } - } - if disabledRanks != nil { - poolInfo.DisabledRanks, err = rankSetFromC(disabledRanks) - if err != nil { - return nil, err - } - } - - return poolInfo, nil -} - func (cmd *poolQueryCmd) Execute(_ []string) error { queryMask := daos.DefaultPoolQueryMask if cmd.HealthOnly { @@ -360,15 +161,14 @@ func (cmd *poolQueryCmd) Execute(_ []string) error { if 
cmd.ShowEnabledRanks { queryMask.SetOptions(daos.PoolQueryOptionEnabledEngines) } - queryMask.SetOptions(daos.PoolQueryOptionDisabledEngines) - cleanup, err := cmd.resolveAndConnect(C.DAOS_PC_RO, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadOnly, nil) if err != nil { return err } defer cleanup() - poolInfo, err := queryPool(cmd.cPoolHandle, queryMask) + poolInfo, err := cmd.pool.Query(cmd.MustLogCtx(), queryMask) if err != nil { return errors.Wrapf(err, "failed to query pool %q", cmd.PoolID()) } @@ -391,73 +191,20 @@ func (cmd *poolQueryCmd) Execute(_ []string) error { type poolQueryTargetsCmd struct { poolBaseCmd - Rank uint32 `long:"rank" required:"1" description:"Engine rank of the targets to be queried"` - Targets string `long:"target-idx" description:"Comma-separated list of target idx(s) to be queried"` -} - -// For using the pretty printer that dmg uses for this target info. -func convertPoolTargetInfo(ptinfo *C.daos_target_info_t) (*daos.PoolQueryTargetInfo, error) { - pqti := new(daos.PoolQueryTargetInfo) - pqti.Type = daos.PoolQueryTargetType(ptinfo.ta_type) - pqti.State = daos.PoolQueryTargetState(ptinfo.ta_state) - pqti.Space = []*daos.StorageUsageStats{ - { - Total: uint64(ptinfo.ta_space.s_total[C.DAOS_MEDIA_SCM]), - Free: uint64(ptinfo.ta_space.s_free[C.DAOS_MEDIA_SCM]), - MediaType: C.DAOS_MEDIA_SCM, - }, - { - Total: uint64(ptinfo.ta_space.s_total[C.DAOS_MEDIA_NVME]), - Free: uint64(ptinfo.ta_space.s_free[C.DAOS_MEDIA_NVME]), - MediaType: C.DAOS_MEDIA_NVME, - }, - } - - return pqti, nil + Rank uint32 `long:"rank" required:"1" description:"Engine rank of the target(s) to be queried"` + Targets ui.RankSetFlag `long:"target-idx" description:"Comma-separated list of target index(es) to be queried (default: all)"` } func (cmd *poolQueryTargetsCmd) Execute(_ []string) error { - cleanup, err := cmd.resolveAndConnect(C.DAOS_PC_RO, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadOnly, nil) if err != nil { return err } defer cleanup() - var idxList []uint32 - if err = common.ParseNumberList(cmd.Targets, &idxList); err != nil { - return errors.WithMessage(err, "parsing target list") - } - - if len(idxList) == 0 { - pi, err := queryPool(cmd.cPoolHandle, daos.HealthOnlyPoolQueryMask) - if err != nil || (pi.TotalTargets == 0 || pi.TotalEngines == 0) { - if err != nil { - return errors.Wrap(err, "pool query failed") - } - return errors.New("failed to derive target count from pool query") - } - tgtCount := pi.TotalTargets / pi.TotalEngines - for i := uint32(0); i < tgtCount; i++ { - idxList = append(idxList, i) - } - } - - ptInfo := new(C.daos_target_info_t) - var rc C.int - - infos := make([]*daos.PoolQueryTargetInfo, 0, len(idxList)) - for tgt := 0; tgt < len(idxList); tgt++ { - rc = C.daos_pool_query_target(cmd.cPoolHandle, C.uint32_t(idxList[tgt]), C.uint32_t(cmd.Rank), ptInfo, nil) - if err := daosError(rc); err != nil { - return errors.Wrapf(err, - "failed to query pool %s rank:target %d:%d", cmd.poolUUID, cmd.Rank, idxList[tgt]) - } - - tgtInfo, err := convertPoolTargetInfo(ptInfo) - if err != nil { - return err - } - infos = append(infos, tgtInfo) + infos, err := cmd.pool.QueryTargets(cmd.MustLogCtx(), ranklist.Rank(cmd.Rank), &cmd.Targets.RankSet) + if err != nil { + return errors.Wrapf(err, "failed to query targets for pool %s", cmd.PoolID()) } if cmd.JSONOutputEnabled() { @@ -483,32 +230,13 @@ type poolListAttrsCmd struct { } func (cmd *poolListAttrsCmd) Execute(_ []string) error { - cleanup, err := 
cmd.resolveAndConnect(C.DAOS_PC_RO, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadOnly, nil) if err != nil { return err } defer cleanup() - attrs, err := listDaosAttributes(cmd.cPoolHandle, poolAttr, cmd.Verbose) - if err != nil { - return errors.Wrapf(err, - "failed to list attributes for pool %s", cmd.poolUUID) - } - - if cmd.JSONOutputEnabled() { - if cmd.Verbose { - return cmd.OutputJSON(attrs.asMap(), nil) - } - return cmd.OutputJSON(attrs.asList(), nil) - } - - var bld strings.Builder - title := fmt.Sprintf("Attributes for pool %s:", cmd.poolUUID) - printAttributes(&bld, title, attrs...) - - cmd.Info(bld.String()) - - return nil + return listAttributes(cmd, cmd.pool, poolAttr, cmd.pool.ID(), cmd.Verbose) } type poolGetAttrCmd struct { @@ -520,37 +248,13 @@ type poolGetAttrCmd struct { } func (cmd *poolGetAttrCmd) Execute(_ []string) error { - cleanup, err := cmd.resolveAndConnect(C.DAOS_PC_RO, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadOnly, nil) if err != nil { return err } defer cleanup() - var attrs attrList - if len(cmd.Args.Attrs.ParsedProps) == 0 { - attrs, err = listDaosAttributes(cmd.cPoolHandle, poolAttr, true) - } else { - attrs, err = getDaosAttributes(cmd.cPoolHandle, poolAttr, cmd.Args.Attrs.ParsedProps.ToSlice()) - } - if err != nil { - return errors.Wrapf(err, "failed to get attributes for pool %s", cmd.PoolID()) - } - - if cmd.JSONOutputEnabled() { - // Maintain compatibility with older behavior. - if len(cmd.Args.Attrs.ParsedProps) == 1 && len(attrs) == 1 { - return cmd.OutputJSON(attrs[0], nil) - } - return cmd.OutputJSON(attrs, nil) - } - - var bld strings.Builder - title := fmt.Sprintf("Attributes for pool %s:", cmd.PoolID()) - printAttributes(&bld, title, attrs...) - - cmd.Info(bld.String()) - - return nil + return getAttributes(cmd, cmd.pool, poolAttr, cmd.pool.ID(), cmd.Args.Attrs.ParsedProps.ToSlice()...) 
} type poolSetAttrCmd struct { @@ -562,53 +266,31 @@ type poolSetAttrCmd struct { } func (cmd *poolSetAttrCmd) Execute(_ []string) error { - cleanup, err := cmd.resolveAndConnect(C.DAOS_PC_RW, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadWrite, nil) if err != nil { return err } defer cleanup() - if len(cmd.Args.Attrs.ParsedProps) == 0 { - return errors.New("attribute name and value are required") - } - - attrs := make(attrList, 0, len(cmd.Args.Attrs.ParsedProps)) - for key, val := range cmd.Args.Attrs.ParsedProps { - attrs = append(attrs, &attribute{ - Name: key, - Value: []byte(val), - }) - } - - if err := setDaosAttributes(cmd.cPoolHandle, poolAttr, attrs); err != nil { - return errors.Wrapf(err, "failed to set attributes on pool %s", cmd.PoolID()) - } - - return nil + return setAttributes(cmd, cmd.pool, poolAttr, cmd.pool.ID(), cmd.Args.Attrs.ParsedProps) } type poolDelAttrCmd struct { poolBaseCmd Args struct { - Name string `positional-arg-name:"" required:"1"` + Attrs ui.GetPropertiesFlag `positional-arg-name:"key[,key...]" required:"1"` } `positional-args:"yes"` } func (cmd *poolDelAttrCmd) Execute(_ []string) error { - cleanup, err := cmd.resolveAndConnect(C.DAOS_PC_RW, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadWrite, nil) if err != nil { return err } defer cleanup() - if err := delDaosAttribute(cmd.cPoolHandle, poolAttr, cmd.Args.Name); err != nil { - return errors.Wrapf(err, - "failed to delete attribute %q on pool %s", - cmd.Args.Name, cmd.poolUUID) - } - - return nil + return delAttributes(cmd, cmd.pool, poolAttr, cmd.pool.ID(), cmd.Args.Attrs.ParsedProps.ToSlice()...) } type poolAutoTestCmd struct { @@ -625,14 +307,14 @@ func (cmd *poolAutoTestCmd) Execute(_ []string) error { } defer deallocCmdArgs() - cleanup, err := cmd.resolveAndConnect(C.DAOS_PC_RW, nil) + cleanup, err := cmd.resolveAndConnect(daos.PoolConnectFlagReadWrite, nil) if err != nil { return err } defer cleanup() ap.pool = cmd.cPoolHandle - if err := copyUUID(&ap.p_uuid, cmd.poolUUID); err != nil { + if err := copyUUID(&ap.p_uuid, cmd.pool.UUID()); err != nil { return err } ap.p_op = C.POOL_AUTOTEST @@ -649,108 +331,12 @@ func (cmd *poolAutoTestCmd) Execute(_ []string) error { rc := C.pool_autotest_hdlr(ap) if err := daosError(rc); err != nil { - return errors.Wrapf(err, "failed to run autotest for pool %s", - cmd.poolUUID) + return errors.Wrapf(err, "failed to run autotest for pool %s", cmd.PoolID()) } return nil } -func getPoolList(log logging.Logger, sysName string, queryEnabled bool) ([]*daos.PoolInfo, error) { - var cSysName *C.char - if sysName != "" { - cSysName := C.CString(sysName) - defer freeString(cSysName) - } - - var cPools []C.daos_mgmt_pool_info_t - for { - var rc C.int - var poolCount C.size_t - - // First, fetch the total number of pools in the system. - // We may not have access to all of them, so this is an upper bound. - rc = C.daos_mgmt_list_pools(cSysName, &poolCount, nil, nil) - if err := daosError(rc); err != nil { - return nil, err - } - log.Debugf("pools in system: %d", poolCount) - - if poolCount < 1 { - return nil, nil - } - - // Now, we actually fetch the pools into the buffer that we've created. 
- cPools = make([]C.daos_mgmt_pool_info_t, poolCount) - rc = C.daos_mgmt_list_pools(cSysName, &poolCount, &cPools[0], nil) - err := daosError(rc) - if err == nil { - cPools = cPools[:poolCount] // adjust the slice to the number of pools retrieved - log.Debugf("fetched %d pools", len(cPools)) - break - } - if err == daos.StructTooSmall { - log.Notice("server-side pool list changed; re-fetching") - continue - } - log.Errorf("failed to fetch pool list: %s", err) - return nil, err - } - - pools := make([]*daos.PoolInfo, 0, len(cPools)) - for i := 0; i < len(cPools); i++ { - cPool := &cPools[i] - - svcRanks, err := rankSetFromC(cPool.mgpi_svc) - if err != nil { - return nil, err - } - poolUUID, err := uuidFromC(cPool.mgpi_uuid) - if err != nil { - return nil, err - } - poolLabel := C.GoString(cPool.mgpi_label) - - var pool *daos.PoolInfo - if queryEnabled { - poolHandle, poolInfo, err := poolConnect(poolUUID.String(), sysName, daos.PoolConnectFlagReadOnly, true) - if err != nil { - log.Errorf("failed to connect to pool %q: %s", poolLabel, err) - continue - } - - var qErr error - pool, qErr = convertPoolInfo(poolInfo) - if qErr != nil { - log.Errorf("failed to query pool %q: %s", poolLabel, qErr) - } - if err := poolDisconnectAPI(poolHandle); err != nil { - log.Errorf("failed to disconnect from pool %q: %s", poolLabel, err) - } - if qErr != nil { - continue - } - - // Add a few missing pieces that the query doesn't fill in. - pool.Label = poolLabel - pool.ServiceReplicas = svcRanks.Ranks() - } else { - // Just populate the basic info. - pool = &daos.PoolInfo{ - UUID: poolUUID, - Label: poolLabel, - ServiceReplicas: svcRanks.Ranks(), - State: daos.PoolServiceStateReady, - } - } - - pools = append(pools, pool) - } - - log.Debugf("fetched %d/%d pools", len(pools), len(cPools)) - return pools, nil -} - type poolListCmd struct { daosCmd Verbose bool `short:"v" long:"verbose" description:"Add pool UUIDs and service replica lists to display"` @@ -758,7 +344,9 @@ type poolListCmd struct { } func (cmd *poolListCmd) Execute(_ []string) error { - pools, err := getPoolList(cmd.Logger, cmd.SysName, !cmd.NoQuery) + pools, err := GetPoolList(cmd.MustLogCtx(), api.GetPoolListReq{ + Query: !cmd.NoQuery, + }) if err != nil { return err } diff --git a/src/control/cmd/daos/pool_test.go b/src/control/cmd/daos/pool_test.go new file mode 100644 index 00000000000..5f52244788f --- /dev/null +++ b/src/control/cmd/daos/pool_test.go @@ -0,0 +1,475 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "context" + "strings" + "testing" + + "github.com/dustin/go-humanize" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/daos/api" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/lib/ui" +) + +var ( + defaultPoolInfo *daos.PoolInfo = &daos.PoolInfo{ + QueryMask: daos.DefaultPoolQueryMask, + State: daos.PoolServiceStateReady, + UUID: test.MockPoolUUID(1), + Label: "test-pool", + TotalTargets: 48, + TotalEngines: 3, + ActiveTargets: 48, + DisabledTargets: 0, + Version: 1, + ServiceLeader: 2, + ServiceReplicas: []ranklist.Rank{0, 1, 2}, + TierStats: []*daos.StorageUsageStats{ + { + MediaType: daos.StorageMediaTypeScm, + Total: 64 * humanize.TByte, + Free: 16 * humanize.TByte, + }, + { + MediaType: daos.StorageMediaTypeNvme, + Total: 1 * humanize.PByte, + Free: 512 * 
humanize.TByte, + }, + }, + } +) + +var ( + defaultGetPoolListResult = []*daos.PoolInfo{ + defaultPoolInfo, + } + + getPoolListResult []*daos.PoolInfo = defaultGetPoolListResult + getPoolListErr error +) + +func GetPoolList(ctx context.Context, req api.GetPoolListReq) ([]*daos.PoolInfo, error) { + return getPoolListResult, getPoolListErr +} + +func TestDaos_poolListCmd(t *testing.T) { + baseArgs := test.JoinArgs(nil, "pool", "list") + + for name, tc := range map[string]struct { + args []string + expErr error + expArgs poolListCmd + }{ + "all set (long)": { + args: test.JoinArgs(baseArgs, "--verbose", "--no-query"), + expArgs: poolListCmd{ + NoQuery: true, + Verbose: true, + }, + }, + "all set (short)": { + args: test.JoinArgs(baseArgs, "-v", "-n"), + expArgs: poolListCmd{ + NoQuery: true, + Verbose: true, + }, + }, + "query fails": { + args: []string{"pool", "list"}, + expErr: errors.New("whoops"), + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(api.ResetTestStubs) + if tc.expErr != nil { + prevErr := getPoolListErr + t.Cleanup(func() { + getPoolListErr = prevErr + }) + getPoolListErr = tc.expErr + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Pool.List") + }) + } +} + +var ( + defaultPoolConnectResp *api.PoolConnectResp = &api.PoolConnectResp{ + Connection: &api.PoolHandle{}, + Info: defaultPoolInfo, + } + + poolConnectResp *api.PoolConnectResp = defaultPoolConnectResp + poolConnectErr error +) + +func PoolConnect(ctx context.Context, req api.PoolConnectReq) (*api.PoolConnectResp, error) { + return poolConnectResp, poolConnectErr +} + +func TestDaos_poolQueryCmd(t *testing.T) { + baseArgs := test.JoinArgs(nil, "pool", "query", defaultPoolInfo.Label) + + for name, tc := range map[string]struct { + args []string + expErr error + expArgs poolQueryCmd + setup func(t *testing.T) + }{ + "invalid flag": { + args: test.JoinArgs(baseArgs, "--bad"), + expErr: errors.New("unknown flag"), + }, + "missing pool ID": { + args: baseArgs[:len(baseArgs)-1], + expErr: errors.New("no pool UUID or label supplied"), + }, + "connect fails": { + args: baseArgs, + expErr: errors.New("whoops"), + setup: func(t *testing.T) { + prevErr := poolConnectErr + t.Cleanup(func() { + poolConnectErr = prevErr + }) + poolConnectErr = errors.New("whoops") + }, + }, + "all set (long)": { + args: test.JoinArgs(baseArgs, "--show-enabled", "--health-only"), + expArgs: poolQueryCmd{ + ShowEnabledRanks: true, + HealthOnly: true, + }, + }, + "all set (short)": { + args: test.JoinArgs(baseArgs, "-e", "-t"), + expArgs: poolQueryCmd{ + ShowEnabledRanks: true, + HealthOnly: true, + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(api.ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Pool.Query") + }) + } +} + +func TestDaos_poolQueryTargetsCmd(t *testing.T) { + baseArgs := test.JoinArgs(nil, "pool", "query-targets", defaultPoolInfo.Label) + + for name, tc := range map[string]struct { + args []string + expErr error + expArgs poolQueryTargetsCmd + setup func(t *testing.T) + }{ + "invalid flag": { + args: test.JoinArgs(baseArgs, "--rank=2", "--bad"), + expErr: errors.New("unknown flag"), + }, + "missing pool ID": { + args: test.JoinArgs(baseArgs[:len(baseArgs)-1], "--rank=2"), + expErr: errors.New("no pool UUID or label supplied"), + }, + "missing rank argument": { + args: baseArgs, + expErr: errors.New("required flag"), + }, + "connect fails": { + args: test.JoinArgs(baseArgs, "--rank=2"), + expErr: errors.New("whoops"), + setup: func(t *testing.T) { + 
prevErr := poolConnectErr + t.Cleanup(func() { + poolConnectErr = prevErr + }) + poolConnectErr = errors.New("whoops") + }, + }, + "success (rank only)": { + args: test.JoinArgs(baseArgs, "--rank=2"), + expArgs: poolQueryTargetsCmd{ + Rank: 2, + }, + }, + "success (rank and target)": { + args: test.JoinArgs(baseArgs, "--rank=2", "--target-idx=1,2"), + expArgs: poolQueryTargetsCmd{ + Rank: 2, + Targets: ui.RankSetFlag{ + RankSet: *ranklist.MustCreateRankSet("1,2"), + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(api.ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Pool.QueryTargets") + }) + } +} + +func TestDaos_poolSetAttrCmd(t *testing.T) { + baseArgs := test.JoinArgs(nil, "pool", "set-attr", defaultPoolInfo.Label) + keysOnlyArg := "key1,key2" + keyValArg := "key1:val1,key2:val2" + + for name, tc := range map[string]struct { + args []string + expErr error + expArgs poolSetAttrCmd + setup func(t *testing.T) + }{ + "invalid flag": { + args: test.JoinArgs(baseArgs, "--bad", keyValArg), + expErr: errors.New("unknown flag"), + }, + "connect fails": { + args: test.JoinArgs(baseArgs, keyValArg), + expErr: errors.New("whoops"), + setup: func(t *testing.T) { + prevErr := poolConnectErr + t.Cleanup(func() { + poolConnectErr = prevErr + }) + poolConnectErr = errors.New("whoops") + }, + }, + "missing required arguments": { + args: baseArgs, + expErr: errors.New("required argument"), + }, + "malformed required arguments": { + args: test.JoinArgs(baseArgs, keysOnlyArg), + expErr: errors.New("invalid property"), + }, + "success": { + args: test.JoinArgs(baseArgs, keyValArg), + expArgs: poolSetAttrCmd{ + Args: struct { + Attrs ui.SetPropertiesFlag `positional-arg-name:"key:val[,key:val...]" required:"1"` + }{ + Attrs: ui.SetPropertiesFlag{ + ParsedProps: map[string]string{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(api.ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Pool.SetAttr") + }) + } +} + +func TestDaos_poolGetAttrCmd(t *testing.T) { + baseArgs := test.JoinArgs(nil, "pool", "get-attr", defaultPoolInfo.Label) + keysOnlyArg := "key1,key2" + + for name, tc := range map[string]struct { + args []string + expErr error + expArgs poolGetAttrCmd + setup func(t *testing.T) + }{ + "invalid flag": { + args: test.JoinArgs(baseArgs, "--bad"), + expErr: errors.New("unknown flag"), + }, + "missing pool ID": { + args: baseArgs[:len(baseArgs)-1], + expErr: errors.New("no pool UUID or label supplied"), + }, + "connect fails": { + args: baseArgs, + expErr: errors.New("whoops"), + setup: func(t *testing.T) { + prevErr := poolConnectErr + t.Cleanup(func() { + poolConnectErr = prevErr + }) + poolConnectErr = errors.New("whoops") + }, + }, + "malformed arguments": { + args: test.JoinArgs(baseArgs, strings.ReplaceAll(keysOnlyArg, ",", ":")), + expErr: errors.New("key cannot contain"), + }, + "unknown key(s)": { + args: test.JoinArgs(baseArgs, keysOnlyArg), + expErr: daos.Nonexistent, + }, + "success (one key)": { + args: test.JoinArgs(baseArgs, "one"), + expArgs: poolGetAttrCmd{ + Args: struct { + Attrs ui.GetPropertiesFlag `positional-arg-name:"key[,key...]"` + }{ + Attrs: ui.GetPropertiesFlag{ + ParsedProps: map[string]struct{}{ + "one": {}, + }, + }, + }, + }, + }, + "success (all keys)": { + args: baseArgs, + expArgs: poolGetAttrCmd{}, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(api.ResetTestStubs) 
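+			// Apply any per-case stub overrides (e.g. an injected connect error) before parsing and running the command.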
+ if tc.setup != nil { + tc.setup(t) + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Pool.GetAttr") + }) + } +} + +func TestDaos_poolDelAttrCmd(t *testing.T) { + baseArgs := test.JoinArgs(nil, "pool", "del-attr", defaultPoolInfo.Label) + keysOnlyArg := "key1,key2" + + for name, tc := range map[string]struct { + args []string + expErr error + expArgs poolDelAttrCmd + setup func(t *testing.T) + }{ + "invalid flag": { + args: test.JoinArgs(baseArgs, "--bad"), + expErr: errors.New("unknown flag"), + }, + "missing required arguments": { + args: baseArgs, + expErr: errors.New("required argument"), + }, + "connect fails": { + args: test.JoinArgs(baseArgs, keysOnlyArg), + expErr: errors.New("whoops"), + setup: func(t *testing.T) { + prevErr := poolConnectErr + t.Cleanup(func() { + poolConnectErr = prevErr + }) + poolConnectErr = errors.New("whoops") + }, + }, + "malformed arguments": { + args: test.JoinArgs(baseArgs, strings.ReplaceAll(keysOnlyArg, ",", ":")), + expErr: errors.New("key cannot contain"), + }, + "success (one key)": { + args: test.JoinArgs(baseArgs, "one"), + expArgs: poolDelAttrCmd{ + Args: struct { + Attrs ui.GetPropertiesFlag `positional-arg-name:"key[,key...]" required:"1"` + }{ + Attrs: ui.GetPropertiesFlag{ + ParsedProps: map[string]struct{}{ + "one": {}, + }, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(api.ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Pool.DelAttr") + }) + } +} + +func TestDaos_poolListAttrCmd(t *testing.T) { + baseArgs := test.JoinArgs(nil, "pool", "list-attr", defaultPoolInfo.Label) + + for name, tc := range map[string]struct { + args []string + expErr error + expArgs poolListAttrsCmd + setup func(t *testing.T) + }{ + "invalid flag": { + args: test.JoinArgs(baseArgs, "--bad"), + expErr: errors.New("unknown flag"), + }, + "missing pool ID": { + args: baseArgs[:len(baseArgs)-1], + expErr: errors.New("no pool UUID or label supplied"), + }, + "connect fails": { + args: baseArgs, + expErr: errors.New("whoops"), + setup: func(t *testing.T) { + prevErr := poolConnectErr + t.Cleanup(func() { + poolConnectErr = prevErr + }) + poolConnectErr = errors.New("whoops") + }, + }, + "success": { + args: baseArgs, + expArgs: poolListAttrsCmd{}, + }, + "success (verbose, short)": { + args: test.JoinArgs(baseArgs, "-V"), + expArgs: poolListAttrsCmd{ + Verbose: true, + }, + }, + "success (verbose, long)": { + args: test.JoinArgs(baseArgs, "--verbose"), + expArgs: poolListAttrsCmd{ + Verbose: true, + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(api.ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + + runCmdTest(t, tc.args, tc.expArgs, tc.expErr, "Pool.ListAttrs") + }) + } +} diff --git a/src/control/cmd/daos/pretty/pool.go b/src/control/cmd/daos/pretty/pool.go index 30ba143b0a0..483d50ebe12 100644 --- a/src/control/cmd/daos/pretty/pool.go +++ b/src/control/cmd/daos/pretty/pool.go @@ -1,6 +1,7 @@ // // (C) Copyright 2020-2024 Intel Corporation. // (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -63,7 +64,7 @@ func printPoolTiersMdOnSsd(memFileBytes uint64, suss []*daos.StorageUsageStats, } // PrintPoolInfo generates a human-readable representation of the supplied -// PoolQueryResp struct and writes it to the supplied io.Writer. +// PoolInfo struct and writes it to the supplied io.Writer. 
func PrintPoolInfo(pi *daos.PoolInfo, out io.Writer) error { if pi == nil { return errors.Errorf("nil %T", pi) @@ -355,3 +356,35 @@ func PrintPoolList(pools []*daos.PoolInfo, out io.Writer, verbose bool) error { return printPoolList(pools, out) } + +// PrintAttributes generates a human-readable representation of the supplied +// list of daos.Attributes and writes it to the supplied io.Writer. +func PrintAttributes(out io.Writer, header string, attrs ...*daos.Attribute) { + fmt.Fprintf(out, "%s\n", header) + + if len(attrs) == 0 { + fmt.Fprintln(out, " No attributes found.") + return + } + + nameTitle := "Name" + valueTitle := "Value" + titles := []string{nameTitle} + + table := []txtfmt.TableRow{} + for _, attr := range attrs { + row := txtfmt.TableRow{} + row[nameTitle] = attr.Name + if len(attr.Value) != 0 { + row[valueTitle] = string(attr.Value) + if len(titles) == 1 { + titles = append(titles, valueTitle) + } + } + table = append(table, row) + } + + tf := txtfmt.NewTableFormatter(titles...) + tf.InitWriter(out) + tf.Format(table) +} diff --git a/src/control/cmd/daos/stubbed.go b/src/control/cmd/daos/stubbed.go index 4a08ad77255..000e8be5a20 100644 --- a/src/control/cmd/daos/stubbed.go +++ b/src/control/cmd/daos/stubbed.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -12,4 +13,6 @@ import "github.com/daos-stack/daos/src/control/lib/daos/api" var ( RunSelfTest = api.RunSelfTest + GetPoolList = api.GetPoolList + PoolConnect = api.PoolConnect ) diff --git a/src/control/cmd/daos/util.go b/src/control/cmd/daos/util.go index d5b128bf9a4..7d1f5b15ea8 100644 --- a/src/control/cmd/daos/util.go +++ b/src/control/cmd/daos/util.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -336,3 +337,11 @@ func _writeDunsPath(path, ct string, poolUUID uuid.UUID, contUUID uuid.UUID) err return nil } + +func attrListFromNames(names []string) daos.AttributeList { + attrs := make(daos.AttributeList, len(names)) + for i, name := range names { + attrs[i] = &daos.Attribute{Name: name} + } + return attrs +} diff --git a/src/control/cmd/daos/util_test.go b/src/control/cmd/daos/util_test.go new file mode 100644 index 00000000000..9ce7b5b5cf9 --- /dev/null +++ b/src/control/cmd/daos/util_test.go @@ -0,0 +1,71 @@ +// +// (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package main + +import ( + "fmt" + "os" + "reflect" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/daos-stack/daos/src/control/common/cmdutil" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos/api" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/lib/ui" + "github.com/daos-stack/daos/src/control/logging" +) + +// Lock the api test stubs to avoid any inter-package test interference. 
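+// Go runs tests from different packages in parallel by default, so any package
+// exercising the api stubs must hold the stub lock for the duration of its tests.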
+func TestMain(m *testing.M) { + api.LockTestStubs() + api.ResetTestStubs() + defer api.UnlockTestStubs() + os.Exit(m.Run()) + api.ResetTestStubs() +} + +func runCmdTest(t *testing.T, args []string, expCmd any, expErr error, cmdPath string, cmpOpts ...cmp.Option) { + t.Helper() + + var opts cliOptions + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + if err := parseOpts(args, &opts, log); err != nil { + test.CmpErr(t, expErr, err) + if expErr != nil { + return + } + } + + testCmd := reflect.ValueOf(opts) + for _, subCmd := range strings.Split(cmdPath, ".") { + testCmd = testCmd.FieldByName(subCmd) + if !testCmd.IsValid() || testCmd.IsZero() { + t.Fatalf("failed to select subcommand struct using %q", cmdPath) + } + } + + cmpOpts = append(cmpOpts, []cmp.Option{ + cmpopts.IgnoreUnexported(ui.GetPropertiesFlag{}, ui.SetPropertiesFlag{}, ui.PropertiesFlag{}), + cmpopts.IgnoreUnexported(testCmd.Interface()), + cmpopts.IgnoreTypes(cmdutil.LogCmd{}, cmdutil.JSONOutputCmd{}), + cmp.Comparer(func(a, b ranklist.RankSet) bool { + return a.String() == b.String() + }), + cmp.Comparer(func(a, b ui.ByteSizeFlag) bool { + return a.String() == b.String() + }), + }...) + test.CmpAny(t, fmt.Sprintf("%s args", cmdPath), expCmd, testCmd.Interface(), cmpOpts...) +} diff --git a/src/control/cmd/ddb/commands_wrapper.go b/src/control/cmd/ddb/commands_wrapper.go index e19cff9a51f..c8ab7e5543e 100644 --- a/src/control/cmd/ddb/commands_wrapper.go +++ b/src/control/cmd/ddb/commands_wrapper.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -204,7 +205,7 @@ func ddbVeaUpdate(ctx *DdbContext, offset string, blk_cnt string) error { func ddbDtxActCommit(ctx *DdbContext, path string, dtx_id string) error { /* Set up the options */ - options := C.struct_dtx_act_commit_options{} + options := C.struct_dtx_act_options{} options.path = C.CString(path) defer freeString(options.path) options.dtx_id = C.CString(dtx_id) @@ -215,7 +216,7 @@ func ddbDtxActCommit(ctx *DdbContext, path string, dtx_id string) error { func ddbDtxActAbort(ctx *DdbContext, path string, dtx_id string) error { /* Set up the options */ - options := C.struct_dtx_act_abort_options{} + options := C.struct_dtx_act_options{} options.path = C.CString(path) defer freeString(options.path) options.dtx_id = C.CString(dtx_id) @@ -256,3 +257,14 @@ func ddbRmPool(ctx *DdbContext, path string) error { /* Run the c code command */ return daosError(C.ddb_run_rm_pool(&ctx.ctx, &options)) } + +func ddbDtxActDiscardInvalid(ctx *DdbContext, path string, dtx_id string) error { + /* Set up the options */ + options := C.struct_dtx_act_options{} + options.path = C.CString(path) + defer freeString(options.path) + options.dtx_id = C.CString(dtx_id) + defer freeString(options.dtx_id) + /* Run the c code command */ + return daosError(C.ddb_run_dtx_act_discard_invalid(&ctx.ctx, &options)) +} diff --git a/src/control/cmd/ddb/ddb_commands.go b/src/control/cmd/ddb/ddb_commands.go index b87ea681dc4..890b98d6c72 100644 --- a/src/control/cmd/ddb/ddb_commands.go +++ b/src/control/cmd/ddb/ddb_commands.go @@ -1,5 +1,6 @@ // // (C) Copyright 2022-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
// // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -332,4 +333,20 @@ the path must include the extent, otherwise, it must not.`, }, Completer: rmPoolCompleter, }) + // Command: dtx_act_discard_invalid + app.AddCommand(&grumble.Command{ + Name: "dtx_act_discard_invalid", + Aliases: nil, + Help: "Discard the active DTX entry's records if invalid.", + LongHelp: "", + HelpGroup: "vos", + Args: func(a *grumble.Args) { + a.String("path", "VOS tree path to a container.") + a.String("dtx_id", "DTX id of the entry to validate or 'all' to validate all active DTX entries.") + }, + Run: func(c *grumble.Context) error { + return ddbDtxActDiscardInvalid(ctx, c.Args.String("path"), c.Args.String("dtx_id")) + }, + Completer: nil, + }) } diff --git a/src/control/cmd/dmg/pool.go b/src/control/cmd/dmg/pool.go index 155b49030af..3ee830bdd12 100644 --- a/src/control/cmd/dmg/pool.go +++ b/src/control/cmd/dmg/pool.go @@ -1,6 +1,7 @@ // // (C) Copyright 2019-2024 Intel Corporation. // (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -700,8 +701,8 @@ func (cmd *poolQueryCmd) Execute(args []string) error { type poolQueryTargetsCmd struct { poolCmd - Rank uint32 `long:"rank" required:"1" description:"Engine rank of the targets to be queried"` - Targets string `long:"target-idx" description:"Comma-separated list of target idx(s) to be queried"` + Rank uint32 `long:"rank" required:"1" description:"Engine rank of the target(s) to be queried"` + Targets ui.RankSetFlag `long:"target-idx" description:"Comma-separated list of target index(es) to be queried (default: all)"` } // Execute is run when PoolQueryTargetsCmd subcommand is activated @@ -709,11 +710,7 @@ func (cmd *poolQueryTargetsCmd) Execute(args []string) error { ctx := cmd.MustLogCtx() var tgtsList []uint32 - if len(cmd.Targets) > 0 { - if err := common.ParseNumberList(cmd.Targets, &tgtsList); err != nil { - return errors.WithMessage(err, "parsing target list") - } - } else { + if cmd.Targets.RankSet.Count() == 0 { pi, err := control.PoolQuery(ctx, cmd.ctlInvoker, &control.PoolQueryReq{ ID: cmd.PoolID().String(), QueryMask: daos.DefaultPoolQueryMask, @@ -728,6 +725,11 @@ func (cmd *poolQueryTargetsCmd) Execute(args []string) error { for i := uint32(0); i < tgtCount; i++ { tgtsList = append(tgtsList, i) } + } else { + tgtsList = make([]uint32, cmd.Targets.RankSet.Count()) + for i, rank := range cmd.Targets.RankSet.Ranks() { + tgtsList[i] = uint32(rank) + } } req := &control.PoolQueryTargetReq{ diff --git a/src/control/common/test/utils.go b/src/control/common/test/utils.go index ee685bf568a..aabcccbc04e 100644 --- a/src/control/common/test/utils.go +++ b/src/control/common/test/utils.go @@ -1,5 +1,6 @@ // // (C) Copyright 2018-2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -433,3 +434,11 @@ func MustLogContext(t *testing.T, log logging.Logger) context.Context { } return ctx } + +// JoinArgs creates a new string slice from a base string and optional +// additional string arguments. Does not modify the base string. +func JoinArgs(base []string, args ...string) []string { + joined := make([]string, len(base)) + copy(joined, base) + return append(joined, args...) 
+} diff --git a/src/control/lib/daos/api/api.go b/src/control/lib/daos/api/api.go index 51ef20e669a..add21b6da57 100644 --- a/src/control/lib/daos/api/api.go +++ b/src/control/lib/daos/api/api.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -26,10 +27,6 @@ type ( } ) -func daosError(rc C.int) error { - return daos.ErrorFromRC(int(rc)) -} - func (api *api) isInitialized() bool { api.RLock() defer api.RUnlock() diff --git a/src/control/lib/daos/api/api_test.go b/src/control/lib/daos/api/api_test.go new file mode 100644 index 00000000000..5f4e1672b27 --- /dev/null +++ b/src/control/lib/daos/api/api_test.go @@ -0,0 +1,28 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +import ( + "os" + "testing" +) + +// TestMain defines package-level setup and teardown logic. +// NB: Any packages that run tests which depend on the API stubs +// should copy this function in order to avoid interference +// between parallel tests. Go parallelizes testing across +// packages (but not within a package) by default. +// +// Long-term, this should be phased out as API users should mock +// the API instead of relying on the stubs. +func TestMain(m *testing.M) { + LockTestStubs() + ResetTestStubs() + defer UnlockTestStubs() + os.Exit(m.Run()) + ResetTestStubs() +} diff --git a/src/control/lib/daos/api/attribute.go b/src/control/lib/daos/api/attribute.go new file mode 100644 index 00000000000..79e2630069e --- /dev/null +++ b/src/control/lib/daos/api/attribute.go @@ -0,0 +1,268 @@ +// +// (C) Copyright 2018-2024 Intel Corporation. +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +import ( + "unsafe" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/lib/daos" +) + +/* +#include +#include + +#include +*/ +import "C" + +type attrType int + +const ( + poolAttr attrType = iota + contAttr +) + +func (at attrType) String() string { + switch at { + case poolAttr: + return "pool" + case contAttr: + return "container" + default: + return "unknown" + } +} + +func listDaosAttributes(hdl C.daos_handle_t, at attrType) ([]string, error) { + var rc C.int + expectedSize, totalSize := C.size_t(0), C.size_t(0) + + switch at { + case poolAttr: + rc = daos_pool_list_attr(hdl, nil, &totalSize, nil) + /*case contAttr: + rc = daos_cont_list_attr(hdl, nil, &totalSize, nil)*/ + default: + return nil, errors.Wrapf(daos.InvalidInput, "unknown attr type %d", at) + } + if err := daosError(rc); err != nil { + return nil, errors.Wrapf(err, "failed to list %s attributes", at) + } + + if totalSize < 1 { + return nil, nil + } + + attrNames := []string{} + expectedSize = totalSize + cNamesBuf := C.malloc(totalSize) + defer C.free(cNamesBuf) + + switch at { + case poolAttr: + rc = daos_pool_list_attr(hdl, (*C.char)(cNamesBuf), &totalSize, nil) + /*case contAttr: + rc = daos_cont_list_attr(hdl, (*C.char)(buf), &totalSize, nil)*/ + default: + return nil, errors.Wrapf(daos.InvalidInput, "unknown attr type %d", at) + } + if err := daosError(rc); err != nil { + return nil, errors.Wrapf(err, "failed to list %s attributes", at) + } + + if err := iterStringsBuf(cNamesBuf, expectedSize, func(name string) { + attrNames = append(attrNames, name) + }); err != nil { + return nil, err + } + + return attrNames, nil +} + +// getDaosAttributes fetches the values for the given list of attribute names. 
+// Uses the bulk attribute fetch API to minimize roundtrips. +func getDaosAttributes(hdl C.daos_handle_t, at attrType, reqAttrNames []string) (daos.AttributeList, error) { + if len(reqAttrNames) == 0 { + attrNameList, err := listDaosAttributes(hdl, at) + if err != nil { + return nil, errors.Wrapf(err, "failed to list %s attributes", at) + } + reqAttrNames = attrNameList + } + numAttr := len(reqAttrNames) + + if numAttr == 0 { + return nil, nil + } + + // First, build a slice of C strings for the requested attribute names. + cAttrNames := make([]*C.char, numAttr) + for i, name := range reqAttrNames { + if name == "" { + return nil, errors.Wrapf(daos.InvalidInput, "empty %s attribute name at index %d", at, i) + } + cAttrNames[i] = C.CString(name) + } + defer func(nameSlice []*C.char) { + for _, name := range nameSlice { + freeString(name) + } + }(cAttrNames) + + // Next, create a slice of C.size_t entries to hold the sizes of the values. + // We have to do this first in order to know the buffer sizes to allocate + // before fetching the actual values. + cAttrSizes := make([]C.size_t, numAttr) + var rc C.int + switch at { + case poolAttr: + rc = daos_pool_get_attr(hdl, C.int(numAttr), &cAttrNames[0], nil, &cAttrSizes[0], nil) + /*case contAttr: + rc = daos_cont_get_attr(hdl, C.int(numAttr), &attrNames[0], nil, &attrSizes[0], nil)*/ + default: + return nil, errors.Wrapf(daos.InvalidInput, "unknown attr type %d", at) + } + if err := daosError(rc); err != nil { + return nil, errors.Wrapf(err, "failed to get %s attribute sizes", at) + } + + // Now, create a slice of buffers to hold the values. + cAttrValues := make([]unsafe.Pointer, numAttr) + defer func(valueSlice []unsafe.Pointer) { + for _, value := range valueSlice { + C.free(value) + } + }(cAttrValues) + for i, size := range cAttrSizes { + if size < 1 { + return nil, errors.Wrapf(daos.MiscError, "failed to get %s attribute %s: size is %d", at, reqAttrNames[i], size) + } + + cAttrValues[i] = C.malloc(size) + } + + // Do the actual fetch of all values in one go. + switch at { + case poolAttr: + rc = daos_pool_get_attr(hdl, C.int(numAttr), &cAttrNames[0], &cAttrValues[0], &cAttrSizes[0], nil) + /*case contAttr: + rc = daos_cont_get_attr(hdl, C.int(numAttr), &attrNames[0], &attrValues[0], &attrSizes[0], nil)*/ + default: + return nil, errors.Wrapf(daos.InvalidInput, "unknown attr type %d", at) + } + if err := daosError(rc); err != nil { + return nil, errors.Wrapf(err, "failed to get %s attribute values", at) + } + + // Finally, create a slice of attribute structs to hold the results. + // Note that we are copying the values into Go-managed byte slices + // for safety and simplicity so that we can free the C memory as soon + // as this function exits. + attrs := make([]*daos.Attribute, numAttr) + for i, name := range reqAttrNames { + attrs[i] = &daos.Attribute{ + Name: name, + Value: C.GoBytes(cAttrValues[i], C.int(cAttrSizes[i])), + } + } + + return attrs, nil +} + +// setDaosAttributes sets the values for the given list of attribute names. +// Uses the bulk attribute set API to minimize roundtrips. +func setDaosAttributes(hdl C.daos_handle_t, at attrType, attrs daos.AttributeList) error { + if len(attrs) == 0 { + return errors.Wrapf(daos.InvalidInput, "no %s attributes provided", at) + } + + // First, build a slice of C strings for the attribute names. 
+ attrNames := make([]*C.char, len(attrs)) + for i, attr := range attrs { + if attr == nil { + return errors.Wrapf(daos.InvalidInput, "nil %s attribute at index %d", at, i) + } + if attr.Name == "" { + return errors.Wrapf(daos.InvalidInput, "empty %s attribute name at index %d", at, i) + } + attrNames[i] = C.CString(attr.Name) + } + defer func(nameSlice []*C.char) { + for _, name := range nameSlice { + freeString(name) + } + }(attrNames) + + // Next, create a slice of C.size_t entries to hold the sizes of the values, + // and a slice of pointers to the actual values. + attrSizes := make([]C.size_t, len(attrs)) + attrValues := make([]unsafe.Pointer, len(attrs)) + for i, attr := range attrs { + attrSizes[i] = C.size_t(len(attr.Value)) + if attrSizes[i] == 0 { + return errors.Wrapf(daos.InvalidInput, "empty %s attribute value at index %d", at, i) + } + // NB: We are copying the values into C memory for safety and simplicity. + attrValues[i] = C.malloc(attrSizes[i]) + valSlice := unsafe.Slice((*byte)(attrValues[i]), attrSizes[i]) + copy(valSlice[:], attr.Value) + } + defer func(bufSlice []unsafe.Pointer) { + for _, buf := range bufSlice { + C.free(buf) + } + }(attrValues) + + attrCount := C.int(len(attrs)) + var rc C.int + switch at { + case poolAttr: + rc = daos_pool_set_attr(hdl, attrCount, &attrNames[0], &attrValues[0], &attrSizes[0], nil) + /*case contAttr: + rc = daos_cont_set_attr(hdl, attrCount, &attrNames[0], &valBufs[0], &valSizes[0], nil)*/ + default: + return errors.Wrapf(daos.InvalidInput, "unknown attr type %d", at) + } + + return errors.Wrapf(daosError(rc), "failed to set %s attributes", at) +} + +// delDaosAttributes deletes the given attributes. +func delDaosAttributes(hdl C.daos_handle_t, at attrType, names []string) error { + if len(names) == 0 { + return errors.Wrapf(daos.InvalidInput, "no %s attribute names provided", at) + } + + attrNames := make([]*C.char, len(names)) + for i, name := range names { + if name == "" { + return errors.Wrapf(daos.InvalidInput, "empty %s attribute name at index %d", at, i) + } + attrNames[i] = C.CString(name) + } + defer func(nameSlice []*C.char) { + for _, name := range nameSlice { + freeString(name) + } + }(attrNames) + + var rc C.int + switch at { + case poolAttr: + rc = daos_pool_del_attr(hdl, C.int(len(attrNames)), &attrNames[0], nil) + /*case contAttr: + rc = daos_cont_del_attr(hdl, 1, &attrName, nil)*/ + default: + return errors.Wrapf(daos.InvalidInput, "unknown attr type %d", at) + } + + return errors.Wrapf(daosError(rc), "failed to delete %s attributes", at) +} diff --git a/src/control/lib/daos/api/errors.go b/src/control/lib/daos/api/errors.go index 6d1b4b665e3..623b61243c0 100644 --- a/src/control/lib/daos/api/errors.go +++ b/src/control/lib/daos/api/errors.go @@ -1,13 +1,62 @@ // // (C) Copyright 2024 Intel Corporation. 
+// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // package api -import "github.com/pkg/errors" +import ( + "context" + + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/lib/daos" +) + +/* +#include +*/ +import "C" var ( - ErrNoSystemRanks = errors.New("no ranks in system") + ErrNoSystemRanks = errors.New("no ranks in system") + ErrContextHandleConflict = errors.New("context already contains a handle for a different pool or container") + ErrInvalidPoolHandle = errors.New("pool handle is nil or invalid") + + errInvalidContainerHandle = errors.New("container handle is nil or invalid") + errNilCtx = errors.New("nil context") + errNoCtxHdl = errors.New("no handle in context") ) + +// dfsError converts a return code from a DFS API +// call to a Go error. +func dfsError(rc C.int) error { + if rc == 0 { + return nil + } + + strErr := C.strerror(rc) + return errors.Errorf("DFS error %d: %s", rc, C.GoString(strErr)) +} + +// daosError converts a return code from a DAOS API +// call to a Go error. +func daosError(rc C.int) error { + return daos.ErrorFromRC(int(rc)) +} + +// ctxErr recasts a context error as a DAOS error. +func ctxErr(err error) error { + switch { + case err == nil: + return nil + case errors.Is(err, context.Canceled): + return errors.Wrap(daos.Canceled, "DAOS API context canceled") + case errors.Is(err, context.DeadlineExceeded): + return errors.Wrap(daos.TimedOut, "DAOS API context deadline exceeded") + default: + return errors.Wrap(daos.MiscError, "DAOS API context error") + } +} diff --git a/src/control/lib/daos/api/handle.go b/src/control/lib/daos/api/handle.go new file mode 100644 index 00000000000..e729b7401e5 --- /dev/null +++ b/src/control/lib/daos/api/handle.go @@ -0,0 +1,92 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +import ( + "fmt" + "unsafe" + + "github.com/google/uuid" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/logging" +) + +/* +#include + +#cgo LDFLAGS: -ldaos_common +*/ +import "C" + +const ( + MissingPoolLabel = "" + MissingContainerLabel = "" +) + +type ( + // ctxHdlKey is a type used for storing handles as context values. + ctxHdlKey string + + // connHandle is an opaque type used to represent a DAOS connection (pool or container). + connHandle struct { + UUID uuid.UUID + Label string + daosHandle C.daos_handle_t + } +) + +// invalidate clears the handle so that it cannot be reused inadvertently. +func (ch *connHandle) invalidate() { + if ch == nil { + return + } + ch.UUID = uuid.Nil + ch.Label = "" + ch.daosHandle.cookie = 0 +} + +// FillHandle copies the handle to the supplied pointer, +// which must be a reference to a C.daos_handle_t. +// NB: Caller is responsible for keeping the copy in sync with +// this handle -- use of this method should be discouraged as +// it is provided for compatibility with older code that calls +// into libdaos directly. +func (ch *connHandle) FillHandle(cHandle unsafe.Pointer) error { + if ch == nil || cHandle == nil { + return errors.New("invalid handle") + } + (*C.daos_handle_t)(cHandle).cookie = ch.daosHandle.cookie + + return nil +} + +// IsValid returns true if the pool or container handle is valid. +func (ch *connHandle) IsValid() bool { + if ch == nil { + return false + } + return bool(daos_handle_is_valid(ch.daosHandle)) +} + +// ID returns the label if available, otherwise the UUID. 
+func (ch *connHandle) ID() string { + id := ch.Label + if id == "" || id == MissingPoolLabel || id == MissingContainerLabel { + id = ch.UUID.String() + } + + return id +} + +func (ch *connHandle) String() string { + id := ch.Label + if id == "" || id == MissingPoolLabel || id == MissingContainerLabel { + id = logging.ShortUUID(ch.UUID) + } + return fmt.Sprintf("%s:%t", id, ch.IsValid()) +} diff --git a/src/control/lib/daos/api/libdaos.go b/src/control/lib/daos/api/libdaos.go index d7c6bfed82d..426507b98ad 100644 --- a/src/control/lib/daos/api/libdaos.go +++ b/src/control/lib/daos/api/libdaos.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -12,10 +13,12 @@ package api #include #include #include +#include #cgo LDFLAGS: -lcart -lgurt -ldaos -ldaos_common */ import "C" +import "unsafe" func daos_init() C.int { return C.daos_init() @@ -29,6 +32,10 @@ func dc_agent_fini() { C.dc_agent_fini() } +func daos_handle_is_valid(handle C.daos_handle_t) C.bool { + return C.daos_handle_is_valid(handle) +} + func daos_mgmt_get_sys_info(sys *C.char, sys_info **C.struct_daos_sys_info) C.int { return C.daos_mgmt_get_sys_info(sys, sys_info) } @@ -36,3 +43,51 @@ func daos_mgmt_get_sys_info(sys *C.char, sys_info **C.struct_daos_sys_info) C.in func daos_mgmt_put_sys_info(sys_info *C.struct_daos_sys_info) { C.daos_mgmt_put_sys_info(sys_info) } + +func daos_pool_connect(poolID *C.char, sys *C.char, flags C.uint32_t, poolHdl *C.daos_handle_t, poolInfo *C.daos_pool_info_t, ev *C.struct_daos_event) C.int { + return C.daos_pool_connect(poolID, sys, flags, poolHdl, poolInfo, ev) +} + +func daos_pool_disconnect(poolHdl C.daos_handle_t) C.int { + // Hack for NLT fault injection testing: If the rc + // is -DER_NOMEM, retry once in order to actually + // shut down and release resources. + rc := C.daos_pool_disconnect(poolHdl, nil) + if rc == -C.DER_NOMEM { + rc = C.daos_pool_disconnect(poolHdl, nil) + // DAOS-8866, daos_pool_disconnect() might have failed, but worked anyway. 
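+		// Treat -DER_NO_HDL on the retry as success: the handle was already released.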
+ if rc == -C.DER_NO_HDL { + rc = -C.DER_SUCCESS + } + } + + return rc +} + +func daos_pool_query(poolHdl C.daos_handle_t, rankList **C.d_rank_list_t, poolInfo *C.daos_pool_info_t, props *C.daos_prop_t, ev *C.struct_daos_event) C.int { + return C.daos_pool_query(poolHdl, rankList, poolInfo, props, ev) +} + +func daos_pool_query_target(poolHdl C.daos_handle_t, tgt C.uint32_t, rank C.uint32_t, info *C.daos_target_info_t, ev *C.struct_daos_event) C.int { + return C.daos_pool_query_target(poolHdl, tgt, rank, info, ev) +} + +func daos_pool_list_attr(poolHdl C.daos_handle_t, buf *C.char, size *C.size_t, ev *C.struct_daos_event) C.int { + return C.daos_pool_list_attr(poolHdl, buf, size, ev) +} + +func daos_pool_get_attr(poolHdl C.daos_handle_t, n C.int, names **C.char, values *unsafe.Pointer, sizes *C.size_t, ev *C.struct_daos_event) C.int { + return C.daos_pool_get_attr(poolHdl, n, names, values, sizes, ev) +} + +func daos_pool_set_attr(poolHdl C.daos_handle_t, n C.int, names **C.char, values *unsafe.Pointer, sizes *C.size_t, ev *C.struct_daos_event) C.int { + return C.daos_pool_set_attr(poolHdl, n, names, values, sizes, ev) +} + +func daos_pool_del_attr(poolHdl C.daos_handle_t, n C.int, name **C.char, ev *C.struct_daos_event) C.int { + return C.daos_pool_del_attr(poolHdl, n, name, ev) +} + +func daos_mgmt_list_pools(sysName *C.char, poolCount *C.daos_size_t, pools *C.daos_mgmt_pool_info_t, ev *C.struct_daos_event) C.int { + return C.daos_mgmt_list_pools(sysName, poolCount, pools, ev) +} diff --git a/src/control/lib/daos/api/libdaos_attr_stubs.go b/src/control/lib/daos/api/libdaos_attr_stubs.go new file mode 100644 index 00000000000..f3446fa56ee --- /dev/null +++ b/src/control/lib/daos/api/libdaos_attr_stubs.go @@ -0,0 +1,175 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build test_stubs +// +build test_stubs + +package api + +import ( + "unsafe" + + "github.com/daos-stack/daos/src/control/lib/daos" +) + +import "C" + +var ( + daos_default_AttrList daos.AttributeList = daos.AttributeList{ + { + Name: "one", + Value: []byte("1"), + }, + { + Name: "two", + Value: []byte("2"), + }, + { + Name: "three", + Value: []byte("3"), + }, + } +) + +func daos_test_get_mappedNames(nameMap map[string]struct{}) []string { + names := make([]string, 0, len(nameMap)) + for name := range nameMap { + names = append(names, name) + } + return names +} + +func list_attrs(buf *C.char, size *C.size_t, RCList []C.int, CallCount *int, RC C.int, AttrList daos.AttributeList) C.int { + if len(RCList) > 0 { + rc := RCList[*CallCount] + *CallCount++ + if rc != 0 { + return rc + } + } + if RC != 0 { + return RC + } + + bufSize := 0 + for _, attr := range AttrList { + bufSize += len(attr.Name) + 1 + } + *size = C.size_t(bufSize) + + if buf == nil { + return RC + } + + bufSlice := unsafe.Slice((*C.char)(buf), bufSize) + bufPtr := 0 + for _, attr := range AttrList { + for i := 0; i < len(attr.Name); i++ { + bufSlice[bufPtr] = C.char(attr.Name[i]) + bufPtr++ + } + bufSlice[bufPtr] = C.char(0) + bufPtr++ + } + + return RC +} + +func get_attr(n C.int, names **C.char, values *unsafe.Pointer, sizes *C.size_t, + RCList []C.int, CallCount *int, RC C.int, AttrList daos.AttributeList, SetN *int, ReqNames *map[string]struct{}) C.int { + if len(RCList) > 0 { + rc := RCList[*CallCount] + *CallCount++ + if rc != 0 { + return rc + } + } + if RC != 0 { + return RC + } + + *SetN = int(n) + *ReqNames = make(map[string]struct{}) + cReqNames := unsafe.Slice(names, n) + for i := 0; i < int(n); 
i++ { + reqNames := *ReqNames + reqNames[C.GoString(cReqNames[i])] = struct{}{} + } + + if len(*ReqNames) > 0 && len(AttrList) == 0 { + return -C.int(daos.Nonexistent) + } + + attrListMap := AttrList.AsMap() + reqAttrCt := 0 + for attrName := range *ReqNames { + if _, ok := attrListMap[attrName]; !ok { + return -C.int(daos.Nonexistent) + } + reqAttrCt++ + } + + if reqAttrCt == 0 { + return RC + } + + var valuesSlice []unsafe.Pointer + if values != nil { + valuesSlice = unsafe.Slice(values, reqAttrCt) + } + sizesSlice := unsafe.Slice(sizes, reqAttrCt) + idx := 0 + for _, attr := range AttrList { + reqNames := *ReqNames + if _, ok := reqNames[attr.Name]; !ok { + continue + } + sizesSlice[idx] = C.size_t(len(attr.Value)) + if values != nil { + valSlice := unsafe.Slice((*byte)(valuesSlice[idx]), sizesSlice[idx]) + copy(valSlice[:], attr.Value) + } + idx++ + } + + return RC +} + +func set_attr(n C.int, names **C.char, values *unsafe.Pointer, sizes *C.size_t, RC C.int, AttrList *daos.AttributeList) C.int { + if RC != 0 { + return RC + } + + namesSlice := unsafe.Slice(names, n) + valuesSlice := unsafe.Slice(values, n) + sizesSlice := unsafe.Slice(sizes, n) + attrList := *AttrList + for i := 0; i < int(n); i++ { + valueSlice := unsafe.Slice((*byte)(valuesSlice[i]), sizesSlice[i]) + attrList = append(attrList, &daos.Attribute{ + Name: C.GoString(namesSlice[i]), + Value: make([]byte, sizesSlice[i]), + }) + copy(attrList[len(attrList)-1].Value, valueSlice) + } + *AttrList = attrList + + return RC +} + +func del_attr(n C.int, name **C.char, RC C.int, AttrNames *[]string) C.int { + if RC != 0 { + return RC + } + + attrNames := *AttrNames + nameSlice := unsafe.Slice(name, n) + for i := 0; i < int(n); i++ { + attrNames = append(attrNames, C.GoString(nameSlice[i])) + } + *AttrNames = attrNames + + return RC +} diff --git a/src/control/lib/daos/api/libdaos_pool_stubs.go b/src/control/lib/daos/api/libdaos_pool_stubs.go new file mode 100644 index 00000000000..308c4533a8e --- /dev/null +++ b/src/control/lib/daos/api/libdaos_pool_stubs.go @@ -0,0 +1,474 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build test_stubs +// +build test_stubs + +package api + +import ( + "unsafe" + + "github.com/dustin/go-humanize" + + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" +) + +/* +#include +#include +#include + +#include "util.h" + +static inline void +set_rebuild_state(struct daos_rebuild_status *drs, int32_t state) +{ + drs->rs_state = state; +} +*/ +import "C" + +func daos_gds2cds(sus []*daos.StorageUsageStats) C.struct_daos_space { + return C.struct_daos_space{ + s_total: [2]C.uint64_t{ + C.uint64_t(sus[0].Total), + C.uint64_t(sus[1].Total), + }, + s_free: [2]C.uint64_t{ + C.uint64_t(sus[0].Free), + C.uint64_t(sus[1].Free), + }, + } +} + +func daos_gpi2cpi(gpi *daos.PoolInfo) *C.daos_pool_info_t { + cpi := &C.daos_pool_info_t{ + pi_uuid: uuidToC(gpi.UUID), + pi_ntargets: C.uint32_t(gpi.TotalTargets), + pi_nnodes: C.uint32_t(gpi.TotalEngines), + pi_ndisabled: C.uint32_t(gpi.DisabledTargets), + pi_map_ver: C.uint32_t(gpi.Version), + pi_leader: C.uint32_t(gpi.ServiceLeader), + pi_bits: C.uint64_t(gpi.QueryMask), + pi_rebuild_st: C.struct_daos_rebuild_status{ + rs_errno: C.int32_t(gpi.Rebuild.Status), + rs_obj_nr: C.uint64_t(gpi.Rebuild.Objects), + rs_rec_nr: C.uint64_t(gpi.Rebuild.Records), + }, + pi_space: C.struct_daos_pool_space{ + ps_ntargets: 
C.uint32_t(gpi.ActiveTargets), + ps_space: daos_gds2cds(gpi.TierStats), + ps_free_min: [2]C.uint64_t{ + C.uint64_t(gpi.TierStats[0].Min), + C.uint64_t(gpi.TierStats[1].Min), + }, + ps_free_max: [2]C.uint64_t{ + C.uint64_t(gpi.TierStats[0].Max), + C.uint64_t(gpi.TierStats[1].Max), + }, + ps_free_mean: [2]C.uint64_t{ + C.uint64_t(gpi.TierStats[0].Mean), + C.uint64_t(gpi.TierStats[1].Mean), + }, + }, + } + + // some funky mismatch between the Go/C states... fix this later. + switch gpi.Rebuild.State { + case daos.PoolRebuildStateIdle: + cpi.pi_rebuild_st.rs_version = 0 + case daos.PoolRebuildStateBusy: + cpi.pi_rebuild_st.rs_version = 1 + C.set_rebuild_state(&cpi.pi_rebuild_st, C.DRS_IN_PROGRESS) + case daos.PoolRebuildStateDone: + cpi.pi_rebuild_st.rs_version = 1 + C.set_rebuild_state(&cpi.pi_rebuild_st, C.DRS_COMPLETED) + } + return cpi +} + +// defaultPoolInfo should be used to get a copy of the default pool info. +func defaultPoolInfo() *daos.PoolInfo { + return copyPoolInfo(&daos_default_PoolInfo) +} + +func copyPoolInfo(in *daos.PoolInfo) *daos.PoolInfo { + if in == nil { + return nil + } + + out := new(daos.PoolInfo) + *out = *in + + if in.Rebuild != nil { + out.Rebuild = new(daos.PoolRebuildStatus) + *out.Rebuild = *in.Rebuild + } + if in.TierStats != nil { + out.TierStats = make([]*daos.StorageUsageStats, len(in.TierStats)) + for i, s := range in.TierStats { + out.TierStats[i] = new(daos.StorageUsageStats) + *out.TierStats[i] = *s + } + } + if in.ServiceReplicas != nil { + out.ServiceReplicas = make([]ranklist.Rank, len(in.ServiceReplicas)) + copy(out.ServiceReplicas, in.ServiceReplicas) + } + if in.EnabledRanks != nil { + out.EnabledRanks = ranklist.NewRankSet() + out.EnabledRanks.Replace(in.EnabledRanks) + } + if in.DisabledRanks != nil { + out.DisabledRanks = ranklist.NewRankSet() + out.DisabledRanks.Replace(in.DisabledRanks) + } + + return out +} + +var ( + daos_default_pool_connect_Handle C.daos_handle_t = C.daos_handle_t{cookie: 42} + + daos_default_PoolInfo daos.PoolInfo = daos.PoolInfo{ + QueryMask: daos.DefaultPoolQueryMask, + State: daos.PoolServiceStateDegraded, + UUID: test.MockPoolUUID(1), + Label: "test-pool", + TotalTargets: 48, + TotalEngines: 3, + ActiveTargets: 32, + DisabledTargets: 16, + Version: 2, + ServiceLeader: 1, + ServiceReplicas: []ranklist.Rank{0, 1, 2}, + EnabledRanks: ranklist.MustCreateRankSet("0,2"), + DisabledRanks: ranklist.MustCreateRankSet("1"), + Rebuild: &daos.PoolRebuildStatus{ + Status: 0, + Objects: 1, + Records: 2, + State: daos.PoolRebuildStateBusy, + }, + TierStats: []*daos.StorageUsageStats{ + { + MediaType: daos.StorageMediaTypeScm, + Total: 64 * humanize.TByte, + Free: 16 * humanize.TByte, + Min: 1 * humanize.TByte, + Max: 4 * humanize.TByte, + Mean: 2 * humanize.TByte, + }, + { + MediaType: daos.StorageMediaTypeNvme, + Total: 64 * humanize.PByte, + Free: 16 * humanize.PByte, + Min: 1 * humanize.PByte, + Max: 4 * humanize.PByte, + Mean: 2 * humanize.PByte, + }, + }, + } + + daos_default_PoolQueryTargetInfo daos.PoolQueryTargetInfo = daos.PoolQueryTargetInfo{ + Type: daos.PoolQueryTargetType(1), + State: daos.PoolTargetStateUp, + Space: func() []*daos.StorageUsageStats { + tiStats := make([]*daos.StorageUsageStats, len(daos_default_PoolInfo.TierStats)) + for i, tier := range daos_default_PoolInfo.TierStats { + tiStats[i] = &daos.StorageUsageStats{ + MediaType: tier.MediaType, + Total: tier.Total, + Free: tier.Free, + } + } + return tiStats + }(), + } +) + +func defaultPoolHdl() *C.daos_handle_t { + newHdl := C.daos_handle_t{cookie: 
daos_default_pool_connect_Handle.cookie} + return &newHdl +} + +func reset_daos_pool_stubs() { + reset_daos_pool_connect() + reset_daos_pool_disconnect() + reset_daos_pool_query() + reset_daos_pool_query_target() + reset_daos_pool_list_attr() + reset_daos_pool_get_attr() + reset_daos_pool_set_attr() + reset_daos_pool_del_attr() + reset_daos_pool_list_cont() + + reset_daos_mgmt_list_pools() +} + +var ( + daos_pool_connect_SetPoolID string + daos_pool_connect_SetSys string + daos_pool_connect_SetFlags daos.PoolConnectFlag + daos_pool_connect_QueryMask daos.PoolQueryMask + daos_pool_connect_Handle *C.daos_handle_t = defaultPoolHdl() + daos_pool_connect_Info *daos.PoolInfo = defaultPoolInfo() + daos_pool_connect_Count int = 0 + daos_pool_connect_RC C.int = 0 +) + +func reset_daos_pool_connect() { + daos_pool_connect_SetPoolID = "" + daos_pool_connect_SetSys = "" + daos_pool_connect_SetFlags = 0 + daos_pool_connect_QueryMask = 0 + daos_pool_connect_Handle = defaultPoolHdl() + daos_pool_connect_Info = defaultPoolInfo() + daos_pool_connect_Count = 0 + daos_pool_connect_RC = 0 +} + +func daos_pool_connect(poolID *C.char, sys *C.char, flags C.uint32_t, poolHdl *C.daos_handle_t, poolInfo *C.daos_pool_info_t, ev *C.struct_daos_event) C.int { + daos_pool_connect_Count++ + if daos_pool_connect_RC != 0 { + return daos_pool_connect_RC + } + + // capture the parameters set by the test + daos_pool_connect_SetPoolID = C.GoString(poolID) + daos_pool_connect_SetSys = C.GoString(sys) + daos_pool_connect_SetFlags = daos.PoolConnectFlag(flags) + daos_pool_connect_QueryMask = daos.PoolQueryMask(poolInfo.pi_bits) + + // set the return values + poolHdl.cookie = daos_pool_connect_Handle.cookie + *poolInfo = *daos_gpi2cpi(daos_pool_connect_Info) + + return daos_pool_connect_RC +} + +var ( + daos_pool_disconnect_Count int = 0 + daos_pool_disconnect_RC C.int = 0 +) + +func reset_daos_pool_disconnect() { + daos_pool_disconnect_Count = 0 + daos_pool_disconnect_RC = 0 +} + +func daos_pool_disconnect(poolHdl C.daos_handle_t) C.int { + daos_pool_disconnect_Count++ + return daos_pool_disconnect_RC +} + +var ( + daos_pool_query_PoolInfo *daos.PoolInfo = defaultPoolInfo() + daos_pool_query_RC C.int = 0 +) + +func reset_daos_pool_query() { + daos_pool_query_PoolInfo = defaultPoolInfo() + daos_pool_query_RC = 0 +} + +func daos_pool_query(poolHdl C.daos_handle_t, rankList **C.d_rank_list_t, retPoolInfo *C.daos_pool_info_t, props *C.daos_prop_t, ev *C.struct_daos_event) C.int { + if daos_pool_query_RC != 0 { + return daos_pool_query_RC + } + + if retPoolInfo == nil { + *rankList = ranklistFromGo(daos_pool_query_PoolInfo.DisabledRanks) + return daos_pool_query_RC + } + + queryBits := retPoolInfo.pi_bits + *retPoolInfo = *daos_gpi2cpi(daos_pool_query_PoolInfo) + retPoolInfo.pi_bits = queryBits + + if queryBits&C.DPI_ENGINES_ENABLED != 0 { + *rankList = ranklistFromGo(daos_pool_query_PoolInfo.EnabledRanks) + } + if queryBits&C.DPI_ENGINES_DISABLED != 0 { + *rankList = ranklistFromGo(daos_pool_query_PoolInfo.DisabledRanks) + } + + if props != nil { + propEntries := unsafe.Slice(props.dpp_entries, props.dpp_nr) + for i := range propEntries { + switch propEntries[i].dpe_type { + case C.DAOS_PROP_PO_LABEL: + C.set_dpe_str(&propEntries[i], C.CString(daos_pool_query_PoolInfo.Label)) + case C.DAOS_PROP_PO_SVC_LIST: + rlPtr := ranklistFromGo(ranklist.RankSetFromRanks(daos_pool_query_PoolInfo.ServiceReplicas)) + C.set_dpe_val_ptr(&propEntries[i], (unsafe.Pointer)(rlPtr)) + } + } + } + + return daos_pool_query_RC +} + +var ( + 
daos_pool_query_target_SetTgt C.uint32_t = C.uint32_t(ranklist.NilRank) + daos_pool_query_target_SetRank C.uint32_t = C.uint32_t(ranklist.NilRank) + daos_pool_query_target_Info *daos.PoolQueryTargetInfo = &daos_default_PoolQueryTargetInfo + daos_pool_query_target_RC C.int = 0 +) + +func reset_daos_pool_query_target() { + daos_pool_query_target_SetTgt = C.uint32_t(ranklist.NilRank) + daos_pool_query_target_SetRank = C.uint32_t(ranklist.NilRank) + daos_pool_query_target_Info = &daos_default_PoolQueryTargetInfo + daos_pool_query_target_RC = 0 +} + +func daos_pool_query_target(poolHdl C.daos_handle_t, tgt C.uint32_t, rank C.uint32_t, info *C.daos_target_info_t, ev *C.struct_daos_event) C.int { + if daos_pool_query_target_RC != 0 { + return daos_pool_query_target_RC + } + + daos_pool_query_target_SetTgt = tgt + daos_pool_query_target_SetRank = rank + + info.ta_type = C.daos_target_type_t(daos_pool_query_target_Info.Type) + info.ta_state = C.daos_target_state_t(daos_pool_query_target_Info.State) + info.ta_space = daos_gds2cds(daos_pool_query_target_Info.Space) + + return daos_pool_query_target_RC +} + +var ( + daos_pool_list_attr_AttrList daos.AttributeList = daos_default_AttrList + daos_pool_list_attr_CallCount int + daos_pool_list_attr_RCList []C.int + daos_pool_list_attr_RC C.int = 0 +) + +func reset_daos_pool_list_attr() { + daos_pool_list_attr_AttrList = daos_default_AttrList + daos_pool_list_attr_CallCount = 0 + daos_pool_list_attr_RCList = nil + daos_pool_list_attr_RC = 0 +} + +func daos_pool_list_attr(poolHdl C.daos_handle_t, buf *C.char, size *C.size_t, ev *C.struct_daos_event) C.int { + return list_attrs(buf, size, daos_pool_list_attr_RCList, &daos_pool_list_attr_CallCount, daos_pool_list_attr_RC, daos_pool_list_attr_AttrList) +} + +var ( + daos_pool_get_attr_SetN int + daos_pool_get_attr_ReqNames map[string]struct{} + daos_pool_get_attr_CallCount int + daos_pool_get_attr_RCList []C.int + daos_pool_get_attr_AttrList daos.AttributeList = daos_default_AttrList + daos_pool_get_attr_RC C.int = 0 +) + +func reset_daos_pool_get_attr() { + daos_pool_get_attr_SetN = 0 + daos_pool_get_attr_ReqNames = nil + daos_pool_get_attr_CallCount = 0 + daos_pool_get_attr_RCList = nil + daos_pool_get_attr_AttrList = daos_default_AttrList + daos_pool_get_attr_RC = 0 +} + +func daos_pool_get_attr(poolHdl C.daos_handle_t, n C.int, names **C.char, values *unsafe.Pointer, sizes *C.size_t, ev *C.struct_daos_event) C.int { + return get_attr(n, names, values, sizes, daos_pool_get_attr_RCList, &daos_pool_get_attr_CallCount, daos_pool_get_attr_RC, daos_pool_get_attr_AttrList, &daos_pool_get_attr_SetN, &daos_pool_get_attr_ReqNames) +} + +var ( + daos_pool_set_attr_AttrList daos.AttributeList + daos_pool_set_attr_RC C.int = 0 +) + +func reset_daos_pool_set_attr() { + daos_pool_set_attr_AttrList = nil + daos_pool_set_attr_RC = 0 +} + +func daos_pool_set_attr(poolHdl C.daos_handle_t, n C.int, names **C.char, values *unsafe.Pointer, sizes *C.size_t, ev *C.struct_daos_event) C.int { + return set_attr(n, names, values, sizes, daos_pool_set_attr_RC, &daos_pool_set_attr_AttrList) +} + +var ( + daos_pool_del_attr_AttrNames []string + daos_pool_del_attr_RC C.int = 0 +) + +func reset_daos_pool_del_attr() { + daos_pool_del_attr_AttrNames = nil + daos_pool_del_attr_RC = 0 +} + +func daos_pool_del_attr(poolHdl C.daos_handle_t, n C.int, name **C.char, ev *C.struct_daos_event) C.int { + return del_attr(n, name, daos_pool_del_attr_RC, &daos_pool_del_attr_AttrNames) +} + +var ( + daos_pool_list_cont_RC C.int = 0 +) + +func 
reset_daos_pool_list_cont() { + daos_pool_list_cont_RC = 0 +} + +func daos_pool_list_cont(poolHdl C.daos_handle_t, nCont *C.daos_size_t, conts *C.struct_daos_pool_cont_info, ev *C.struct_daos_event) C.int { + if daos_pool_list_cont_RC != 0 { + return daos_pool_list_cont_RC + } + + return daos_pool_list_cont_RC +} + +var ( + daos_mgmt_list_pools_SetSys string + daos_mgmt_list_pools_RetPools []*daos.PoolInfo = []*daos.PoolInfo{defaultPoolInfo()} + daos_mgmt_list_pools_CallCount int + daos_mgmt_list_pools_RCList []C.int + daos_mgmt_list_pools_RC C.int = 0 +) + +func reset_daos_mgmt_list_pools() { + daos_mgmt_list_pools_SetSys = "" + daos_mgmt_list_pools_RetPools = []*daos.PoolInfo{defaultPoolInfo()} + daos_mgmt_list_pools_CallCount = 0 + daos_mgmt_list_pools_RCList = nil + daos_mgmt_list_pools_RC = 0 +} + +func daos_mgmt_list_pools(sysName *C.char, poolCount *C.daos_size_t, pools *C.daos_mgmt_pool_info_t, ev *C.struct_daos_event) C.int { + if len(daos_mgmt_list_pools_RCList) > 0 { + rc := daos_mgmt_list_pools_RCList[daos_mgmt_list_pools_CallCount] + daos_mgmt_list_pools_CallCount++ + if rc != 0 { + return rc + } + } + if daos_mgmt_list_pools_RC != 0 { + return daos_mgmt_list_pools_RC + } + + *poolCount = C.daos_size_t(len(daos_mgmt_list_pools_RetPools)) + + daos_mgmt_list_pools_SetSys = C.GoString(sysName) + if *poolCount == 0 || pools == nil { + return daos_mgmt_list_pools_RC + } + + poolSlice := unsafe.Slice(pools, *poolCount) + for i, pool := range daos_mgmt_list_pools_RetPools { + poolSlice[i].mgpi_uuid = uuidToC(pool.UUID) + poolSlice[i].mgpi_label = C.CString(pool.Label) + poolSlice[i].mgpi_svc = ranklistFromGo(ranklist.RankSetFromRanks(pool.ServiceReplicas)) + poolSlice[i].mgpi_ldr = C.d_rank_t(pool.ServiceLeader) + } + + return daos_mgmt_list_pools_RC +} diff --git a/src/control/lib/daos/api/libdaos_stubs.go b/src/control/lib/daos/api/libdaos_stubs.go index 341b90bdd34..20ae8301b9b 100644 --- a/src/control/lib/daos/api/libdaos_stubs.go +++ b/src/control/lib/daos/api/libdaos_stubs.go @@ -1,5 +1,6 @@ // // (C) Copyright 2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -18,6 +19,9 @@ import ( /* #include #include + +#include "util.h" + */ import "C" @@ -33,6 +37,14 @@ func daos_fini() {} func dc_agent_fini() {} +var ( + daos_handle_is_valid_Bool C.bool = true +) + +func daos_handle_is_valid(handle C.daos_handle_t) C.bool { + return daos_handle_is_valid_Bool +} + var ( defaultSystemInfo *daos.SystemInfo = &daos.SystemInfo{ Name: build.DefaultSystemName, diff --git a/src/control/lib/daos/api/pool.go b/src/control/lib/daos/api/pool.go new file mode 100644 index 00000000000..9f81dfd7547 --- /dev/null +++ b/src/control/lib/daos/api/pool.go @@ -0,0 +1,682 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +import ( + "context" + "unsafe" + + "github.com/google/uuid" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/logging" +) + +/* +#include +#include +#include + +#include "util.h" + +static inline uint32_t +get_rebuild_state(struct daos_rebuild_status *drs) +{ + if (drs == NULL) + return 0; + + return drs->rs_state; +} +*/ +import "C" + +type ( + // PoolHandle is an opaque type used to represent a DAOS Pool connection. 
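+	// A PoolHandle embeds connHandle, which carries the pool UUID, label and
+	// underlying daos_handle_t; it is returned by PoolConnect() and is
+	// invalidated by Disconnect().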
+ PoolHandle struct { + connHandle + } +) + +const ( + poolHandleKey ctxHdlKey = "poolHandle" +) + +// phFromContext retrieves the PoolHandle from the supplied context, if available. +func phFromCtx(ctx context.Context) (*PoolHandle, error) { + if ctx == nil { + return nil, errNilCtx + } + + ph, ok := ctx.Value(poolHandleKey).(*PoolHandle) + if !ok { + return nil, errNoCtxHdl + } + + return ph, nil +} + +// toCtx returns a new context with the PoolHandle stashed in it. +// NB: Will panic if the context already has a different PoolHandle stashed. +func (ph *PoolHandle) toCtx(ctx context.Context) context.Context { + if ph == nil { + return ctx + } + + stashed, _ := phFromCtx(ctx) + if stashed != nil { + if stashed.UUID() == ph.UUID() { + return ctx + } + panic("attempt to stash different PoolHandle in context") + } + + return context.WithValue(ctx, poolHandleKey, ph) +} + +// newPoolSpaceInfo constructs a Go type from the underlying C type. +func newPoolSpaceInfo(dps *C.struct_daos_pool_space, mt C.uint) *daos.StorageUsageStats { + if dps == nil { + return nil + } + + return &daos.StorageUsageStats{ + Total: uint64(dps.ps_space.s_total[mt]), + Free: uint64(dps.ps_space.s_free[mt]), + Min: uint64(dps.ps_free_min[mt]), + Max: uint64(dps.ps_free_max[mt]), + Mean: uint64(dps.ps_free_mean[mt]), + MediaType: daos.StorageMediaType(mt), + } +} + +// newPoolRebuildStatus constructs a Go type from the underlying C type. +func newPoolRebuildStatus(drs *C.struct_daos_rebuild_status) *daos.PoolRebuildStatus { + if drs == nil { + return nil + } + + compatRebuildState := func() daos.PoolRebuildState { + switch { + case drs.rs_version == 0: + return daos.PoolRebuildStateIdle + case C.get_rebuild_state(drs) == C.DRS_COMPLETED: + return daos.PoolRebuildStateDone + default: + return daos.PoolRebuildStateBusy + } + } + + return &daos.PoolRebuildStatus{ + Status: int32(drs.rs_errno), + Objects: uint64(drs.rs_obj_nr), + Records: uint64(drs.rs_rec_nr), + State: compatRebuildState(), + } +} + +// newPoolInfo constructs a Go type from the underlying C type. 
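+// The service state is derived rather than copied: the pool is reported as
+// Degraded whenever any targets are disabled, and TierStats is only populated
+// when the query mask includes the space option.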
+func newPoolInfo(cpi *C.daos_pool_info_t) *daos.PoolInfo { + if cpi == nil { + return nil + } + + poolInfo := new(daos.PoolInfo) + + poolInfo.QueryMask = daos.PoolQueryMask(cpi.pi_bits) + poolInfo.UUID = uuid.Must(uuidFromC(cpi.pi_uuid)) + poolInfo.TotalTargets = uint32(cpi.pi_ntargets) + poolInfo.DisabledTargets = uint32(cpi.pi_ndisabled) + poolInfo.ActiveTargets = uint32(cpi.pi_space.ps_ntargets) + poolInfo.TotalEngines = uint32(cpi.pi_nnodes) + poolInfo.ServiceLeader = uint32(cpi.pi_leader) + poolInfo.Version = uint32(cpi.pi_map_ver) + poolInfo.State = daos.PoolServiceStateReady + if poolInfo.DisabledTargets > 0 { + poolInfo.State = daos.PoolServiceStateDegraded + } + + poolInfo.Rebuild = newPoolRebuildStatus(&cpi.pi_rebuild_st) + if poolInfo.QueryMask.HasOption(daos.PoolQueryOptionSpace) { + poolInfo.TierStats = []*daos.StorageUsageStats{ + newPoolSpaceInfo(&cpi.pi_space, C.DAOS_MEDIA_SCM), + newPoolSpaceInfo(&cpi.pi_space, C.DAOS_MEDIA_NVME), + } + } + + return poolInfo +} + +func poolInfoFromProps(pi *daos.PoolInfo, propEntries []C.struct_daos_prop_entry) { + if pi == nil || len(propEntries) == 0 { + return + } + + for _, entry := range propEntries { + switch entry.dpe_type { + case C.DAOS_PROP_PO_LABEL: + pi.Label = C.GoString(C.get_dpe_str(&entry)) + case C.DAOS_PROP_PO_SVC_LIST: + rlPtr := C.get_dpe_val_ptr(&entry) + if rlPtr == nil { + return + } + rs, err := rankSetFromC((*C.d_rank_list_t)(rlPtr)) + if err != nil { + return + } + pi.ServiceReplicas = rs.Ranks() + } + } +} + +// Disconnect signals that the client no longer needs the DAOS pool +// connection and that it is safe to release resources allocated for +// the connection. +func (ph *PoolHandle) Disconnect(ctx context.Context) error { + if ph == nil { + return ErrInvalidPoolHandle + } + logging.FromContext(ctx).Debugf("PoolHandle.Disconnect(%s)", ph) + + if err := daosError(daos_pool_disconnect(ph.daosHandle)); err != nil { + return errors.Wrap(err, "failed to disconnect from pool") + } + ph.invalidate() + + return nil +} + +// UUID returns the DAOS pool's UUID. +func (ph *PoolHandle) UUID() uuid.UUID { + if ph == nil { + return uuid.Nil + } + return ph.connHandle.UUID +} + +type ( + // PoolConnectReq defines the parameters for a PoolConnect request. + PoolConnectReq struct { + SysName string + ID string + Flags daos.PoolConnectFlag + Query bool + } + + // PoolConnectResp contains the response to a PoolConnect request. + PoolConnectResp struct { + Connection *PoolHandle + Info *daos.PoolInfo + } +) + +// PoolConnect establishes a connection to the specified DAOS pool. +// NB: The caller is responsible for disconnecting from the pool when +// finished. 
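+//
+// A minimal usage sketch (the "tank" label is illustrative; any valid pool
+// label or UUID works):
+//
+//	resp, err := PoolConnect(ctx, PoolConnectReq{
+//		ID:    "tank",
+//		Flags: daos.PoolConnectFlagReadOnly,
+//		Query: true,
+//	})
+//	if err != nil {
+//		return err
+//	}
+//	defer resp.Connection.Disconnect(ctx)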
+func PoolConnect(ctx context.Context, req PoolConnectReq) (*PoolConnectResp, error) { + if ctx == nil { + return nil, errNilCtx + } + logging.FromContext(ctx).Debugf("PoolConnect(%+v)", req) + + if _, err := phFromCtx(ctx); err == nil { + return nil, ErrContextHandleConflict + } + + if req.ID == "" { + return nil, errors.Wrap(daos.InvalidInput, "no pool ID provided") + } + if req.SysName == "" { + req.SysName = build.DefaultSystemName + } + if req.Flags == 0 { + req.Flags = daos.PoolConnectFlagReadOnly + } + + var dpi C.daos_pool_info_t + if req.Query { + dpi.pi_bits = C.ulong(daos.DefaultPoolQueryMask) + } + var poolConn PoolHandle + + cPoolID := C.CString(req.ID) + defer freeString(cPoolID) + cSys := C.CString(req.SysName) + defer freeString(cSys) + + if err := daosError(daos_pool_connect(cPoolID, cSys, C.uint(req.Flags), &poolConn.daosHandle, &dpi, nil)); err != nil { + return nil, errors.Wrap(err, "failed to connect to pool") + } + + poolInfo := newPoolInfo(&dpi) + poolConn.connHandle.UUID = poolInfo.UUID + if req.ID != poolInfo.UUID.String() { + poolInfo.Label = req.ID + } else { + // If the connection was made with a UUID, then we don't know the label without + // a query. This should be a rare scenario. If the request allows it, try a query. + poolInfo.Label = MissingPoolLabel + if req.Query { + qpi, err := poolConn.Query(ctx, daos.HealthOnlyPoolQueryMask) + if err != nil { + if dcErr := poolConn.Disconnect(ctx); dcErr != nil { + logging.FromContext(ctx).Error(dcErr.Error()) + } + return nil, errors.Wrap(err, "failed to query pool for label") + } + poolInfo.Label = qpi.Label + } + } + // Set the label on the connection for convenience. + poolConn.connHandle.Label = poolInfo.Label + + logging.FromContext(ctx).Debugf("Connected to Pool %s", &poolConn) + return &PoolConnectResp{ + Connection: &poolConn, + Info: poolInfo, + }, nil +} + +// getPoolConn retrieves the PoolHandle set in the context, if available, +// or tries to establish a new connection to the specified pool. +func getPoolConn(ctx context.Context, sysName, poolID string, flags daos.PoolConnectFlag) (*PoolHandle, func(), error) { + nulCleanup := func() {} + ph, err := phFromCtx(ctx) + if err == nil { + if poolID != "" { + return nil, nulCleanup, errors.Wrap(daos.InvalidInput, "PoolHandle found in context with non-empty poolID") + } + return ph, nulCleanup, nil + } + + resp, err := PoolConnect(ctx, PoolConnectReq{ + ID: poolID, + SysName: sysName, + Flags: flags, + Query: false, + }) + if err != nil { + return nil, nulCleanup, err + } + + cleanup := func() { + err := resp.Connection.Disconnect(ctx) + if err != nil { + logging.FromContext(ctx).Error(err.Error()) + } + } + return resp.Connection, cleanup, nil +} + +// Query is a convenience wrapper around the PoolQuery() function. +func (ph *PoolHandle) Query(ctx context.Context, mask daos.PoolQueryMask) (*daos.PoolInfo, error) { + if ph == nil { + return nil, ErrInvalidPoolHandle + } + return PoolQuery(ph.toCtx(ctx), "", "", mask) +} + +// PoolQuery retrieves information about the DAOS Pool, including health and rebuild status, +// storage usage, and other details. 
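+//
+// A minimal usage sketch (an empty sysName falls back to the default system;
+// the "tank" label is illustrative):
+//
+//	info, err := PoolQuery(ctx, "", "tank", daos.HealthOnlyPoolQueryMask)
+//	if err != nil {
+//		return err
+//	}
+//	// info.Rebuild, info.DisabledTargets, etc. are now populated.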
+func PoolQuery(ctx context.Context, sysName, poolID string, queryMask daos.PoolQueryMask) (*daos.PoolInfo, error) { + if queryMask == 0 { + queryMask = daos.DefaultPoolQueryMask + } + poolConn, disconnect, err := getPoolConn(ctx, sysName, poolID, daos.PoolConnectFlagReadOnly) + if err != nil { + return nil, err + } + defer disconnect() + logging.FromContext(ctx).Debugf("PoolQuery(%s:%s)", poolConn, queryMask) + + var enabledRanks *C.d_rank_list_t + var disabledRanks *C.d_rank_list_t + defer func() { + C.d_rank_list_free(enabledRanks) + C.d_rank_list_free(disabledRanks) + }() + + // Query for some additional information stored as properties. + queryProps := C.daos_prop_alloc(2) + if queryProps == nil { + return nil, errors.Wrap(daos.NoMemory, "failed to allocate property list") + } + propEntries := unsafe.Slice(queryProps.dpp_entries, queryProps.dpp_nr) + propEntries[0].dpe_type = C.DAOS_PROP_PO_LABEL + propEntries[1].dpe_type = C.DAOS_PROP_PO_SVC_LIST + defer func() { + C.daos_prop_free(queryProps) + }() + + var rc C.int + cPoolInfo := C.daos_pool_info_t{ + pi_bits: C.uint64_t(queryMask), + } + if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) && queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + enaQm := queryMask + enaQm.ClearOptions(daos.PoolQueryOptionDisabledEngines) + cPoolInfo.pi_bits = C.uint64_t(enaQm) + rc = daos_pool_query(poolConn.daosHandle, &enabledRanks, &cPoolInfo, queryProps, nil) + if err := daosError(rc); err != nil { + return nil, errors.Wrap(err, "failed to query pool") + } + + /* second query to just get disabled ranks */ + rc = daos_pool_query(poolConn.daosHandle, &disabledRanks, nil, nil, nil) + } else if queryMask.HasOption(daos.PoolQueryOptionEnabledEngines) { + rc = daos_pool_query(poolConn.daosHandle, &enabledRanks, &cPoolInfo, queryProps, nil) + } else if queryMask.HasOption(daos.PoolQueryOptionDisabledEngines) { + rc = daos_pool_query(poolConn.daosHandle, &disabledRanks, &cPoolInfo, queryProps, nil) + } else { + rc = daos_pool_query(poolConn.daosHandle, nil, &cPoolInfo, queryProps, nil) + } + + if err := daosError(rc); err != nil { + return nil, errors.Wrap(err, "failed to query pool") + } + + poolInfo := newPoolInfo(&cPoolInfo) + poolInfo.QueryMask = queryMask + poolInfoFromProps(poolInfo, propEntries) + + if enabledRanks != nil { + poolInfo.EnabledRanks, err = rankSetFromC(enabledRanks) + if err != nil { + return nil, err + } + } + if disabledRanks != nil { + poolInfo.DisabledRanks, err = rankSetFromC(disabledRanks) + if err != nil { + return nil, err + } + } + + return poolInfo, nil +} + +func newPoolTargetInfo(ptinfo *C.daos_target_info_t) *daos.PoolQueryTargetInfo { + return &daos.PoolQueryTargetInfo{ + Type: daos.PoolQueryTargetType(ptinfo.ta_type), + State: daos.PoolQueryTargetState(ptinfo.ta_state), + Space: []*daos.StorageUsageStats{ + { + Total: uint64(ptinfo.ta_space.s_total[C.DAOS_MEDIA_SCM]), + Free: uint64(ptinfo.ta_space.s_free[C.DAOS_MEDIA_SCM]), + MediaType: C.DAOS_MEDIA_SCM, + }, + { + Total: uint64(ptinfo.ta_space.s_total[C.DAOS_MEDIA_NVME]), + Free: uint64(ptinfo.ta_space.s_free[C.DAOS_MEDIA_NVME]), + MediaType: C.DAOS_MEDIA_NVME, + }, + }, + } +} + +// QueryTargets is a convenience wrapper around the PoolQueryTargets() function. 
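+// Passing a nil or empty target set queries every target on the given rank;
+// the per-rank target count is derived from a pool query. A sketch (rank 1 is
+// arbitrary):
+//
+//	infos, err := ph.QueryTargets(ctx, 1, nil)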
+func (ph *PoolHandle) QueryTargets(ctx context.Context, rank ranklist.Rank, targets *ranklist.RankSet) ([]*daos.PoolQueryTargetInfo, error) { + if ph == nil { + return nil, ErrInvalidPoolHandle + } + return PoolQueryTargets(ph.toCtx(ctx), "", "", rank, targets) +} + +// PoolQueryTargets retrieves information about storage targets in the DAOS Pool. +func PoolQueryTargets(ctx context.Context, sysName, poolID string, rank ranklist.Rank, reqTargets *ranklist.RankSet) ([]*daos.PoolQueryTargetInfo, error) { + targets := ranklist.NewRankSet() + targets.Replace(reqTargets) + + if targets.Count() == 0 { + pi, err := PoolQuery(ctx, sysName, poolID, daos.HealthOnlyPoolQueryMask) + if err != nil || (pi.TotalTargets == 0 || pi.TotalEngines == 0) { + if err != nil { + return nil, errors.Wrap(err, "pool query failed") + } + return nil, errors.New("failed to derive target count from pool query") + } + tgtCount := pi.TotalTargets / pi.TotalEngines + for i := uint32(0); i < tgtCount; i++ { + targets.Add(ranklist.Rank(i)) + } + } + poolConn, disconnect, err := getPoolConn(ctx, sysName, poolID, daos.PoolConnectFlagReadOnly) + if err != nil { + return nil, err + } + defer disconnect() + logging.FromContext(ctx).Debugf("PoolQueryTargets(%s:%d:[%s])", poolConn, rank, targets) + + ptInfo := C.daos_target_info_t{} + var rc C.int + + infos := make([]*daos.PoolQueryTargetInfo, 0, targets.Count()) + for _, tgt := range targets.Ranks() { + rc = daos_pool_query_target(poolConn.daosHandle, C.uint32_t(tgt), C.uint32_t(rank), &ptInfo, nil) + if err := daosError(rc); err != nil { + return nil, errors.Wrapf(err, "failed to query pool %s rank:target %d:%d", poolID, rank, tgt) + } + + infos = append(infos, newPoolTargetInfo(&ptInfo)) + } + + return infos, nil +} + +// ListAttributes is a convenience wrapper around the PoolListAttributes() function. +func (ph *PoolHandle) ListAttributes(ctx context.Context) ([]string, error) { + if ph == nil { + return nil, ErrInvalidPoolHandle + } + return PoolListAttributes(ph.toCtx(ctx), "", "") +} + +// PoolListAttributes returns a list of user-definable pool attribute names. +func PoolListAttributes(ctx context.Context, sysName, poolID string) ([]string, error) { + poolConn, disconnect, err := getPoolConn(ctx, sysName, poolID, daos.PoolConnectFlagReadOnly) + if err != nil { + return nil, err + } + defer disconnect() + logging.FromContext(ctx).Debugf("PoolListAttributes(%s)", poolConn) + + if err := ctx.Err(); err != nil { + return nil, ctxErr(err) + } + + return listDaosAttributes(poolConn.daosHandle, poolAttr) +} + +// GetAttributes is a convenience wrapper around the PoolGetAttributes() function. +func (ph *PoolHandle) GetAttributes(ctx context.Context, attrNames ...string) (daos.AttributeList, error) { + if ph == nil { + return nil, ErrInvalidPoolHandle + } + return PoolGetAttributes(ph.toCtx(ctx), "", "", attrNames...) +} + +// PoolGetAttributes fetches the specified pool attributes. If no +// attribute names are provided, all attributes are fetched. 
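+//
+// A minimal usage sketch (pool label and attribute names are hypothetical):
+//
+//	attrs, err := PoolGetAttributes(ctx, "", "tank", "owner", "purpose")
+//	if err != nil {
+//		return err
+//	}
+//	for _, attr := range attrs {
+//		fmt.Printf("%s = %q\n", attr.Name, attr.Value)
+//	}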
+func PoolGetAttributes(ctx context.Context, sysName, poolID string, names ...string) (daos.AttributeList, error) { + poolConn, disconnect, err := getPoolConn(ctx, sysName, poolID, daos.PoolConnectFlagReadOnly) + if err != nil { + return nil, err + } + defer disconnect() + logging.FromContext(ctx).Debugf("PoolGetAttributes(%s:%v)", poolConn, names) + + if err := ctx.Err(); err != nil { + return nil, ctxErr(err) + } + + return getDaosAttributes(poolConn.daosHandle, poolAttr, names) +} + +// SetAttributes is a convenience wrapper around the PoolSetAttributes() function. +func (ph *PoolHandle) SetAttributes(ctx context.Context, attrs ...*daos.Attribute) error { + if ph == nil { + return ErrInvalidPoolHandle + } + return PoolSetAttributes(ph.toCtx(ctx), "", "", attrs...) +} + +// PoolSetAttributes sets the specified pool attributes. +func PoolSetAttributes(ctx context.Context, sysName, poolID string, attrs ...*daos.Attribute) error { + poolConn, disconnect, err := getPoolConn(ctx, sysName, poolID, daos.PoolConnectFlagReadOnly) + if err != nil { + return err + } + defer disconnect() + logging.FromContext(ctx).Debugf("PoolSetAttributes(%s:%v)", poolConn, attrs) + + if err := ctx.Err(); err != nil { + return ctxErr(err) + } + + return setDaosAttributes(poolConn.daosHandle, poolAttr, attrs) +} + +// DeleteAttributes is a convenience wrapper around the PoolDeleteAttributes() function. +func (ph *PoolHandle) DeleteAttributes(ctx context.Context, attrNames ...string) error { + if ph == nil { + return ErrInvalidPoolHandle + } + return PoolDeleteAttributes(ph.toCtx(ctx), "", "", attrNames...) +} + +// PoolDeleteAttributes deletes the specified pool attributes. +func PoolDeleteAttributes(ctx context.Context, sysName, poolID string, attrNames ...string) error { + poolConn, disconnect, err := getPoolConn(ctx, sysName, poolID, daos.PoolConnectFlagReadOnly) + if err != nil { + return err + } + defer disconnect() + logging.FromContext(ctx).Debugf("PoolDeleteAttributes(%s:%+v)", poolConn, attrNames) + + if err := ctx.Err(); err != nil { + return ctxErr(err) + } + + return delDaosAttributes(poolConn.daosHandle, poolAttr, attrNames) +} + +type ( + // GetPoolListReq defines the parameters for a GetPoolList request. + GetPoolListReq struct { + SysName string + Query bool + } +) + +// GetPoolList returns a list of DAOS pools in the system. +func GetPoolList(ctx context.Context, req GetPoolListReq) ([]*daos.PoolInfo, error) { + if ctx == nil { + return nil, errNilCtx + } + + log := logging.FromContext(ctx) + log.Debugf("GetPoolList(%+v)", req) + + if req.SysName == "" { + req.SysName = build.DefaultSystemName + } + cSysName := C.CString(req.SysName) + defer freeString(cSysName) + + var cPools []C.daos_mgmt_pool_info_t + for { + var rc C.int + var poolCount C.size_t + + // First, fetch the total number of pools in the system. + // We may not have access to all of them, so this is an upper bound. + rc = daos_mgmt_list_pools(cSysName, &poolCount, nil, nil) + if err := daosError(rc); err != nil { + return nil, errors.Wrap(err, "failed to list pools") + } + log.Debugf("pools in system: %d", poolCount) + + if poolCount < 1 { + return nil, nil + } + + // Now, fetch the pools into a buffer sized for the number of pools found. 
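+		// The count above is only a snapshot: if pools were created in the
+		// meantime, daos_mgmt_list_pools() reports daos.StructTooSmall below and
+		// the loop retries with a refreshed count.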
+ cPools = make([]C.daos_mgmt_pool_info_t, poolCount) + rc = daos_mgmt_list_pools(cSysName, &poolCount, &cPools[0], nil) + err := daosError(rc) + if err == nil { + cPools = cPools[:poolCount] // adjust the slice to the number of pools retrieved + log.Debugf("fetched %d pools", len(cPools)) + break + } + if err == daos.StructTooSmall { + log.Notice("server-side pool list changed; re-fetching") + continue + } + log.Errorf("failed to fetch pool list: %s", err) + return nil, errors.Wrap(err, "failed to list pools") + } + + pools := make([]*daos.PoolInfo, 0, len(cPools)) + for i := 0; i < len(cPools); i++ { + cPool := &cPools[i] + + svcLdr := uint32(cPool.mgpi_ldr) + svcRanks, err := rankSetFromC(cPool.mgpi_svc) + if err != nil { + return nil, err + } + defer func() { + C.d_rank_list_free(cPool.mgpi_svc) + }() + poolUUID, err := uuidFromC(cPool.mgpi_uuid) + if err != nil { + return nil, err + } + poolLabel := C.GoString(cPool.mgpi_label) + + var pool *daos.PoolInfo + if req.Query { + pcResp, err := PoolConnect(ctx, PoolConnectReq{ + ID: poolUUID.String(), + SysName: req.SysName, + Flags: daos.PoolConnectFlagReadOnly, + Query: true, + }) + if err != nil { + log.Errorf("failed to connect to pool %q: %s", poolLabel, err) + continue + } + if err := pcResp.Connection.Disconnect(ctx); err != nil { + log.Errorf("failed to disconnect from pool %q: %s", poolLabel, err) + } + pool = pcResp.Info + + // Add a few missing pieces that the query doesn't fill in. + pool.Label = poolLabel + pool.ServiceLeader = svcLdr + pool.ServiceReplicas = svcRanks.Ranks() + } else { + // Just populate the basic info. + pool = &daos.PoolInfo{ + UUID: poolUUID, + Label: poolLabel, + ServiceLeader: svcLdr, + ServiceReplicas: svcRanks.Ranks(), + State: daos.PoolServiceStateReady, + } + } + + pools = append(pools, pool) + } + + log.Debugf("fetched %d/%d pools", len(pools), len(cPools)) + return pools, nil +} diff --git a/src/control/lib/daos/api/pool_test.go b/src/control/lib/daos/api/pool_test.go new file mode 100644 index 00000000000..448d90b6da2 --- /dev/null +++ b/src/control/lib/daos/api/pool_test.go @@ -0,0 +1,1076 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package api + +import ( + "context" + "fmt" + "reflect" + "sort" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/build" + "github.com/daos-stack/daos/src/control/common/test" + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/logging" +) + +var ( + testPoolName = "test-pool" +) + +func TestAPI_PoolConnect(t *testing.T) { + defaultReq := PoolConnectReq{ + ID: daos_default_PoolInfo.Label, + SysName: build.DefaultSystemName, + Flags: daos.PoolConnectFlagReadWrite, + Query: true, + } + + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + connReq PoolConnectReq + checkParams func(t *testing.T) + expResp *PoolConnectResp + expErr error + }{ + "nil context": { + connReq: defaultReq, + expErr: errNilCtx, + }, + "no poolID in req": { + ctx: test.Context(t), + connReq: PoolConnectReq{ + SysName: defaultReq.SysName, + Flags: defaultReq.Flags, + Query: defaultReq.Query, + }, + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + "context already has a connection for a pool": { + ctx: func() context.Context { + otherPoolHdl := &PoolHandle{ + connHandle: 
connHandle{ + UUID: test.MockPoolUUID(99), + Label: "not-the-pool-you're-looking-for", + }, + } + return otherPoolHdl.toCtx(test.Context(t)) + }(), + connReq: defaultReq, + checkParams: func(t *testing.T) { + test.CmpAny(t, "pool connect count", 0, daos_pool_connect_Count) + }, + expErr: ErrContextHandleConflict, + }, + "daos_pool_connect() fails": { + setup: func(t *testing.T) { + daos_pool_connect_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + connReq: defaultReq, + expErr: errors.Wrap(daos.IOError, "failed to connect to pool"), + }, + "daos_pool_connect() succeeds": { + ctx: test.Context(t), + connReq: defaultReq, + checkParams: func(t *testing.T) { + test.CmpAny(t, "poolID", defaultReq.ID, daos_pool_connect_SetPoolID) + test.CmpAny(t, "sysName", defaultReq.SysName, daos_pool_connect_SetSys) + test.CmpAny(t, "flags", defaultReq.Flags, daos_pool_connect_SetFlags) + test.CmpAny(t, "query", daos.DefaultPoolQueryMask, daos_pool_connect_QueryMask) + }, + expResp: &PoolConnectResp{ + Connection: &PoolHandle{ + connHandle: connHandle{ + Label: daos_default_PoolInfo.Label, + UUID: daos_default_PoolInfo.UUID, + daosHandle: daos_default_pool_connect_Handle, + }, + }, + Info: defaultPoolInfo(), + }, + }, + "Connect with UUID and query enabled": { + ctx: test.Context(t), + connReq: PoolConnectReq{ + ID: daos_default_PoolInfo.UUID.String(), + SysName: defaultReq.SysName, + Flags: defaultReq.Flags, + Query: true, + }, + expResp: &PoolConnectResp{ + Connection: &PoolHandle{ + connHandle: connHandle{ + Label: daos_default_PoolInfo.Label, + UUID: daos_default_PoolInfo.UUID, + daosHandle: daos_default_pool_connect_Handle, + }, + }, + Info: defaultPoolInfo(), + }, + }, + "Connect with UUID and query enabled -- query fails": { + setup: func(t *testing.T) { + daos_pool_query_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + connReq: PoolConnectReq{ + ID: daos_default_PoolInfo.UUID.String(), + SysName: defaultReq.SysName, + Flags: defaultReq.Flags, + Query: true, + }, + checkParams: func(t *testing.T) { + test.CmpAny(t, "pool disconnect count", 1, daos_pool_disconnect_Count) + }, + expErr: daos.IOError, + }, + "Connect with UUID and query disabled": { + ctx: test.Context(t), + connReq: PoolConnectReq{ + ID: daos_default_PoolInfo.UUID.String(), + SysName: defaultReq.SysName, + Flags: defaultReq.Flags, + Query: false, + }, + expResp: &PoolConnectResp{ + Connection: &PoolHandle{ + connHandle: connHandle{ + Label: MissingPoolLabel, + UUID: daos_default_PoolInfo.UUID, + daosHandle: daos_default_pool_connect_Handle, + }, + }, + Info: func() *daos.PoolInfo { + out := defaultPoolInfo() + out.Label = MissingPoolLabel + return out + }(), + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + if tc.checkParams != nil { + defer tc.checkParams(t) + } + + gotResp, gotErr := PoolConnect(mustLogCtx(tc.ctx, log), tc.connReq) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + + cmpOpts := cmp.Options{ + cmp.Comparer(func(a, b *PoolHandle) bool { + return a != nil && b != nil && a.String() == b.String() + }), + // These fields aren't populated in the PoolConnect() query. + cmpopts.IgnoreFields(daos.PoolInfo{}, + "EnabledRanks", "DisabledRanks", "DeadRanks", "ServiceReplicas", + ), + } + test.CmpAny(t, "PoolConnectResp", tc.expResp, gotResp, cmpOpts...) 
+ }) + } +} + +var ( + testCtxPoolHandle = &PoolHandle{ + connHandle: connHandle{ + UUID: test.MockPoolUUID(43), + Label: "test-ctx-pool", + }, + } + + testConnPoolHandle = &PoolHandle{ + connHandle: connHandle{ + daosHandle: daos_default_pool_connect_Handle, + UUID: daos_default_PoolInfo.UUID, + Label: daos_default_PoolInfo.Label, + }, + } +) + +func TestAPI_getPoolConn(t *testing.T) { + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + poolID string + flags daos.PoolConnectFlag + checkParams func(t *testing.T) + expHdl *PoolHandle + expErr error + }{ + "pool handle in context with non-empty ID": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + poolID: testPoolName, + expErr: errors.New("PoolHandle found in context with non-empty poolID"), + }, + "pool handle in context": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expHdl: testCtxPoolHandle, + }, + "pool handle not in context, no poolID": { + ctx: test.Context(t), + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + "pool handle from Connect()": { + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + checkParams: func(t *testing.T) { + test.CmpAny(t, "poolID", daos_default_PoolInfo.Label, daos_pool_connect_SetPoolID) + test.CmpAny(t, "sysName", build.DefaultSystemName, daos_pool_connect_SetSys) + test.CmpAny(t, "flags", daos.PoolConnectFlagReadOnly, daos_pool_connect_SetFlags) + test.CmpAny(t, "query", daos.PoolQueryMask(0), daos_pool_connect_QueryMask) + }, + expHdl: testConnPoolHandle, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + ctx := tc.ctx + if ctx == nil { + ctx = test.Context(t) + } + + if tc.checkParams != nil { + defer tc.checkParams(t) + } + + ph, cleanup, gotErr := getPoolConn(mustLogCtx(ctx, log), "", tc.poolID, tc.flags) + test.CmpErr(t, tc.expErr, gotErr) + if tc.expErr != nil { + return + } + t.Cleanup(cleanup) + + cmpOpts := cmp.Options{ + cmp.Comparer(func(a, b *PoolHandle) bool { + return a != nil && b != nil && a.String() == b.String() + }), + } + test.CmpAny(t, "PoolHandle", tc.expHdl, ph, cmpOpts...) 
+ }) + } +} + +func TestAPI_PoolQuery(t *testing.T) { + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + poolID string + queryMask daos.PoolQueryMask + checkParams func(t *testing.T) + expResp *daos.PoolInfo + expErr error + }{ + "nil context": { + expErr: errNilCtx, + }, + "pool handle in context with non-empty ID": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + poolID: testPoolName, + expErr: errors.New("PoolHandle found in context with non-empty poolID"), + }, + "pool handle not in context, no poolID": { + ctx: test.Context(t), + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + "daos_pool_query() fails": { + setup: func(t *testing.T) { + daos_pool_query_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + expErr: errors.Wrap(daos.IOError, "failed to query pool"), + }, + "daos_pool_query() fails on enabled ranks": { + setup: func(t *testing.T) { + daos_pool_query_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + queryMask: daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines, daos.PoolQueryOptionDisabledEngines), + expErr: errors.Wrap(daos.IOError, "failed to query pool"), + }, + "unspecified query mask": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expResp: func() *daos.PoolInfo { + out := defaultPoolInfo() + out.QueryMask = daos.DefaultPoolQueryMask + out.EnabledRanks = nil + return out + }(), + }, + "default query mask": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + queryMask: daos.DefaultPoolQueryMask, + expResp: func() *daos.PoolInfo { + out := defaultPoolInfo() + out.QueryMask = daos.DefaultPoolQueryMask + out.EnabledRanks = nil + return out + }(), + }, + "health-only query mask": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + queryMask: daos.HealthOnlyPoolQueryMask, + expResp: func() *daos.PoolInfo { + out := defaultPoolInfo() + out.QueryMask = daos.HealthOnlyPoolQueryMask + out.EnabledRanks = nil + out.TierStats = nil + return out + }(), + }, + "enabled ranks": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + queryMask: daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines), + expResp: func() *daos.PoolInfo { + out := defaultPoolInfo() + out.QueryMask = daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines) + out.DisabledRanks = nil + out.TierStats = nil + return out + }(), + }, + "enabled & disabled ranks": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + queryMask: daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines, daos.PoolQueryOptionDisabledEngines), + expResp: func() *daos.PoolInfo { + out := defaultPoolInfo() + out.QueryMask = daos.MustNewPoolQueryMask(daos.PoolQueryOptionEnabledEngines, daos.PoolQueryOptionDisabledEngines) + out.TierStats = nil + return out + }(), + }, + "space-only": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + queryMask: daos.MustNewPoolQueryMask(daos.PoolQueryOptionSpace), + expResp: func() *daos.PoolInfo { + out := defaultPoolInfo() + out.QueryMask = daos.MustNewPoolQueryMask(daos.PoolQueryOptionSpace) + out.EnabledRanks = nil + out.DisabledRanks = nil + return out + }(), + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + if tc.checkParams != nil { + defer tc.checkParams(t) + } + + gotResp, err := PoolQuery(mustLogCtx(tc.ctx, log), "", tc.poolID, tc.queryMask) + 
test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + cmpOpts := cmp.Options{ + cmp.Comparer(func(a, b ranklist.RankSet) bool { + return a.String() == b.String() + }), + } + test.CmpAny(t, "PoolQuery() PoolInfo", tc.expResp, gotResp, cmpOpts...) + }) + } +} + +func TestAPI_PoolQueryTargets(t *testing.T) { + allTgtCt := daos_default_PoolInfo.TotalTargets / daos_default_PoolInfo.TotalEngines + + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + poolID string + rank ranklist.Rank + targets *ranklist.RankSet + checkParams func(t *testing.T) + expResp []*daos.PoolQueryTargetInfo + expErr error + }{ + "nil context": { + expErr: errNilCtx, + }, + "pool handle in context with non-empty ID": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + poolID: testPoolName, + expErr: errors.New("PoolHandle found in context with non-empty poolID"), + }, + "pool handle not in context, no poolID": { + ctx: test.Context(t), + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + "daos_pool_query() fails": { + setup: func(t *testing.T) { + daos_pool_query_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + expErr: errors.Wrap(daos.IOError, "failed to query pool"), + }, + "daos_pool_query_target() fails": { + setup: func(t *testing.T) { + daos_pool_query_target_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + expErr: daos.IOError, + }, + "pool query returns zero targets": { + setup: func(t *testing.T) { + daos_pool_query_PoolInfo = defaultPoolInfo() + daos_pool_query_PoolInfo.TotalTargets = 0 + }, + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + expErr: errors.New("failed to derive target count"), + }, + "pool query returns zero engines": { + setup: func(t *testing.T) { + daos_pool_query_PoolInfo = defaultPoolInfo() + daos_pool_query_PoolInfo.TotalEngines = 0 + }, + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + expErr: errors.New("failed to derive target count"), + }, + "nil target set gets all": { + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + rank: 1, + targets: nil, + checkParams: func(t *testing.T) { + test.CmpAny(t, "rank", _Ctype_uint32_t(1), daos_pool_query_target_SetRank) + test.CmpAny(t, "last target", _Ctype_uint32_t(allTgtCt-1), daos_pool_query_target_SetTgt) + }, + expResp: func() []*daos.PoolQueryTargetInfo { + infos := make([]*daos.PoolQueryTargetInfo, allTgtCt) + for i := range infos { + infos[i] = &daos_default_PoolQueryTargetInfo + } + return infos + }(), + }, + "empty target set gets all": { + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + rank: 1, + targets: ranklist.NewRankSet(), + checkParams: func(t *testing.T) { + test.CmpAny(t, "rank", _Ctype_uint32_t(1), daos_pool_query_target_SetRank) + test.CmpAny(t, "last target", _Ctype_uint32_t(allTgtCt-1), daos_pool_query_target_SetTgt) + }, + expResp: func() []*daos.PoolQueryTargetInfo { + infos := make([]*daos.PoolQueryTargetInfo, allTgtCt) + for i := range infos { + infos[i] = &daos_default_PoolQueryTargetInfo + } + return infos + }(), + }, + "specified target should not query pool for target list": { + setup: func(t *testing.T) { + daos_pool_query_RC = -_Ctype_int(daos.IOError) // fail if the pool is queried + }, + ctx: test.Context(t), + poolID: daos_default_PoolInfo.Label, + rank: 1, + targets: ranklist.MustCreateRankSet("1"), + checkParams: func(t *testing.T) { + test.CmpAny(t, "rank", 
_Ctype_uint32_t(1), daos_pool_query_target_SetRank) + test.CmpAny(t, "last target", _Ctype_uint32_t(1), daos_pool_query_target_SetTgt) + }, + expResp: func() []*daos.PoolQueryTargetInfo { + infos := make([]*daos.PoolQueryTargetInfo, 1) + for i := range infos { + infos[i] = &daos_default_PoolQueryTargetInfo + } + return infos + }(), + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + if tc.checkParams != nil { + defer tc.checkParams(t) + } + + gotResp, err := PoolQueryTargets(mustLogCtx(tc.ctx, log), "", tc.poolID, tc.rank, tc.targets) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + cmpOpts := cmp.Options{ + cmp.Comparer(func(a, b ranklist.RankSet) bool { + return a.String() == b.String() + }), + } + test.CmpAny(t, "PoolQueryTargets() response", tc.expResp, gotResp, cmpOpts...) + }) + } +} + +func TestAPI_PoolListAttributes(t *testing.T) { + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + poolID string + expNames []string + expErr error + }{ + "nil context": { + expErr: errNilCtx, + }, + "pool handle in context with non-empty ID": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + poolID: testPoolName, + expErr: errors.New("PoolHandle found in context with non-empty poolID"), + }, + "pool handle not in context, no poolID": { + ctx: test.Context(t), + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + "daos_pool_list_attr() fails (get buf size)": { + setup: func(t *testing.T) { + daos_pool_list_attr_RC = -_Ctype_int(daos.IOError) + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expErr: errors.Wrap(daos.IOError, "failed to list pool attributes"), + }, + "daos_pool_list_attr() fails (fetch names)": { + setup: func(t *testing.T) { + daos_pool_list_attr_RCList = []_Ctype_int{ + 0, + -_Ctype_int(daos.IOError), + } + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expErr: errors.Wrap(daos.IOError, "failed to list pool attributes"), + }, + "no attributes set": { + setup: func(t *testing.T) { + daos_pool_list_attr_AttrList = nil + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + }, + "success": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expNames: []string{ + daos_default_AttrList[0].Name, + daos_default_AttrList[1].Name, + daos_default_AttrList[2].Name, + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + gotNames, err := PoolListAttributes(mustLogCtx(tc.ctx, log), "", tc.poolID) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "PoolListAttributes()", tc.expNames, gotNames) + }) + } +} + +func TestAPI_PoolGetAttributes(t *testing.T) { + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + poolID string + attrNames []string + checkParams func(t *testing.T) + expAttrs daos.AttributeList + expErr error + }{ + "nil context": { + expErr: errNilCtx, + }, + "pool handle in context with non-empty ID": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + poolID: testPoolName, + expErr: errors.New("PoolHandle found in context with non-empty poolID"), + }, + "pool handle not in context, no poolID": { + ctx: test.Context(t), + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + 
"daos_pool_list_attr() fails": { + setup: func(t *testing.T) { + daos_pool_list_attr_RC = -_Ctype_int(daos.IOError) + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expErr: errors.Wrap(daos.IOError, "failed to list pool attributes"), + }, + "daos_pool_get_attr() fails (sizes)": { + setup: func(t *testing.T) { + daos_pool_get_attr_RC = -_Ctype_int(daos.IOError) + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expErr: errors.Wrap(daos.IOError, "failed to get pool attribute sizes"), + }, + "daos_pool_get_attr() fails (values)": { + setup: func(t *testing.T) { + daos_pool_get_attr_RCList = []_Ctype_int{ + 0, + -_Ctype_int(daos.IOError), + } + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expErr: errors.Wrap(daos.IOError, "failed to get pool attribute values"), + }, + "empty requested attribute name": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + attrNames: test.JoinArgs(nil, "a", ""), + expErr: errors.Errorf("empty pool attribute name at index 1"), + }, + "no attributes set; attributes requested": { + setup: func(t *testing.T) { + daos_pool_get_attr_AttrList = nil + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + attrNames: test.JoinArgs(nil, "foo"), + checkParams: func(t *testing.T) { + test.CmpAny(t, "req attr names", map[string]struct{}{"foo": {}}, daos_pool_get_attr_ReqNames) + }, + expErr: errors.Wrap(daos.Nonexistent, "failed to get pool attribute sizes"), + }, + "unknown attribute requested": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + attrNames: test.JoinArgs(nil, "foo"), + checkParams: func(t *testing.T) { + test.CmpAny(t, "req attr names", map[string]struct{}{"foo": {}}, daos_pool_get_attr_ReqNames) + }, + expErr: errors.Wrap(daos.Nonexistent, "failed to get pool attribute sizes"), + }, + "no attributes set; no attributes requested": { + setup: func(t *testing.T) { + daos_pool_list_attr_AttrList = nil + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + }, + "success; all attributes": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expAttrs: daos_default_AttrList, + }, + "success; requested attributes": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + attrNames: test.JoinArgs(nil, daos_default_AttrList[0].Name, daos_default_AttrList[2].Name), + checkParams: func(t *testing.T) { + reqNames := test.JoinArgs(nil, daos_default_AttrList[0].Name, daos_default_AttrList[2].Name) + sort.Strings(reqNames) + gotNames := daos_test_get_mappedNames(daos_pool_get_attr_ReqNames) + sort.Strings(gotNames) + test.CmpAny(t, "req attr names", reqNames, gotNames) + }, + expAttrs: daos.AttributeList{ + daos_default_AttrList[0], + daos_default_AttrList[2], + }, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + if tc.checkParams != nil { + defer tc.checkParams(t) + } + + gotAttrs, err := PoolGetAttributes(mustLogCtx(tc.ctx, log), "", tc.poolID, tc.attrNames...) 
+ test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "PoolGetAttributes() daos.AttributeList", tc.expAttrs, gotAttrs) + }) + } +} + +func TestAPI_PoolSetAttributes(t *testing.T) { + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + poolID string + toSet daos.AttributeList + expErr error + }{ + "nil context": { + expErr: errNilCtx, + }, + "pool handle in context with non-empty ID": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + poolID: testPoolName, + expErr: errors.New("PoolHandle found in context with non-empty poolID"), + }, + "pool handle not in context, no poolID": { + ctx: test.Context(t), + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + "no attributes to set": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expErr: errors.Wrap(daos.InvalidInput, "no pool attributes provided"), + }, + "nil toSet attribute": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toSet: append(daos_default_AttrList, nil), + expErr: errors.Wrap(daos.InvalidInput, "nil pool attribute at index 3"), + }, + "toSet attribute with empty name": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toSet: append(daos_default_AttrList, &daos.Attribute{Name: ""}), + expErr: errors.Wrap(daos.InvalidInput, "empty pool attribute name at index 3"), + }, + "toSet attribute with empty value": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toSet: append(daos_default_AttrList, &daos.Attribute{Name: "empty"}), + expErr: errors.Wrap(daos.InvalidInput, "empty pool attribute value at index 3"), + }, + "daos_pool_set_attr() fails": { + setup: func(t *testing.T) { + daos_pool_set_attr_RC = -_Ctype_int(daos.IOError) + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toSet: daos_default_AttrList, + expErr: errors.Wrap(daos.IOError, "failed to set pool attributes"), + }, + "success": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toSet: daos_default_AttrList, + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + err := PoolSetAttributes(mustLogCtx(tc.ctx, log), "", tc.poolID, tc.toSet...) 
+ test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "PoolSetAttributes() daos.AttributeList", tc.toSet, daos_pool_set_attr_AttrList) + }) + } +} + +func TestAPI_PoolDeleteAttributes(t *testing.T) { + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + poolID string + toDelete []string + expErr error + }{ + "nil context": { + expErr: errNilCtx, + }, + "pool handle in context with non-empty ID": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + poolID: testPoolName, + expErr: errors.New("PoolHandle found in context with non-empty poolID"), + }, + "pool handle not in context, no poolID": { + ctx: test.Context(t), + expErr: errors.Wrap(daos.InvalidInput, "no pool ID provided"), + }, + "no attributes to delete": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + expErr: errors.Wrap(daos.InvalidInput, "no pool attribute names provided"), + }, + "empty name in toDelete list": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toDelete: test.JoinArgs(nil, "foo", "", "bar"), + expErr: errors.Wrap(daos.InvalidInput, "empty pool attribute name at index 1"), + }, + "daos_pool_del_attr() fails": { + setup: func(t *testing.T) { + daos_pool_del_attr_RC = -_Ctype_int(daos.IOError) + }, + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toDelete: test.JoinArgs(nil, daos_default_AttrList[0].Name), + expErr: errors.Wrap(daos.IOError, "failed to delete pool attributes"), + }, + "success": { + ctx: testCtxPoolHandle.toCtx(test.Context(t)), + toDelete: test.JoinArgs(nil, daos_default_AttrList[0].Name), + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + err := PoolDeleteAttributes(mustLogCtx(tc.ctx, log), "", tc.poolID, tc.toDelete...) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "PoolDeleteAttributes() AttrNames", tc.toDelete, daos_pool_del_attr_AttrNames) + }) + } +} + +func TestAPI_PoolHandleMethods(t *testing.T) { + testHandle := &PoolHandle{} + + thType := reflect.TypeOf(testHandle) + for i := 0; i < thType.NumMethod(); i++ { + method := thType.Method(i) + methArgs := make([]reflect.Value, 0) + var expResults int + + switch method.Name { + case "Disconnect": + expResults = 1 + case "Query": + methArgs = append(methArgs, reflect.ValueOf(daos.DefaultPoolQueryMask)) + expResults = 2 + case "QueryTargets": + methArgs = append(methArgs, reflect.ValueOf(ranklist.Rank(1)), reflect.ValueOf((*ranklist.RankSet)(nil))) + expResults = 2 + case "ListAttributes": + expResults = 2 + case "GetAttributes": + methArgs = append(methArgs, reflect.ValueOf(daos_default_AttrList[0].Name)) + expResults = 2 + case "SetAttributes": + methArgs = append(methArgs, reflect.ValueOf(daos_default_AttrList[0])) + expResults = 1 + case "DeleteAttributes": + methArgs = append(methArgs, reflect.ValueOf(daos_default_AttrList[0].Name)) + expResults = 1 + case "FillHandle", "IsValid", "String", "UUID", "ID": + // No tests for these. The main point of this suite is to ensure that the + // convenience wrappers handle inputs as expected. + continue + default: + // If you're here, you need to add a case to test your new method. + t.Fatalf("unhandled method %q", method.Name) + } + + // Not intended to be exhaustive; just verify that they accept the parameters + // we expect and return something sensible for errors. 
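As a rough illustration of what these convenience wrappers look like from a caller's point of view, here is a minimal Go sketch. The method names and argument order are taken from the reflection-driven table above; the return types, and how a connected *PoolHandle is obtained in the first place, are assumptions rather than something this patch spells out.

```go
package example

import (
	"context"
	"fmt"

	"github.com/daos-stack/daos/src/control/lib/daos"
	"github.com/daos-stack/daos/src/control/lib/daos/api"
)

// demoPoolHandle assumes ph is a valid, connected handle obtained elsewhere
// in this patch series; result shapes are inferred from the tests above.
func demoPoolHandle(ctx context.Context, ph *api.PoolHandle) error {
	// Query the pool with the default mask (two results per the test table).
	info, err := ph.Query(ctx, daos.DefaultPoolQueryMask)
	if err != nil {
		return err
	}
	fmt.Printf("query: %+v\n", info)

	// Round-trip a single attribute through the wrapper methods.
	attr := &daos.Attribute{Name: "example", Value: []byte("value")}
	if err := ph.SetAttributes(ctx, attr); err != nil {
		return err
	}
	names, err := ph.ListAttributes(ctx)
	if err != nil {
		return err
	}
	fmt.Println("attribute names:", names)
	if err := ph.DeleteAttributes(ctx, attr.Name); err != nil {
		return err
	}

	// Release the handle when finished.
	return ph.Disconnect(ctx)
}
```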
+ for name, tc := range map[string]struct { + setup func(t *testing.T) + th *PoolHandle + expErr error + }{ + fmt.Sprintf("%s: nil handle", method.Name): { + th: nil, + expErr: ErrInvalidPoolHandle, + }, + fmt.Sprintf("%s: success", method.Name): { + th: testHandle, + }, + } { + t.Run(name, func(t *testing.T) { + thArg := reflect.ValueOf(tc.th) + if tc.th == nil { + thArg = reflect.New(thType).Elem() + } + ctxArg := reflect.ValueOf(test.Context(t)) + testArgs := append([]reflect.Value{thArg, ctxArg}, methArgs...) + t.Logf("\nargs: %+v", testArgs) + + retVals := method.Func.Call(testArgs) + if len(retVals) != expResults { + t.Fatalf("expected %d return values, got %d", expResults, len(retVals)) + } + + if err, ok := retVals[len(retVals)-1].Interface().(error); ok { + test.CmpErr(t, tc.expErr, err) + } else { + test.CmpErr(t, tc.expErr, nil) + } + }) + } + } +} + +func TestAPI_GetPoolList(t *testing.T) { + defaultReq := GetPoolListReq{ + SysName: "non-default", + Query: true, + } + defaultPoolInfoResp := []*daos.PoolInfo{ + { + State: daos.PoolServiceStateReady, + UUID: daos_default_PoolInfo.UUID, + Label: daos_default_PoolInfo.Label, + ServiceReplicas: daos_default_PoolInfo.ServiceReplicas, + ServiceLeader: daos_default_PoolInfo.ServiceLeader, + }, + } + + for name, tc := range map[string]struct { + setup func(t *testing.T) + ctx context.Context + req GetPoolListReq + checkParams func(t *testing.T) + expPools []*daos.PoolInfo + expErr error + }{ + "nil context": { + expErr: errNilCtx, + }, + "daos_mgmt_list_pools fails (sizes)": { + setup: func(t *testing.T) { + daos_mgmt_list_pools_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + expErr: errors.Wrap(daos.IOError, "failed to list pools"), + }, + "daos_mgmt_list_pools fetch fails (not retryable)": { + setup: func(t *testing.T) { + daos_mgmt_list_pools_RCList = []_Ctype_int{ + 0, + -_Ctype_int(daos.NoMemory), + } + }, + ctx: test.Context(t), + expErr: errors.Wrap(daos.NoMemory, "failed to list pools"), + }, + "daos_pool_connect fails": { + setup: func(t *testing.T) { + daos_pool_connect_RC = -_Ctype_int(daos.IOError) + }, + ctx: test.Context(t), + req: defaultReq, + expPools: []*daos.PoolInfo{}, + }, + "daos_mgmt_list_pools fetch fails (retryable)": { + setup: func(t *testing.T) { + daos_mgmt_list_pools_RCList = []_Ctype_int{ + 0, + -_Ctype_int(daos.StructTooSmall), + 0, + 0, + } + }, + ctx: test.Context(t), + expPools: defaultPoolInfoResp, + }, + "default system name supplied": { + ctx: test.Context(t), + req: GetPoolListReq{}, + checkParams: func(t *testing.T) { + test.CmpAny(t, "sysName", build.DefaultSystemName, daos_mgmt_list_pools_SetSys) + }, + expPools: defaultPoolInfoResp, + }, + "success (no pools)": { + setup: func(t *testing.T) { + daos_mgmt_list_pools_RetPools = nil + }, + ctx: test.Context(t), + req: defaultReq, + }, + "success (no query)": { + ctx: test.Context(t), + req: GetPoolListReq{ + SysName: defaultReq.SysName, + }, + checkParams: func(t *testing.T) { + test.CmpAny(t, "sysName", defaultReq.SysName, daos_mgmt_list_pools_SetSys) + }, + expPools: defaultPoolInfoResp, + }, + "success (query)": { + ctx: test.Context(t), + req: defaultReq, + checkParams: func(t *testing.T) { + test.CmpAny(t, "sysName", defaultReq.SysName, daos_mgmt_list_pools_SetSys) + }, + expPools: func() []*daos.PoolInfo { + pi := copyPoolInfo(&daos_default_PoolInfo) + pi.EnabledRanks = nil + pi.DisabledRanks = nil + + return []*daos.PoolInfo{pi} + }(), + }, + } { + t.Run(name, func(t *testing.T) { + t.Cleanup(ResetTestStubs) + if tc.setup != 
nil { + tc.setup(t) + } + log, buf := logging.NewTestLogger(name) + defer test.ShowBufferOnFailure(t, buf) + + if tc.checkParams != nil { + defer tc.checkParams(t) + } + + gotPools, err := GetPoolList(mustLogCtx(tc.ctx, log), tc.req) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + test.CmpAny(t, "GetPoolList() PoolList", tc.expPools, gotPools) + }) + } +} diff --git a/src/control/lib/daos/api/test_stubs.go b/src/control/lib/daos/api/test_stubs.go new file mode 100644 index 00000000000..26ddd02da0a --- /dev/null +++ b/src/control/lib/daos/api/test_stubs.go @@ -0,0 +1,34 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// +//go:build test_stubs +// +build test_stubs + +package api + +import ( + "sync" +) + +var ( + testStubsMutex sync.Mutex +) + +// LockTestStubs takes a lock on the package stubs to avoid interference +// between tests in different packages. +func LockTestStubs() { + testStubsMutex.Lock() +} + +// UnlockTestStubs releases the lock on the package stubs. +func UnlockTestStubs() { + testStubsMutex.Unlock() +} + +// ResetTestStubs will call the reset functions for all test stubs in order +// to reset state between tests. +func ResetTestStubs() { + reset_daos_pool_stubs() +} diff --git a/src/control/lib/daos/api/util.go b/src/control/lib/daos/api/util.go new file mode 100644 index 00000000000..9c461cd5c16 --- /dev/null +++ b/src/control/lib/daos/api/util.go @@ -0,0 +1,117 @@ +package api + +import ( + "context" + "unsafe" + + "github.com/google/uuid" + "github.com/pkg/errors" + + "github.com/daos-stack/daos/src/control/lib/daos" + "github.com/daos-stack/daos/src/control/lib/ranklist" + "github.com/daos-stack/daos/src/control/logging" +) + +/* +#include +#include + +#include + +#include "util.h" +*/ +import "C" + +func goBool2int(in bool) (out C.int) { + if in { + out = 1 + } + return +} + +func copyUUID(dst *C.uuid_t, src uuid.UUID) error { + if dst == nil { + return errors.Wrap(daos.InvalidInput, "nil dest uuid_t") + } + + for i, v := range src { + dst[i] = C.uchar(v) + } + + return nil +} + +func uuidToC(in uuid.UUID) (out C.uuid_t) { + for i, v := range in { + out[i] = C.uchar(v) + } + + return +} + +func uuidFromC(cUUID C.uuid_t) (uuid.UUID, error) { + return uuid.FromBytes(C.GoBytes(unsafe.Pointer(&cUUID[0]), C.int(len(cUUID)))) +} + +func freeString(s *C.char) { + C.free(unsafe.Pointer(s)) +} + +func iterStringsBuf(cBuf unsafe.Pointer, expected C.size_t, cb func(string)) error { + var curLen C.size_t + + // Create a Go slice for easy iteration (no pointer arithmetic in Go). 
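For orientation, a hedged sketch of how GetPoolList might be driven outside of the stubbed tests. The GetPoolListReq fields and the *daos.PoolInfo fields used below are the ones exercised above; attaching a logger via logging.ToContext mirrors what mustLogCtx does in the tests and is an assumption about what real callers need.

```go
package example

import (
	"context"
	"fmt"

	"github.com/daos-stack/daos/src/control/lib/daos/api"
	"github.com/daos-stack/daos/src/control/logging"
)

func listPools(parent context.Context, log logging.Logger) error {
	// Attach a logger to the context, as the tests do via mustLogCtx.
	ctx, err := logging.ToContext(parent, log)
	if err != nil {
		return err
	}

	// Query=true appears to fill in per-pool details by connecting to each
	// pool; leave it false for a cheap listing.
	pools, err := api.GetPoolList(ctx, api.GetPoolListReq{
		SysName: "", // empty seems to select the default system name
		Query:   true,
	})
	if err != nil {
		return err
	}

	for _, p := range pools {
		fmt.Printf("%s (%s): state=%v\n", p.Label, p.UUID, p.State)
	}
	return nil
}
```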
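The test_stubs.go helpers above describe a lock/reset protocol for the shared cgo stubs. A minimal sketch of how an out-of-package test might honour that protocol follows; the test name and body are placeholders.

```go
//go:build test_stubs

package api_test

import (
	"testing"

	"github.com/daos-stack/daos/src/control/lib/daos/api"
)

func TestSomethingUsingStubs(t *testing.T) {
	// Serialize access to the shared stub state across packages, and restore
	// stub defaults when this test finishes. t.Cleanup runs LIFO, so the
	// reset happens while the lock is still held, then the lock is released.
	api.LockTestStubs()
	t.Cleanup(api.UnlockTestStubs)
	t.Cleanup(api.ResetTestStubs)

	// ... exercise api.* functions against the stubbed daos_* calls ...
}
```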
+ bufSlice := unsafe.Slice((*C.char)(cBuf), expected) + for total := C.size_t(0); total < expected; total += curLen + 1 { + chunk := bufSlice[total:] + curLen = C.strnlen(&chunk[0], expected-total) + + if curLen >= expected-total { + return errors.Wrap(daos.NoMemory, "corrupt buffer") + } + + chunk = bufSlice[total : total+curLen] + cb(C.GoString(&chunk[0])) + } + + return nil +} + +func rankSetFromC(cRankList *C.d_rank_list_t) (*ranklist.RankSet, error) { + if cRankList == nil { + return nil, errors.Wrap(daos.InvalidInput, "nil ranklist") + } + + cRankSlice := unsafe.Slice(cRankList.rl_ranks, cRankList.rl_nr) + rs := ranklist.NewRankSet() + for _, cRank := range cRankSlice { + rs.Add(ranklist.Rank(cRank)) + } + + return rs, nil +} + +func ranklistFromGo(rs *ranklist.RankSet) *C.d_rank_list_t { + if rs == nil { + return nil + } + + rl := C.d_rank_list_alloc(C.uint32_t(rs.Count())) + cRanks := unsafe.Slice(rl.rl_ranks, rs.Count()) + for i, r := range rs.Ranks() { + cRanks[i] = C.d_rank_t(r) + } + + return rl +} + +func mustLogCtx(parent context.Context, log logging.Logger) context.Context { + if parent == nil { + return nil + } + ctx, err := logging.ToContext(parent, log) + if err != nil { + panic(err) + } + return ctx +} diff --git a/src/control/lib/daos/api/util.h b/src/control/lib/daos/api/util.h new file mode 100644 index 00000000000..d0ddb067ca2 --- /dev/null +++ b/src/control/lib/daos/api/util.h @@ -0,0 +1,101 @@ +#ifndef __DAOS_API_UTIL_H__ +#define __DAOS_API_UTIL_H__ + +// #define D_LOGFAC DD_FAC(client) +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include + +/* cgo is unable to work directly with preprocessor macros + * so we have to provide these glue helpers. + */ +static inline uint64_t +daos_prop_co_status_val(uint32_t status, uint32_t flag, uint32_t ver) +{ + return DAOS_PROP_CO_STATUS_VAL(status, flag, ver); +} + +static void +daos_free(void *ptr) +{ + D_FREE(ptr); +} + +/* cgo is unable to work directly with unions, so we have + * to provide these glue helpers. 
+ */ +static inline char * +get_dpe_str(struct daos_prop_entry *dpe) +{ + if (dpe == NULL) + return NULL; + + return dpe->dpe_str; +} + +static inline uint64_t +get_dpe_val(struct daos_prop_entry *dpe) +{ + if (dpe == NULL) + return 0; + + return dpe->dpe_val; +} + +static inline void * +get_dpe_val_ptr(struct daos_prop_entry *dpe) +{ + if (dpe == NULL) + return NULL; + + return dpe->dpe_val_ptr; +} + +static inline bool +dpe_is_negative(struct daos_prop_entry *dpe) +{ + if (dpe == NULL) + return 0; + + return dpe->dpe_flags & DAOS_PROP_ENTRY_NOT_SET; +} + +static inline void +set_dpe_str(struct daos_prop_entry *dpe, d_string_t str) +{ + if (dpe == NULL) + return; + + dpe->dpe_str = str; +} + +static inline void +set_dpe_val(struct daos_prop_entry *dpe, uint64_t val) +{ + if (dpe == NULL) + return; + + dpe->dpe_val = val; +} + +static inline void +set_dpe_val_ptr(struct daos_prop_entry *dpe, void *val_ptr) +{ + if (dpe == NULL) + return; + + dpe->dpe_val_ptr = val_ptr; +} + +/*static inline uint32_t +get_rebuild_state(struct daos_rebuild_status *drs) +{ + if (drs == NULL) + return 0; + + return drs->rs_state; +}*/ + +#endif /* __DAOS_API_UTIL_H__ */ \ No newline at end of file diff --git a/src/control/lib/daos/attribute.go b/src/control/lib/daos/attribute.go new file mode 100644 index 00000000000..e27968c216e --- /dev/null +++ b/src/control/lib/daos/attribute.go @@ -0,0 +1,39 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package daos + +import "sort" + +type ( + // Attribute is a pool or container attribute. + Attribute struct { + Name string `json:"name"` + Value []byte `json:"value,omitempty"` + } + + // AttributeList is a list of attributes. + AttributeList []*Attribute +) + +// AsMap returns the attributes list as a map. +func (al AttributeList) AsMap() map[string][]byte { + m := make(map[string][]byte) + for _, a := range al { + m[a.Name] = a.Value + } + return m +} + +// AsList returns the attributes list as a sorted list of attribute names. +func (al AttributeList) AsList() []string { + names := make([]string, len(al)) + for i, a := range al { + names[i] = a.Name + } + sort.Strings(names) + return names +} diff --git a/src/control/lib/daos/pool.go b/src/control/lib/daos/pool.go index 8564882a84c..d8a3043892d 100644 --- a/src/control/lib/daos/pool.go +++ b/src/control/lib/daos/pool.go @@ -1,6 +1,7 @@ // // (C) Copyright 2020-2024 Intel Corporation. // (C) Copyright 2025 Hewlett Packard Enterprise Development LP +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -109,6 +110,9 @@ type ( // PoolQueryMask implements a bitmask for pool query options. PoolQueryMask C.uint64_t + + // PoolConnectFlag represents DAOS pool connect options. + PoolConnectFlag uint ) const ( @@ -129,11 +133,11 @@ const ( PoolQueryOptionDeadEngines PoolQueryOption = "dead_engines" // PoolConnectFlagReadOnly indicates that the connection is read-only. - PoolConnectFlagReadOnly = C.DAOS_PC_RO + PoolConnectFlagReadOnly PoolConnectFlag = C.DAOS_PC_RO // PoolConnectFlagReadWrite indicates that the connection is read-write. - PoolConnectFlagReadWrite = C.DAOS_PC_RW + PoolConnectFlagReadWrite PoolConnectFlag = C.DAOS_PC_RW // PoolConnectFlagExclusive indicates that the connection is exclusive. 
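Since attribute.go is new, here is a tiny usage sketch of the two helpers it adds; the attribute names and values below are invented.

```go
package main

import (
	"fmt"

	"github.com/daos-stack/daos/src/control/lib/daos"
)

func main() {
	attrs := daos.AttributeList{
		{Name: "owner", Value: []byte("alice")},
		{Name: "purpose", Value: []byte("scratch")},
	}

	// AsMap gives a name -> raw value lookup.
	m := attrs.AsMap()
	fmt.Printf("owner=%s\n", m["owner"])

	// AsList gives just the names, sorted alphabetically.
	fmt.Println(attrs.AsList()) // [owner purpose]
}
```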
- PoolConnectFlagExclusive = C.DAOS_PC_EX + PoolConnectFlagExclusive PoolConnectFlag = C.DAOS_PC_EX ) func (pqo PoolQueryOption) String() string { diff --git a/src/control/lib/daos/util.go b/src/control/lib/daos/util.go new file mode 100644 index 00000000000..39776f95606 --- /dev/null +++ b/src/control/lib/daos/util.go @@ -0,0 +1,18 @@ +// +// (C) Copyright 2025 Google LLC +// +// SPDX-License-Identifier: BSD-2-Clause-Patent +// + +package daos + +import "unsafe" + +/* +#include +*/ +import "C" + +func freeString(s *C.char) { + C.free(unsafe.Pointer(s)) +} diff --git a/src/control/lib/ranklist/ranklist.go b/src/control/lib/ranklist/ranklist.go index c65d3e7259f..0b6861e005c 100644 --- a/src/control/lib/ranklist/ranklist.go +++ b/src/control/lib/ranklist/ranklist.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Google LLC // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -98,7 +99,7 @@ func (rs *RankSet) Merge(other *RankSet) { // Replace replaces the contents of the receiver with the supplied RankSet. func (rs *RankSet) Replace(other *RankSet) { - if rs == nil || other == nil { + if rs == nil || other == nil || other.ns == nil { return } diff --git a/src/dtx/dtx_resync.c b/src/dtx/dtx_resync.c index 1a1460c6a57..de785050586 100644 --- a/src/dtx/dtx_resync.c +++ b/src/dtx/dtx_resync.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -87,30 +88,18 @@ dtx_resync_commit(struct ds_cont_child *cont, */ rc = vos_dtx_check(cont->sc_hdl, &dre->dre_xid, NULL, NULL, NULL, false); - /* Skip this DTX since it has been committed or aggregated. */ - if (rc == DTX_ST_COMMITTED || rc == DTX_ST_COMMITTABLE || rc == -DER_NONEXIST) - goto next; - - /* Remote ones are all ready, but local is not, then abort such DTX. - * If related RPC sponsor is still alive, related RPC will be resent. - */ - if (unlikely(rc == DTX_ST_INITED)) { - rc = dtx_abort(cont, &dre->dre_dte, dre->dre_epoch); - D_DEBUG(DB_TRACE, "As new leader for DTX "DF_DTI", abort it (1): "DF_RC"\n", - DP_DTI(&dre->dre_dte.dte_xid), DP_RC(rc)); - goto next; - } - - /* If we failed to check the status, then assume that it is + /* + * Skip this DTX since it has been committed or aggregated. + * If we failed to check the status, then assume that it is * not committed, then commit it (again), that is harmless. 
*/ + if (rc != DTX_ST_COMMITTED && rc != -DER_NONEXIST) { + dtes[j] = dtx_entry_get(&dre->dre_dte); + dcks[j].oid = dre->dre_oid; + dcks[j].dkey_hash = dre->dre_dkey_hash; + j++; + } - dtes[j] = dtx_entry_get(&dre->dre_dte); - dcks[j].oid = dre->dre_oid; - dcks[j].dkey_hash = dre->dre_dkey_hash; - j++; - -next: dtx_dre_release(drh, dre); } diff --git a/src/dtx/tests/SConscript b/src/dtx/tests/SConscript index 160bff9113f..54a14127932 100644 --- a/src/dtx/tests/SConscript +++ b/src/dtx/tests/SConscript @@ -5,6 +5,8 @@ def scons(): """Execute build""" Import('denv', 'vts_objs') + # build dtx_tests + libraries = ['abt', 'bio', 'dtx', 'vos', 'gurt', 'daos_common_pmem', 'cmocka', 'pthread', 'uuid', 'cart', 'daos_tests'] @@ -19,10 +21,35 @@ def scons(): test_src = ['dtx_tests.c', 'sched_mock.c', 'ult_mock.c', 'srv_mock.c', 'pl_map_mock.c', '../../common/tls.c', 'dts_utils.c', 'dts_local.c', 'dts_local_rdb.c', - 'dts_structs.c', vts_objs] + vts_objs] dtx_tests = tenv.d_program('dtx_tests', test_src, LIBS=libraries) - tenv.Install('$PREFIX/bin/', [dtx_tests]) + # build dtx_ut + + libraries = ['abt', 'bio', 'cmocka', 'daos_common_pmem', 'gurt', 'uuid', 'vea', 'pthread'] + + tenv = denv.Clone() + tenv.Append(CPPPATH=[Dir('../../vos').srcnode()]) + tenv.require('pmdk') + tenv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) + tenv.Append(OBJPREFIX="c_") + + # Required for vos_dtx_discard_invalid() tests. + # These functions are validated by their respective unit tests. + tenv.AppendUnique(LINKFLAGS=['-Wl,--wrap=ilog_is_valid']) + tenv.AppendUnique(LINKFLAGS=['-Wl,--wrap=vos_irec_is_valid']) + tenv.AppendUnique(LINKFLAGS=['-Wl,--wrap=evt_desc_is_valid']) + tenv.AppendUnique(LINKFLAGS=['-Wl,--wrap=dbtree_lookup']) + + vos_src = Glob('../../vos/*.c') + + test_src = ['dtx_ut.c', 'dts_discard_invalid.c', 'dts_structs.c', + 'srv_mock.c', 'sched_mock.c'] + dtx_ut = tenv.d_program('dtx_ut', test_src + vos_src, LIBS=libraries) + + # install both + + tenv.Install('$PREFIX/bin/', [dtx_tests, dtx_ut]) if __name__ == "SCons.Script": diff --git a/src/dtx/tests/dts_discard_invalid.c b/src/dtx/tests/dts_discard_invalid.c new file mode 100644 index 00000000000..4a45cc83b9c --- /dev/null +++ b/src/dtx/tests/dts_discard_invalid.c @@ -0,0 +1,584 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#define D_LOGFAC DD_FAC(tests) + +#include +#include +#include +#include +#include + +#include "ilog.h" +#include "vos_layout.h" +#include "vos_internal.h" + +/* mocks */ + +static struct vos_pool Pool; +static struct vos_container Cont; +static daos_handle_t Coh; +static bool In_tx = false; + +static struct vos_dtx_act_ent Dae; +static struct vos_dtx_act_ent_df Dae_df; +static struct vos_dtx_act_ent_df Dae_df_exp; + +#define RECORDS_MAX 26 +static umem_off_t Records[RECORDS_MAX]; +static umem_off_t Records_df[RECORDS_MAX]; +static umem_off_t Records_df_exp[RECORDS_MAX]; + +#define DTX_ID_PTR ((struct dtx_id *)0x907) +#define VC_DTX_ACTIVE_HDL 0x456 +#define DBTREE_LOOKUP_ERROR_RC (-DER_NONEXIST) + +int +__wrap_dbtree_lookup(daos_handle_t coh, d_iov_t *key, d_iov_t *val_out) +{ + assert_int_equal(coh.cookie, VC_DTX_ACTIVE_HDL); + assert_non_null(key); + assert_int_equal(key->iov_len, key->iov_buf_len); + assert_int_equal(key->iov_len, sizeof(struct dtx_id)); + assert_ptr_equal(key->iov_buf, DTX_ID_PTR); + assert_non_null(val_out); + assert_int_equal(val_out->iov_len, 0); + assert_int_equal(val_out->iov_buf_len, 0); + assert_null(val_out->iov_buf); + val_out->iov_buf = (void *)mock(); + if (val_out->iov_buf != NULL) { + val_out->iov_len = val_out->iov_buf_len = sizeof(struct vos_dtx_act_ent); + return 0; + } + return DBTREE_LOOKUP_ERROR_RC; +} + +#define REC_UMEM_OFFSET 0x1267 +#define DTX_LID 0x356 +#define EPOCH 0x557 + +bool +__wrap_ilog_is_valid(struct umem_instance *umm, umem_off_t rec, uint32_t dtx_lid, + daos_epoch_t epoch) +{ + assert_ptr_equal(umm, &Pool.vp_umm); + check_expected(umem_off2offset(rec)); + assert_int_equal(dtx_lid, DTX_LID); + assert_int_equal(epoch, EPOCH); + return mock(); +} + +bool +__wrap_vos_irec_is_valid(const struct vos_irec_df *svt, uint32_t dtx_lid) +{ + check_expected(svt); + assert_int_equal(dtx_lid, DTX_LID); + return mock(); +} + +bool +__wrap_evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid) +{ + check_expected(evt); + assert_int_equal(dtx_lid, DTX_LID); + return mock(); +} + +int +tx_begin(struct umem_instance *umm, struct umem_tx_stage_data *txd) +{ + assert_ptr_equal(umm, &Pool.vp_umm); + assert_null(txd); + int rc = mock(); + if (rc == 0) { + In_tx = true; + } + return rc; +} + +int +tx_commit(struct umem_instance *umm, void *data) +{ + assert_ptr_equal(umm, &Pool.vp_umm); + assert_null(data); + assert_true(In_tx); + In_tx = false; + return mock(); +} + +int +tx_abort(struct umem_instance *umm, int error) +{ + assert_ptr_equal(umm, &Pool.vp_umm); + check_expected(error); + assert_true(In_tx); + In_tx = false; + if (error) { + return error; + } + return mock(); +} + +int +tx_add_ptr(struct umem_instance *umm, void *ptr, size_t size) +{ + assert_ptr_equal(umm, &Pool.vp_umm); + check_expected(ptr); + check_expected(size); + return mock(); +} + +/* tests */ + +static void +test_missing_things(void **unused) +{ + daos_handle_t hdl_null = {0}; + int discarded = 0; + int rc; + + /* Missing arguments. */ + expect_assert_failure(vos_dtx_discard_invalid(hdl_null, NULL, NULL)); + expect_assert_failure(vos_dtx_discard_invalid(Coh, NULL, NULL)); + expect_assert_failure(vos_dtx_discard_invalid(Coh, DTX_ID_PTR, NULL)); + expect_assert_failure(vos_dtx_discard_invalid(Coh, NULL, &discarded)); + + /* DAE not in the DTX active table. 
*/ + will_return(__wrap_dbtree_lookup, NULL); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, DBTREE_LOOKUP_ERROR_RC); +} + +struct rec_valid { + enum vos_dtx_record_types type; + bool valid; +}; + +static bool +prep_records_common(struct rec_valid tmpl[], int num, umem_off_t *rec, umem_off_t *rec_df, + umem_off_t *rec_df_exp) +{ + bool discarded = false; + + for (int i = 0; i < num; ++i) { + umem_off_t off = REC_UMEM_OFFSET + i; + rec[i] = off; + dtx_type2umoff_flag(&rec[i], tmpl[i].type); + rec_df[i] = rec[i]; + + switch (tmpl[i].type) { + case DTX_RT_ILOG: + expect_value(__wrap_ilog_is_valid, umem_off2offset(rec), off); + will_return(__wrap_ilog_is_valid, tmpl[i].valid); + break; + case DTX_RT_SVT: + expect_value(__wrap_vos_irec_is_valid, svt, off); + will_return(__wrap_vos_irec_is_valid, tmpl[i].valid); + break; + case DTX_RT_EVT: + expect_value(__wrap_evt_desc_is_valid, evt, off); + will_return(__wrap_evt_desc_is_valid, tmpl[i].valid); + break; + default: + fail_msg("Unknown record type: %d", tmpl[i].type); + } + + if (tmpl[i].valid) { + rec_df_exp[i] = rec[i]; + } else { + rec_df_exp[i] = UMOFF_NULL; + discarded = true; + } + } + + return discarded; +} + +static bool +prep_records_inline(struct rec_valid tmpl[], int num) +{ + Dae.dae_base.dae_rec_cnt = num; + + bool discarded = prep_records_common(tmpl, num, Dae.dae_base.dae_rec_inline, + Dae_df.dae_rec_inline, Dae_df_exp.dae_rec_inline); + if (discarded) { + expect_value(tx_add_ptr, ptr, &Dae_df.dae_rec_inline); + expect_value(tx_add_ptr, size, sizeof(umem_off_t) * num); + } + + return discarded; +} + +static bool +prep_records_noninline(struct rec_valid tmpl[], int num) +{ + /* link both volatile and durable format noninline records */ + Dae.dae_records = Records; + DAE_REC_OFF(&Dae) = umem_ptr2off(&Pool.vp_umm, &Records_df); + + /* noninline records come always on top off the inline records */ + Dae.dae_base.dae_rec_cnt = DTX_INLINE_REC_CNT + num; + + bool discarded = prep_records_common(tmpl, num, Records, Records_df, Records_df_exp); + if (discarded) { + expect_value(tx_add_ptr, ptr, &Records_df); + expect_value(tx_add_ptr, size, sizeof(umem_off_t) * num); + } + + return discarded; +} + +#define TX_ERROR_RC 0x156 + +static void +test_tx_begin_fail(void **unused) +{ + int discarded = 0; + int rc; + + /* tx_begin() fails. */ + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, TX_ERROR_RC); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, TX_ERROR_RC); +} + +static void +test_tx_abort_fail(void **unused) +{ + int discarded = 0; + int rc; + + /* tx_abort() (when nothing to commit) fails. */ + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + expect_value(tx_abort, error, 0); + will_return(tx_abort, TX_ERROR_RC); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, TX_ERROR_RC); +} + +struct rec_valid One_rec[] = {{.type = DTX_RT_ILOG, .valid = false}}; + +static void +test_tx_add_ptr_inline_fail(void **unused) +{ + int discarded = 0; + int rc; + + /* tx_add_ptr() for inline records fails. 
*/ + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + prep_records_inline(One_rec, ARRAY_SIZE(One_rec)); + will_return(tx_add_ptr, TX_ERROR_RC); + expect_value(tx_abort, error, TX_ERROR_RC); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, TX_ERROR_RC); +} + +static void +test_tx_add_ptr_noninline_fail(void **unused) +{ + int discarded = 0; + int rc; + + /* tx_add_ptr() for non-inline records fails. */ + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + prep_records_noninline(One_rec, ARRAY_SIZE(One_rec)); + will_return(tx_add_ptr, TX_ERROR_RC); + expect_value(tx_abort, error, TX_ERROR_RC); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, TX_ERROR_RC); +} + +static void +test_tx_commit_fail(void **unused) +{ + int discarded = 0; + int rc; + + /* tx_commit() fails. */ + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + prep_records_noninline(One_rec, ARRAY_SIZE(One_rec)); + will_return(tx_add_ptr, 0); + will_return(tx_commit, TX_ERROR_RC); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, TX_ERROR_RC); +} + +static void +reset_dfs(); + +#define DTX_RT_MIN DTX_RT_ILOG +#define DTX_RT_MAX DTX_RT_EVT +#define DTX_RT_NUM 3 + +static void +test_discard_inline_all(void **unused) +{ + struct rec_valid recs[] = { + {DTX_RT_ILOG, false}, + {DTX_RT_SVT, false}, + {DTX_RT_EVT, false}, + {DTX_RT_ILOG, false}, + }; + + int discarded = 0; + int rc; + + /* discard all inline records at once */ + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + prep_records_inline(recs, ARRAY_SIZE(recs)); + will_return(tx_add_ptr, 0); + will_return(tx_commit, 0); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, 0); + assert_int_equal(discarded, ARRAY_SIZE(recs)); + assert_memory_equal(&Dae_df, &Dae_df_exp, sizeof(Dae_df)); + assert_memory_equal(&Records_df, &Records_df_exp, sizeof(Records_df)); +} + +typedef void (*execute_fn)(struct rec_valid *recs, int num); + +static void +prep_discard_one_common(execute_fn execute) +{ + struct rec_valid recs[4]; + + /* pick the type of the record to be discarded */ + for (enum vos_dtx_record_types type = DTX_RT_MIN; type <= DTX_RT_MAX; ++type) { + enum vos_dtx_record_types other_type = (type + 1) % DTX_RT_NUM + DTX_RT_MIN; + /* pick which entry will be discarded */ + for (int i = 0; i < ARRAY_SIZE(recs); ++i) { + /* initialize the array describing the scenario */ + for (int j = 0; j < ARRAY_SIZE(recs); ++j) { + if (j == i) { + recs[j].type = type; + recs[j].valid = false; + } else { + recs[j].type = other_type; + recs[j].valid = true; + } + } + /* reset durable format mocks */ + reset_dfs(); + execute(recs, ARRAY_SIZE(recs)); + } + } +} + +static void +discard_inline_one_execute(struct rec_valid *recs, int num) +{ + int discarded = 0; + int rc; + + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + prep_records_inline(recs, num); + will_return(tx_add_ptr, 0); + will_return(tx_commit, 0); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, 0); + assert_int_equal(discarded, 1); + assert_memory_equal(&Dae_df, &Dae_df_exp, sizeof(Dae_df)); + assert_memory_equal(&Records_df, &Records_df_exp, sizeof(Records_df)); +} + +static void +test_discard_inline_one(void **unused) +{ + /* discard just one inline record */ + prep_discard_one_common(discard_inline_one_execute); +} + +static void +test_discard_noninline_all(void 
**unused) +{ + struct rec_valid recs[] = { + {DTX_RT_ILOG, false}, + {DTX_RT_SVT, false}, + {DTX_RT_EVT, false}, + {DTX_RT_ILOG, false}, + }; + + int discarded = 0; + int rc; + + /* discard all noninline records at once */ + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + prep_records_noninline(recs, ARRAY_SIZE(recs)); + will_return(tx_add_ptr, 0); + will_return(tx_commit, 0); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, 0); + assert_int_equal(discarded, ARRAY_SIZE(recs)); + assert_memory_equal(&Dae_df, &Dae_df, sizeof(Dae_df)); + assert_memory_equal(&Records_df, &Records_df_exp, sizeof(umem_off_t) * ARRAY_SIZE(recs)); +} + +static void +discard_noninline_one_execute(struct rec_valid *recs, int num) +{ + int discarded = 0; + int rc; + + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + prep_records_noninline(recs, num); + will_return(tx_add_ptr, 0); + will_return(tx_commit, 0); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, 0); + assert_int_equal(discarded, 1); + assert_memory_equal(&Dae_df, &Dae_df_exp, sizeof(Dae_df)); + assert_memory_equal(&Records_df, &Records_df_exp, sizeof(umem_off_t) * num); +} + +static void +test_discard_noninline_one(void **unused) +{ + /* discard just one noninline record */ + prep_discard_one_common(discard_noninline_one_execute); +} + +#define RAND_SEED 2025 +#define RAND_RECORDS_NUM_MAX (RECORDS_MAX + DTX_INLINE_REC_CNT) + +static void +test_discard_rand(void **unused) +{ + int discarded = 0; + int discarded_exp = 0; + /* tx_add_ptr() it is called on condition at least one record in a group is about to be + * discarded */ + bool call_tx_add_ptr; + int rc; + + srand(RAND_SEED); + + int num = rand() % RAND_RECORDS_NUM_MAX; + struct rec_valid *recs = calloc(num, sizeof(struct rec_valid)); + for (int i = 0; i < num; ++i) { + recs[i].type = rand() % DTX_RT_MAX + DTX_RT_MIN; + recs[i].valid = (rand() % 2 == 0 ? true : false); + + if (!recs[i].valid) { + ++discarded_exp; + } + } + + printf("srand(%d), num=%d, discarded=%d\n", RAND_SEED, num, discarded_exp); + + will_return(__wrap_dbtree_lookup, &Dae); + will_return(tx_begin, 0); + + /* Note: The inline records are processed first hence they have to be initialized first as + * well. 
*/ + call_tx_add_ptr = prep_records_inline(recs, min(num, DTX_INLINE_REC_CNT)); + if (call_tx_add_ptr) { + will_return(tx_add_ptr, 0); + } + + if (num > DTX_INLINE_REC_CNT) { + call_tx_add_ptr = + prep_records_noninline(&recs[DTX_INLINE_REC_CNT], num - DTX_INLINE_REC_CNT); + if (call_tx_add_ptr) { + will_return(tx_add_ptr, 0); + } + } + + will_return(tx_commit, 0); + rc = vos_dtx_discard_invalid(Coh, DTX_ID_PTR, &discarded); + assert_int_equal(rc, 0); + assert_int_equal(discarded, discarded_exp); + assert_memory_equal(&Dae_df, &Dae_df_exp, sizeof(Dae_df)); + if (num > DTX_INLINE_REC_CNT) { + assert_memory_equal(&Records_df, &Records_df_exp, + sizeof(umem_off_t) * (num - DTX_INLINE_REC_CNT)); + } + + free(recs); +} + +/* setup & teardown */ + +static umem_ops_t umm_ops = {.mo_tx_begin = tx_begin, + .mo_tx_commit = tx_commit, + .mo_tx_abort = tx_abort, + .mo_tx_add_ptr = tx_add_ptr}; + +static void +reset_dfs() +{ + /* durable format mocks primed with a pattern intentionally to detect UMOFF_NULL (discard) + * when set */ + memset(&Dae_df, 0xef, sizeof(Dae_df)); + memset(&Dae_df_exp, 0xef, sizeof(Dae_df)); + memset(&Records_df, 0xef, sizeof(Records_df)); + memset(&Records_df_exp, 0xef, sizeof(Records_df)); +} + +static int +setup_cont(void **unused) +{ + /* reset globals */ + memset(&Pool, 0, sizeof(Pool)); + memset(&Cont, 0, sizeof(Cont)); + memset(&Dae, 0, sizeof(Dae)); + memset(&Records, 0, sizeof(Records)); + In_tx = false; + + reset_dfs(); + + Pool.vp_umm.umm_ops = &umm_ops; + Cont.vc_pool = &Pool; + Cont.vc_dtx_active_hdl.cookie = VC_DTX_ACTIVE_HDL; + Coh.cookie = (uint64_t)&Cont; + Dae.dae_df_off = umem_ptr2off(&Pool.vp_umm, &Dae_df); + DAE_LID(&Dae) = DTX_LID; + DAE_EPOCH(&Dae) = EPOCH; + + return 0; +} + +static int +teardown_cont(void **unused) +{ + /* nop */ + return 0; +} + +/* compilation unit's entry point */ +#define TEST(name, func) \ + { \ + name ": vos_dtx_discard_invalid - " #func, test_##func, setup_cont, teardown_cont \ + } + +static const struct CMUnitTest discard_invalid_tests_all[] = { + TEST("DTX400", missing_things), + TEST("DTX401", tx_begin_fail), + TEST("DTX402", tx_abort_fail), + TEST("DTX403", tx_add_ptr_inline_fail), + TEST("DTX404", tx_add_ptr_noninline_fail), + TEST("DTX405", tx_commit_fail), + TEST("DTX406", discard_inline_all), + TEST("DTX407", discard_inline_one), + TEST("DTX408", discard_noninline_all), + TEST("DTX409", discard_noninline_one), + TEST("DTX410", discard_rand), +}; + +int +run_discard_invalid_tests(void) +{ + const char *test_name = "vos_dtx_discard_invalid"; + + return cmocka_run_group_tests_name(test_name, discard_invalid_tests_all, NULL, NULL); +} diff --git a/src/dtx/tests/dts_structs.c b/src/dtx/tests/dts_structs.c index f73eaad6e2f..bddfdf9816c 100644 --- a/src/dtx/tests/dts_structs.c +++ b/src/dtx/tests/dts_structs.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,11 +11,13 @@ #include #include +#include +#include +#include #include #include #include - -#include "vts_io.h" +#include #define SET_STRUCT_COMMON(a, c) memset((void *)(&(a)), c, sizeof(a)) @@ -129,7 +131,7 @@ static const struct CMUnitTest structs_tests_all[] = { }; int -run_structs_tests(const char *cfg) +run_structs_tests(void) { const char *test_name = "DTX structs checks"; diff --git a/src/dtx/tests/dtx_tests.c b/src/dtx/tests/dtx_tests.c index 6197e3680bb..e5ca2100d1a 100644 --- a/src/dtx/tests/dtx_tests.c +++ b/src/dtx/tests/dtx_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2023-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -23,8 +24,6 @@ int run_local_tests(const char *cfg); int run_local_rdb_tests(const char *cfg); -int -run_structs_tests(const char *cfg); static void print_usage() @@ -49,7 +48,6 @@ run_all_tests(int keys) failed += run_local_tests(cfg_desc_io); failed += run_local_rdb_tests(cfg_desc_io); - failed += run_structs_tests(cfg_desc_io); return failed; } diff --git a/src/dtx/tests/dtx_ut.c b/src/dtx/tests/dtx_ut.c new file mode 100644 index 00000000000..e608c6c6964 --- /dev/null +++ b/src/dtx/tests/dtx_ut.c @@ -0,0 +1,113 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * Launcher for all DTX unit tests. + */ +#define D_LOGFAC DD_FAC(tests) + +#include +#include +#include +#include +#include +#include +#include +#include + +int +run_structs_tests(void); +int +run_discard_invalid_tests(void); + +static void +print_usage() +{ + print_message("Use one of these opt(s) for specific test\n"); + print_message("dtx_ut -h|--help\n"); + print_message("Default runs all tests\n"); + print_message("The following options can be used with any of the above:\n"); + print_message(" -f|--filter \n"); + print_message(" -e|--exclude \n"); +} + +static inline int +run_all_tests(int keys) +{ + int failed = 0; + + failed += run_structs_tests(); + failed += run_discard_invalid_tests(); + + return failed; +} + +int +main(int argc, char **argv) +{ + int rc = 0; + int nr_failed = 0; + int opt = 0; + int index = 0; + const char *short_options = "he:f:"; + static struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {"exclude", required_argument, 0, 'e'}, + {"filter", required_argument, 0, 'f'}, + {NULL}, + }; + + d_register_alt_assert(mock_assert); + + rc = daos_debug_init(DAOS_LOG_DEFAULT); + if (rc) { + print_error("Error initializing debug system\n"); + return rc; + } + + while ((opt = getopt_long(argc, argv, short_options, long_options, &index)) != -1) { + switch (opt) { + case 'h': + print_usage(); + goto exit_0; + + case 'e': +#if CMOCKA_FILTER_SUPPORTED == 1 /** requires cmocka 1.1.5 */ + cmocka_set_skip_filter(optarg); +#else + D_PRINT("filter not enabled"); +#endif + + break; + case 'f': +#if CMOCKA_FILTER_SUPPORTED == 1 /** requires cmocka 1.1.5 */ + { + /** Add wildcards for easier filtering */ + char filter[sizeof(optarg) + 2]; + + sprintf(filter, "*%s*", optarg); + cmocka_set_test_filter(filter); + printf("Test filter: %s\n", filter); + } +#else + D_PRINT("filter not enabled"); +#endif + break; + default: + break; + } + } + + nr_failed = run_all_tests(0); + + if (nr_failed) + print_error("ERROR, %i TEST(S) FAILED\n", nr_failed); + else + print_message("\nSUCCESS! 
NO TEST FAILURES\n"); + +exit_0: + daos_debug_fini(); + return nr_failed; +} diff --git a/src/dtx/tests/sched_mock.c b/src/dtx/tests/sched_mock.c index 470246ebec2..a85d72897b3 100644 --- a/src/dtx/tests/sched_mock.c +++ b/src/dtx/tests/sched_mock.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -59,3 +60,10 @@ sched_req_wait(struct sched_request *req, bool abort) { assert_true(false); } + +uint64_t +sched_cur_seq(void) +{ + assert_true(false); + return UINT64_MAX; +} diff --git a/src/include/cart/types.h b/src/include/cart/types.h index 1a7ed2eedc4..cdd181f4394 100644 --- a/src/include/cart/types.h +++ b/src/include/cart/types.h @@ -1,5 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -60,6 +61,7 @@ typedef struct crt_init_options { /** whether or not to use expected sizes */ cio_use_expected_size:1, cio_use_unexpected_size:1; + /** overrides the value of the environment variable CRT_CTX_NUM */ int cio_ctx_max_num; @@ -94,6 +96,9 @@ typedef struct crt_init_options { /** use single thread to access context */ bool cio_thread_mode_single; + + /** force busy wait (testing only, not in production) */ + bool cio_progress_busy; } crt_init_options_t; typedef int crt_status_t; diff --git a/src/include/daos/array.h b/src/include/daos/array.h index d52e6c90e3c..a1c4fa4d067 100644 --- a/src/include/daos/array.h +++ b/src/include/daos/array.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -13,6 +14,11 @@ #include #include +/** limits for list io write/read */ +extern unsigned int array_list_io_limit; +void + daos_array_env_init(); + /* task functions for array operations */ int dc_array_create(tse_task_t *task); int dc_array_open(tse_task_t *task); diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 64ab66c04e4..2225a417c47 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2015-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -605,6 +606,8 @@ daos_errno2der(int err) case EINVAL: return -DER_INVAL; case ENOTDIR: return -DER_NOTDIR; case EIO: return -DER_IO; + case ENOTSUP: + return -DER_NOTSUPPORTED; case EFAULT: case ENXIO: case ENODEV: @@ -661,6 +664,8 @@ daos_der2errno(int err) case -DER_NOTDIR: return ENOTDIR; case -DER_STALE: return ESTALE; case -DER_TX_RESTART: return ERESTART; + case -DER_NOTSUPPORTED: + return ENOTSUP; default: return EIO; } }; diff --git a/src/include/daos_array.h b/src/include/daos_array.h index 6cc69a4b152..ace0735b3a7 100644 --- a/src/include/daos_array.h +++ b/src/include/daos_array.h @@ -1,5 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -23,6 +24,11 @@ extern "C" { #include #include +/** limit of arr_nr (list-io entries) for file offsets in a single update */ +#define DAOS_ARRAY_LIST_IO_LIMIT 16384 +/** Tiny recx limit (in bytes) in the array IODs where the list limit is high */ +#define DAOS_ARRAY_RG_LEN_THD 16 + /** Range of contiguous records */ typedef struct { /** Index of the first record in the range */ @@ -260,6 +266,8 @@ daos_array_close(daos_handle_t oh, daos_event_t *ev); * \param[in] oh Array object open handle. * \param[in] th Transaction handle. * \param[in] iod IO descriptor of ranges to read from the array. + * There is a limit on the number of descriptors (DAOS_ARRAY_LIST_IO_LIMIT) if + * the length on the ranges are under DAOS_ARRAY_RG_LEN_THD. * \param[in] sgl A scatter/gather list (sgl) to the store array data. * Buffer sizes do not have to match the individual range * sizes as long as the total size does. User allocates the @@ -286,6 +294,8 @@ daos_array_read(daos_handle_t oh, daos_handle_t th, daos_array_iod_t *iod, * \param[in] oh Array object open handle. * \param[in] th Transaction handle. * \param[in] iod IO descriptor of ranges to write to the array. + * There is a limit on the number of descriptors (DAOS_ARRAY_LIST_IO_LIMIT) if + * the length on the ranges are under DAOS_ARRAY_RG_LEN_THD. * \param[in] sgl A scatter/gather list (sgl) to the store array data. * Buffer sizes do not have to match the individual range * sizes as long as the total size does. diff --git a/src/include/daos_fs.h b/src/include/daos_fs.h index 7c5ac13076d..ec597ad220c 100644 --- a/src/include/daos_fs.h +++ b/src/include/daos_fs.h @@ -1,5 +1,6 @@ /* * (C) Copyright 2018-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -624,6 +625,8 @@ dfs_read(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, * \param[in] dfs Pointer to the mounted file system. * \param[in] obj Opened file object. * \param[in] iod IO descriptor for list-io. + * There is a limit on the number of descriptors (DAOS_ARRAY_LIST_IO_LIMIT) if + * the length on the ranges are under DAOS_ARRAY_RG_LEN_THD. * \param[in] sgl Scatter/Gather list for data buffer. * \param[out] read_size * How much data is actually read. @@ -657,7 +660,9 @@ dfs_write(dfs_t *dfs, dfs_obj_t *obj, d_sg_list_t *sgl, daos_off_t off, * * \param[in] dfs Pointer to the mounted file system. * \param[in] obj Opened file object. - * \param[in] iod IO descriptor of file view. + * \param[in] iod IO descriptor for list-io. + * There is a limit on the number of descriptors (DAOS_ARRAY_LIST_IO_LIMIT) if + * the length on the ranges are under DAOS_ARRAY_RG_LEN_THD. * \param[in] sgl Scatter/Gather list for data buffer. * \param[in] ev Completion event, it is optional and can be NULL. * Function will run in blocking mode if \a ev is NULL. diff --git a/src/include/daos_srv/evtree.h b/src/include/daos_srv/evtree.h index 292c8848c87..a29f9312168 100644 --- a/src/include/daos_srv/evtree.h +++ b/src/include/daos_srv/evtree.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -814,4 +815,16 @@ evt_feats_get(struct evt_root *root) */ int evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats); +/** Validate the provided evt. 
+ * + * Note: It is designed for catastrophic recovery. Not to perform at run-time. + * + * \param evt[in] + * \param dtx_lid[in] local id of the DTX entry the evt is supposed to belong to + * + * \return true if evt is valid. + **/ +bool +evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid); + #endif /* __DAOS_EV_TREE_H__ */ diff --git a/src/include/daos_srv/vos.h b/src/include/daos_srv/vos.h index 5a7f89dea84..f82a4ebfdec 100644 --- a/src/include/daos_srv/vos.h +++ b/src/include/daos_srv/vos.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2015-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -190,6 +190,18 @@ vos_dtx_commit(daos_handle_t coh, struct dtx_id dtis[], int count, bool keep_act int vos_dtx_abort(daos_handle_t coh, struct dtx_id *dti, daos_epoch_t epoch); +/** + * Discard the active DTX entry's records if invalid. + * + * \param coh [IN] Container open handle. + * \param dti [IN] The DTX identifier to be validated. + * \param discarded [OUT] The number of discarded records. + * + * \return Zero on success, negative value if error. + */ +int +vos_dtx_discard_invalid(daos_handle_t coh, struct dtx_id *dti, int *discarded); + /** * Set flags on the active DTXs. * diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 89b1d554964..af3c130cf33 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -3938,9 +3938,11 @@ static int obj_tree_lookup_cont(daos_handle_t toh, uuid_t co_uuid, daos_handle_t *cont_toh) { struct tree_cache_root *cont_root = NULL; - d_iov_t key_iov; - d_iov_t tmp_iov; - int rc; + d_iov_t key_iov; + d_iov_t tmp_iov; + daos_handle_t migrated_toh; + struct umem_attr uma; + int rc; D_ASSERT(daos_handle_is_valid(toh)); @@ -3957,9 +3959,17 @@ obj_tree_lookup_cont(daos_handle_t toh, uuid_t co_uuid, daos_handle_t *cont_toh) return rc; } + memset(&uma, 0, sizeof(uma)); + uma.uma_id = UMEM_CLASS_VMEM; cont_root = tmp_iov.iov_buf; - *cont_toh = cont_root->tcr_root_hdl; - return 0; + D_ASSERT(daos_handle_is_valid(cont_root->tcr_root_hdl)); + rc = dbtree_open_inplace(&cont_root->tcr_btr_root, &uma, &migrated_toh); + if (rc == 0) + *cont_toh = migrated_toh; + else + DL_ERROR(rc, DF_UUID" failed to open cont migrated tree", DP_UUID(co_uuid)); + + return rc; } static int @@ -4040,10 +4050,11 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, vos_iter_param_t param = { 0 }; struct vos_iter_anchors anchor = { 0 }; daos_handle_t toh = arg->ria_migrated_tree_hdl; + daos_handle_t cont_toh = { 0 }; struct ds_cont_child *cont_child = NULL; int rc; - rc = obj_tree_lookup_cont(toh, entry->ie_couuid, &arg->ria_cont_toh); + rc = obj_tree_lookup_cont(toh, entry->ie_couuid, &cont_toh); if (rc) { if (rc == -DER_NONEXIST) { D_DEBUG(DB_TRACE, DF_RB": cont "DF_UUID" non-exist in migrate tree, " @@ -4063,7 +4074,7 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, goto out; } - D_ASSERT(daos_handle_is_valid(arg->ria_cont_toh)); + D_ASSERT(daos_handle_is_valid(cont_toh)); rc = ds_cont_child_lookup(tls->mpt_pool_uuid, entry->ie_couuid, &cont_child); if (rc == -DER_NONEXIST || rc == -DER_SHUTDOWN) { @@ -4083,6 +4094,7 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, param.ip_epr.epr_hi = DAOS_EPOCH_MAX; param.ip_flags = VOS_IT_FOR_MIGRATION; uuid_copy(arg->ria_co_uuid, entry->ie_couuid); + arg->ria_cont_toh = cont_toh; rc = 
vos_iterate(¶m, VOS_ITER_OBJ, false, &anchor, reint_post_obj_iter_cb, NULL, arg, NULL); if (rc) @@ -4091,6 +4103,8 @@ reint_post_cont_iter_cb(daos_handle_t ih, vos_iter_entry_t *entry, ds_cont_child_put(cont_child); out: + if (daos_handle_is_valid(cont_toh)) + dbtree_close(cont_toh); if (--arg->ria_yield_cnt <= 0) { D_DEBUG(DB_REBUILD, DF_RB " rebuild yield: %d\n", DP_RB_MPT(tls), rc); arg->ria_yield_cnt = REINT_ITER_YIELD_CNT; diff --git a/src/tests/ftest/daos_test/rebuild.py b/src/tests/ftest/daos_test/rebuild.py index 62c66b7da21..3188f08894d 100644 --- a/src/tests/ftest/daos_test/rebuild.py +++ b/src/tests/ftest/daos_test/rebuild.py @@ -1,5 +1,6 @@ ''' (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -365,3 +366,19 @@ def test_rebuild_35(self): :avocado: tags=DaosCoreTestRebuild,daos_test,daos_core_test_rebuild,test_rebuild_35 """ self.run_subtest() + + def test_rebuild_36(self): + """Jira ID: DAOS-14013 + + Test Description: + Run daos_test -r -s5 -u subtests=36 + + Use cases: + Core tests for daos_test rebuild + + :avocado: tags=all,daily_regression + :avocado: tags=hw,medium + :avocado: tags=unittest,rebuild + :avocado: tags=DaosCoreTestRebuild,daos_test,daos_core_test_rebuild,test_rebuild_36 + """ + self.run_subtest() diff --git a/src/tests/ftest/daos_test/rebuild.yaml b/src/tests/ftest/daos_test/rebuild.yaml index 9ceb3c02afb..be9285fce58 100644 --- a/src/tests/ftest/daos_test/rebuild.yaml +++ b/src/tests/ftest/daos_test/rebuild.yaml @@ -13,6 +13,7 @@ timeouts: test_rebuild_33: 200 test_rebuild_34: 200 test_rebuild_35: 180 + test_rebuild_36: 200 pool: nvme_size: 0G @@ -81,6 +82,7 @@ daos_tests: test_rebuild_33: DAOS_Rebuild_33 test_rebuild_34: DAOS_Rebuild_34 test_rebuild_35: DAOS_Rebuild_35 + test_rebuild_36: DAOS_Rebuild_36 daos_test: test_rebuild_0to10: r test_rebuild_12to15: r @@ -104,6 +106,7 @@ daos_tests: test_rebuild_33: r test_rebuild_34: r test_rebuild_35: r + test_rebuild_36: r args: test_rebuild_0to10: -s3 -u subtests="0-10" test_rebuild_12to15: -s3 -u subtests="12-15" @@ -127,6 +130,7 @@ daos_tests: test_rebuild_33: -s5 -u subtests="33" test_rebuild_34: -s5 -u subtests="34" test_rebuild_35: -s5 -u subtests="35" + test_rebuild_36: -s5 -u subtests="36" stopped_ranks: test_rebuild_26: ["random"] test_rebuild_27: ["random"] diff --git a/src/tests/ftest/nvme/enospace.py b/src/tests/ftest/nvme/enospace.py index c7a996c110f..d8d5baf3c04 100644 --- a/src/tests/ftest/nvme/enospace.py +++ b/src/tests/ftest/nvme/enospace.py @@ -1,5 +1,6 @@ ''' (C) Copyright 2020-2024 Intel Corporation. 
+ (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -33,7 +34,42 @@ def __init__(self, *args, **kwargs): """Initialize a NvmeEnospace object.""" super().__init__(*args, **kwargs) - self.metric_names = ['engine_pool_vos_space_scm_used', 'engine_pool_vos_space_nvme_used'] + self.space_metric_names = [ + 'engine_pool_vos_space_scm_used', + 'engine_pool_vos_space_nvme_used' + ] + self.aggr_metric_names = [ + # -- Merged records -- + "engine_pool_vos_aggregation_merged_size", + "engine_pool_vos_aggregation_merged_recs", + # -- Deleted records -- + "engine_pool_vos_aggregation_deleted_ev", + "engine_pool_vos_aggregation_deleted_sv", + # -- Errors -- + "engine_pool_vos_aggregation_fail_count", + "engine_pool_vos_aggregation_csum_errors", + "engine_pool_vos_aggregation_uncommitted", + "engine_pool_vos_aggregation_agg_blocked", + "engine_pool_vos_aggregation_discard_blocked", + # -- Details stat counter -- + "engine_pool_vos_aggregation_obj_deleted", + "engine_pool_vos_aggregation_obj_scanned", + "engine_pool_vos_aggregation_obj_skipped", + "engine_pool_vos_aggregation_akey_deleted", + "engine_pool_vos_aggregation_akey_scanned", + "engine_pool_vos_aggregation_akey_skipped", + "engine_pool_vos_aggregation_dkey_deleted", + "engine_pool_vos_aggregation_dkey_scanned", + "engine_pool_vos_aggregation_dkey_skipped", + # -- Duration -- + "engine_pool_vos_aggregation_epr_duration", + "engine_pool_vos_aggregation_epr_duration_max", + "engine_pool_vos_aggregation_epr_duration_mean", + "engine_pool_vos_aggregation_epr_duration_min", + "engine_pool_vos_aggregation_epr_duration_stddev" + ] + self.metric_names = self.space_metric_names + self.aggr_metric_names + self.media_names = ['SCM', 'NVMe'] self.expected_errors = [self.DER_NOSPACE, self.DER_TIMEDOUT] @@ -55,26 +91,31 @@ def setUp(self): self.daos_cmd = DaosCommand(self.bin) self.create_pool_max_size() - def get_pool_space_metrics(self, pool_uuid): + def get_pool_space_metrics(self, pool, metrics): """Return the metrics on space usage of a given pool. Args: - pool_uuid (str): Unique id of a pool. + pool (TestPool): target TestPool. + metrics (dict): telemetry metrics. Returns: dict: metrics on space usage. """ - metrics = {} - for hostname, data in self.telemetry.get_metrics(",".join(self.metric_names)).items(): + pool_uuid = pool.uuid + space_metrics = {} + for hostname, data in metrics.items(): for metric_name, entry in data.items(): - if metric_name not in metrics: - metrics[metric_name] = { + if metric_name not in self.space_metric_names: + continue + + if metric_name not in space_metrics: + space_metrics[metric_name] = { "description": entry['description'], "hosts": {} } - hosts = metrics[metric_name]["hosts"] + hosts = space_metrics[metric_name]["hosts"] for metric in entry['metrics']: if metric['labels']['pool'].casefold() != pool_uuid.casefold(): continue @@ -89,11 +130,60 @@ def get_pool_space_metrics(self, pool_uuid): target = metric['labels']['target'] hosts[hostname][rank][target] = metric['value'] - return metrics + return space_metrics + + def get_pool_aggr_metrics(self, pool, metrics): + """Return the metrics on aggregation counters and gauges. + + Args: + pool (TestPool): target TestPool. + metrics (dict): telemetry metrics. + + Returns: + dict: metrics on aggregation. 
+ + """ + pool_uuid = pool.uuid + aggr_metrics = { + "metric_descriptions": {}, + "metric_values": {} + } + for hostname, data in metrics.items(): + if hostname not in aggr_metrics["metric_values"]: + aggr_metrics["metric_values"][hostname] = {} + hosts = aggr_metrics["metric_values"][hostname] + + for metric_name, entry in data.items(): + if metric_name not in self.aggr_metric_names: + continue + + if metric_name not in aggr_metrics["metric_descriptions"]: + aggr_metrics["metric_descriptions"][metric_name] = entry["description"] + + for metric in entry['metrics']: + if metric['labels']['pool'].casefold() != pool_uuid.casefold(): + continue + + rank = metric['labels']['rank'] + if rank not in hosts: + hosts[rank] = {} + ranks = hosts[rank] + + target = metric['labels']['target'] + if target not in ranks: + ranks[target] = {} + targets = ranks[target] + + targets[metric_name] = metric['value'] + + return aggr_metrics def get_pool_usage(self, pool_space): """Get the pool storage used % for SCM and NVMe. + Args: + pool_space (object): space usage information of a pool. + Returns: list: a list of SCM/NVMe pool space usage in %(float) @@ -106,14 +196,55 @@ def get_pool_usage(self, pool_space): return pool_usage - def display_pool_stats(self, pool_space, pool_space_metrics): - """Display statistics on pool usage. + def display_table(self, title, table, align_idx): + """Pretty print table content. + + Args: + title (str): Title of the table. + table (list): Table to print on stdout. + align_idx (int): Last column to left align. + """ + cols_size = [ + max(i) for i in [[len(row[j]) for row in table] for j in range(len(table[0]))]] + line_size = sum(cols_size) + 3 * (len(cols_size) - 1) + + self.log.debug("") + line = f"{' ' + title + ' ':-^{line_size}}" + self.log.debug(line) + + line = "" + for idx, elt in enumerate(table[0]): + line += f"{elt:^{cols_size[idx]}}" + if idx + 1 != len(table[0]): + line += " | " + self.log.debug(line) + + line = "" + for idx, size in enumerate(cols_size): + line += '-' * size + if idx + 1 != len(cols_size): + line += "-+-" + self.log.debug(line) + + for row in table[1:]: + line = "" + for idx, elt in enumerate(row): + align_op = "<" + if idx > align_idx: + align_op = ">" + line += f"{elt:{align_op}{cols_size[idx]}}" + if idx + 1 != len(row): + line += " | " + self.log.debug(line) + + def display_pool_space(self, pool_space, pool_space_metrics): + """Display space usage statistics of a given pool. Args: pool_space (object): space usage information of a pool. pool_space_metrics (dict): dict of metrics on space usage of a pool. 
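The display_table() helper introduced above derives each column width from the widest cell before printing the header, separator, and rows. A cut-down sketch of the same width calculation (standalone, simple alignment only):

# Sketch: compute per-column widths and print an aligned text table,
# mirroring the width logic used by display_table().
def print_table(table):
    widths = [max(len(row[col]) for row in table) for col in range(len(table[0]))]
    print(" | ".join(f"{cell:^{widths[i]}}" for i, cell in enumerate(table[0])))
    print("-+-".join("-" * w for w in widths))
    for row in table[1:]:
        print(" | ".join(f"{cell:<{widths[i]}}" for i, cell in enumerate(row)))

print_table([["Hostname", "Rank", "Target", "Size"],
             ["host-1", "0", "1", "1.5 GB"],
             ["host-1", "0", "2", "900 MB"]])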
""" - + self.log.debug("") title = f"{' Pool Space Usage ':-^80}" self.log.debug(title) @@ -135,34 +266,65 @@ def display_pool_stats(self, pool_space, pool_space_metrics): for metric in pool_space_metrics.values(): table = [["Hostname", "Rank", "Target", "Size"]] - cols_size = [] - for cell in table[0]: - cols_size.append(len(cell)) for hostname, ranks in metric['hosts'].items(): for rank, targets in ranks.items(): for target, size in targets.items(): row = [hostname, rank, target, get_display_size(size)] table.append(row) - for idx, elt in enumerate(cols_size): - cols_size[idx] = max(elt, len(row[idx])) hostname = "" rank = "" - for idx, elt in enumerate(table[0]): - table[0][idx] = f"{elt:^{cols_size[idx]}}" - row = ' | '.join(table[0]) - title = f"{' ' + metric['description'] + ' ':-^{len(row)}}" - self.log.debug("") - self.log.debug(title) - self.log.debug(row) - self.log.debug("-" * len(row)) - for row in table[1:]: - for idx, elt in enumerate(row): - align_op = "<" - if idx + 1 == len(row): - align_op = ">" - row[idx] = f"{elt:{align_op}{cols_size[idx]}}" - self.log.debug(" | ".join(row)) + self.display_table(metric['description'], table, 2) + + def display_pool_aggregation(self, metrics): + """Display record aggregation statistics of a given pool. + + Args: + metrics (dict): dict of metrics on pool aggregation. + """ + table = [["Hostname", "Rank", "Target"]] + for it in self.aggr_metric_names: + table[0].append(metrics["metric_descriptions"][it]) + + for hostname in sorted(metrics["metric_values"]): + row = [hostname] + + for rank in sorted(metrics["metric_values"][hostname]): + if not row: + row = [""] + row.append(rank) + + for target in sorted(metrics["metric_values"][hostname][rank]): + if not row: + row = ["", ""] + row.append(target) + + idx = 3 + for metric_name in self.aggr_metric_names: + value = metrics["metric_values"][hostname][rank][target][metric_name] + if metric_name == "engine_pool_vos_aggregation_merged_size": + row.append(get_display_size(value)) + else: + row.append(str(value)) + idx += 1 + + table.append(row) + row = None + + self.display_table('Pool Aggregation stats', table, 2) + + def display_stats(self): + """Display usage statistics of the tested pool.""" + self.pool.get_info() + metrics = self.telemetry.get_metrics(",".join(self.metric_names)) + + pool_space = self.pool.info.pi_space + pool_space_metrics = self.get_pool_space_metrics(self.pool, metrics) + self.display_pool_space(pool_space, pool_space_metrics) + + pool_aggr_metrics = self.get_pool_aggr_metrics(self.pool, metrics) + self.display_pool_aggregation(pool_aggr_metrics) + self.log.debug("") def verify_enospace_log(self, log_file): """Function checking logs consistency. @@ -207,10 +369,14 @@ def err_to_str(err_no): "Number of errors %s (%s) is > 0: got=%d", err_to_str(error), error, errors_count[error]) - def delete_all_containers(self): - """Delete all the containers.""" + def delete_all_containers(self, pool): + """Delete all the containers of a given pool. + + Args: + pool (TestPool): target TestPool. + """ # List all the container - kwargs = {"pool": self.pool.uuid} + kwargs = {"pool": pool.uuid} data = self.daos_cmd.container_list(**kwargs) containers = [uuid_label["uuid"] for uuid_label in data["response"]] @@ -291,17 +457,22 @@ def run_enospace_foreground(self, log_file): log_file (str): name prefix of the log files to check. """ self.log.info('----Starting main IOR load----') + self.display_stats() # Fill 75% of current SCM free space. 
Aggregation is Enabled so NVMe space will # start to fill up. self.log.info('--Filling 75% of the current SCM free space--') - self.start_ior_load(storage='SCM', operation="Auto_Write", percent=75) - self.log.info(self.pool.pool_percentage_used()) + try: + self.start_ior_load(storage='SCM', operation="Auto_Write", percent=75) + finally: + self.display_stats() # Fill 50% of current SCM free space. Aggregation is Enabled so NVMe space will # continue to fill up. - self.start_ior_load(storage='SCM', operation="Auto_Write", percent=50) - self.log.info(self.pool.pool_percentage_used()) + try: + self.start_ior_load(storage='SCM', operation="Auto_Write", percent=50) + finally: + self.display_stats() # Fill 60% of current SCM free space. This time, NVMe will be Full so data will # not be moved to NVMe and continue to fill up SCM. SCM will be full and this @@ -314,18 +485,14 @@ def run_enospace_foreground(self, log_file): self.log.info('Test is expected to fail because of DER_NOSPACE') else: self.fail('This test is suppose to FAIL because of DER_NOSPACE but it Passed') - - # Display the pool statistics - self.pool.get_info() - pool_space = self.pool.info.pi_space - pool_space_metrics = self.get_pool_space_metrics(self.pool.uuid) - self.display_pool_stats(pool_space, pool_space_metrics) + finally: + self.display_stats() # verify the DER_NO_SPACE error count is expected and no other Error in client log self.verify_enospace_log(log_file) # Check both NVMe and SCM are full. - pool_usage = self.get_pool_usage(pool_space) + pool_usage = self.get_pool_usage(self.pool.info.pi_space) for idx, elt in enumerate(self.media_names): if pool_usage[idx] >= self.pool_usage_min[idx]: continue @@ -413,7 +580,7 @@ def test_enospace_lazy_with_fg(self): log_file = f"-loop_{_loop}".join(os.path.splitext(self.client_log)) self.run_enospace_foreground(log_file) # Delete all the containers - self.delete_all_containers() + self.delete_all_containers(self.pool) # Delete container will take some time to release the space time.sleep(60) @@ -475,7 +642,7 @@ def test_enospace_time_with_fg(self): log_file = f"-loop_{_loop}".join(os.path.splitext(self.client_log)) self.run_enospace_with_bg_job(log_file) # Delete all the containers - self.delete_all_containers() + self.delete_all_containers(self.pool) # Delete container will take some time to release the space time.sleep(60) @@ -571,7 +738,7 @@ def test_enospace_no_aggregation(self): self.verify_enospace_log(log_file) # Delete all the containers - self.delete_all_containers() + self.delete_all_containers(self.pool) # Wait for the SCM space to be released. (Usage goes below 60%) scm_released = False diff --git a/src/tests/ftest/server/metadata.py b/src/tests/ftest/server/metadata.py index 89ae6af6236..fbeaa1ef047 100644 --- a/src/tests/ftest/server/metadata.py +++ b/src/tests/ftest/server/metadata.py @@ -1,5 +1,6 @@ """ (C) Copyright 2019-2024 Intel Corporation. 
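Wrapping each IOR fill step in try/finally, as run_enospace_foreground() now does, guarantees that pool space and aggregation statistics are logged even when the write raises DER_NOSPACE. A small illustration of the pattern; start_ior_load and display_stats below are stand-ins, not the real test methods:

# Sketch: always dump statistics after a fill step, even if it fails.
def fill_step(start_ior_load, display_stats, percent):
    try:
        start_ior_load(percent)   # may raise when the pool runs out of space
    finally:
        display_stats()           # stats are logged on success and on failure

fill_step(lambda p: print(f"filling {p}%"), lambda: print("stats dumped"), 75)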
+ (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -9,7 +10,7 @@ from avocado.core.exceptions import TestFail from exception_utils import CommandFailure from ior_utils import IorCommand -from job_manager_utils import get_job_manager +from job_manager_utils import get_job_manager, stop_job_manager from thread_manager import ThreadManager @@ -63,7 +64,6 @@ class ObjectMetadata(TestWithServers): def __init__(self, *args, **kwargs): """Initialize a TestWithServers object.""" super().__init__(*args, **kwargs) - self.ior_managers = [] # Minimum number of containers that should be able to be created self.created_containers_min = self.params.get("created_cont_min", "/run/metadata/*") @@ -71,21 +71,6 @@ def __init__(self, *args, **kwargs): # Number of created containers that should not be possible self.created_containers_limit = self.params.get("created_cont_max", "/run/metadata/*") - def pre_tear_down(self): - """Tear down steps to optionally run before tearDown(). - - Returns: - list: a list of error strings to report at the end of tearDown(). - - """ - error_list = [] - if self.ior_managers: - self.test_log.info("Stopping IOR job managers") - error_list = self._stop_managers(self.ior_managers, "IOR job manager") - else: - self.log.debug("no pre-teardown steps defined") - return error_list - def create_pool(self, svc_ops_enabled=True): """Create a pool and display the svc ranks. @@ -284,29 +269,26 @@ def metadata_fillup(self, svc_ops_enabled=True): # Keep track of the number of sequential no space container # create errors. Once the max has been reached stop the loop. if status: + if in_failure: + self.log.info( + "Container: %d - [no space -> available] creation successful after %d" + " sequential 'no space' error(s) ", loop + 1, sequential_fail_counter) + in_failure = False sequential_fail_counter = 0 else: sequential_fail_counter += 1 + if not in_failure: + self.log.info( + "Container: %d - [available -> no space] detected new sequential " + "'no space' error", loop + 1) + in_failure = True + if sequential_fail_counter >= sequential_fail_max: self.log.info( - "Container %d - %d/%d sequential no space " - "container create errors", sequential_fail_counter, - sequential_fail_max, loop) + "Container %d - [no space limit] reached %d/%d sequential 'no space' " + "errors", loop + 1, sequential_fail_counter, sequential_fail_max) break - if status and in_failure: - self.log.info( - "Container: %d - no space -> available " - "transition, sequential no space failures: %d", - loop, sequential_fail_counter) - in_failure = False - elif not status and not in_failure: - self.log.info( - "Container: %d - available -> no space " - "transition, sequential no space failures: %d", - loop, sequential_fail_counter) - in_failure = True - except TestFail as error: self.log.error(str(error)) self.fail("fail (unexpected container create error)") @@ -320,17 +302,17 @@ def metadata_fillup(self, svc_ops_enabled=True): self.created_containers_min) self.fail("Created too few containers") self.log.info( - "Successfully created %d / %d containers)", len(self.container), loop) + "Successfully created %d containers in %d loops)", len(self.container), loop + 1) # Phase 2 clean up some containers (expected to succeed) - msg = "Cleaning up {} containers after pool is full.".format(num_cont_to_destroy) + msg = (f"Cleaning up {num_cont_to_destroy}/{len(self.container)} containers after pool " + "is full.") self.log_step(msg) if not 
self.destroy_num_containers(num_cont_to_destroy): self.fail("Fail (unexpected container destroy error)") - # Do not destroy containers in teardown (destroy pool while metadata rdb is full) - for container in self.container: - container.skip_cleanup() + # The remaining containers are not directly destroyed in teardown due to + # 'register_cleanup: False' test yaml entry. They are handled by the pool destroy. self.log.info("Leaving pool metadata rdb full (containers will not be destroyed)") self.log.info("Test passed") @@ -469,6 +451,7 @@ def test_metadata_server_restart(self): self.create_pool() files_per_thread = 400 total_ior_threads = 5 + ior_managers = [] processes = self.params.get("slots", "/run/ior/clientslots/*") @@ -487,19 +470,26 @@ def test_metadata_server_restart(self): ior_cmd.flags.value = self.params.get("ior{}flags".format(operation), "/run/ior/*") # Define the job manager for the IOR command - self.ior_managers.append( + ior_managers.append( get_job_manager(self, "Clush", ior_cmd)) - env = ior_cmd.get_default_env(str(self.ior_managers[-1])) - self.ior_managers[-1].assign_hosts(self.hostlist_clients, self.workdir, None) - self.ior_managers[-1].assign_processes(processes) - self.ior_managers[-1].assign_environment(env) - self.ior_managers[-1].verbose = False + env = ior_cmd.get_default_env(str(ior_managers[-1])) + ior_managers[-1].assign_hosts(self.hostlist_clients, self.workdir, None) + ior_managers[-1].assign_processes(processes) + ior_managers[-1].assign_environment(env) + ior_managers[-1].verbose = False + + # Disable cleanup methods for all ior commands. + ior_managers[-1].register_cleanup_method = None # Add a thread for these IOR arguments thread_manager.add( - test=self, manager=self.ior_managers[-1], loops=files_per_thread) + test=self, manager=ior_managers[-1], loops=files_per_thread) self.log.info("Created %s thread %s", operation, index) + # Manually add one cleanup method for all ior threads + if operation == "write": + self.register_cleanup(stop_job_manager, job_manager=ior_managers[0]) + # Launch the IOR threads self.log.info("Launching %d IOR %s threads", thread_manager.qty, operation) failed_thread_count = thread_manager.check_run() diff --git a/src/tests/ftest/server/metadata.yaml b/src/tests/ftest/server/metadata.yaml index 442d4b1a838..7f77b2ae2be 100644 --- a/src/tests/ftest/server/metadata.yaml +++ b/src/tests/ftest/server/metadata.yaml @@ -1,11 +1,13 @@ hosts: test_servers: 4 test_clients: 1 + timeouts: test_metadata_fillup_svc_ops_disabled: 400 test_metadata_fillup_svc_ops_enabled: 400 test_metadata_addremove: 1300 test_metadata_server_restart: 500 + server_config: name: daos_server engines_per_host: 2 @@ -52,9 +54,12 @@ pool: # properties: svc_ops_entry_age:150 # properties: svc_ops_entry_age:300 # properties: svc_ops_entry_age:600 + container: control_method: API silent: true + register_cleanup: False + ior: clientslots: slots: 1 @@ -62,6 +67,7 @@ ior: iorwriteflags: "-w -W -k -G 1" iorreadflags: "-r -R -G 1" dfs_oclass: "SX" + metadata: mean_percent: 1 num_addremove_loops: 4 diff --git a/src/tests/ftest/util/command_utils.py b/src/tests/ftest/util/command_utils.py index dbed7ac3c44..f5d2885f94e 100644 --- a/src/tests/ftest/util/command_utils.py +++ b/src/tests/ftest/util/command_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. 
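The reworked metadata_fillup() loop above logs only the available/no-space transitions and stops once a fixed number of consecutive 'no space' failures is reached. A compact sketch of that state tracking over an invented sequence of container-create results:

# Sketch: track available <-> no-space transitions and stop after N
# consecutive failures (results list is invented).
def count_until_limit(results, sequential_fail_max):
    in_failure, streak = False, 0
    for loop, ok in enumerate(results, start=1):
        if ok:
            if in_failure:
                print(f"loop {loop}: no space -> available after {streak} failure(s)")
            in_failure, streak = False, 0
        else:
            streak += 1
            if not in_failure:
                print(f"loop {loop}: available -> no space")
                in_failure = True
            if streak >= sequential_fail_max:
                print(f"loop {loop}: reached {streak}/{sequential_fail_max} failures, stopping")
                break
    return loop

count_until_limit([True, True, False, True, False, False, False], sequential_fail_max=3)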
+ (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -478,7 +479,7 @@ def get_params(self, test): super().get_params(test) for namespace in ['/run/client/*', self.namespace]: if namespace is not None: - self.env.update_from_list(test.params.get("env_vars", namespace, [])) + self.env.update_from_list(test.params.get("env_vars", namespace, None) or []) def _get_new(self): """Get a new object based upon this one. diff --git a/src/tests/suite/daos_rebuild.c b/src/tests/suite/daos_rebuild.c index 20a751504ac..0819841b1ab 100644 --- a/src/tests/suite/daos_rebuild.c +++ b/src/tests/suite/daos_rebuild.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1563,6 +1564,42 @@ rebuild_cont_destroy_and_reintegrate(void **state) reintegrate_single_pool_rank(arg, 5, true); } +static void +rebuild_incr_reint_basic(void **state) +{ + test_arg_t *arg = *state; + daos_obj_id_t oids[OBJ_NR]; + daos_obj_id_t update_oids[OBJ_NR]; + int rc; + int i; + + if (!test_runable(arg, 6)) + return; + + rc = daos_pool_set_prop(arg->pool.pool_uuid, "reintegration", "incremental"); + assert_rc_equal(rc, 0); + for (i = 0; i < OBJ_NR; i++) { + oids[i] = daos_test_oid_gen(arg->coh, DAOS_OC_R3S_SPEC_RANK, 0, 0, arg->myrank); + oids[i] = dts_oid_set_rank(oids[i], 5); + } + + dt_no_punch = true; + rebuild_io(arg, oids, OBJ_NR); + arg->no_rebuild = 0; + rebuild_single_pool_rank(arg, 5, true); + + for (i = 0; i < OBJ_NR; i++) + update_oids[i] = daos_test_oid_gen(arg->coh, OC_RP_3GX, 0, 0, arg->myrank); + rebuild_io(arg, update_oids, OBJ_NR); + + reintegrate_single_pool_rank(arg, 5, true); + rebuild_io_verify(arg, oids, OBJ_NR); + rebuild_io_verify(arg, update_oids, OBJ_NR); + + rc = daos_pool_set_prop(arg->pool.pool_uuid, "reintegration", "data_sync"); + assert_rc_equal(rc, 0); + dt_no_punch = false; +} /** create a new pool/container for each test */ static const struct CMUnitTest rebuild_tests[] = { {"REBUILD0: drop rebuild scan reply", @@ -1655,6 +1692,9 @@ static const struct CMUnitTest rebuild_tests[] = { {"REBUILD35: destroy container then reintegrate", rebuild_cont_destroy_and_reintegrate, rebuild_sub_6nodes_rf1_setup, rebuild_sub_teardown}, + {"REBUILD36: basic incremental reintegration", + rebuild_incr_reint_basic, rebuild_sub_6nodes_rf1_setup, + rebuild_sub_teardown}, }; /* TODO: Enable aggregation once stable view rebuild is done. */ diff --git a/src/tests/suite/daos_rebuild_common.c b/src/tests/suite/daos_rebuild_common.c index 10976c7e834..e72b660afb3 100644 --- a/src/tests/suite/daos_rebuild_common.c +++ b/src/tests/suite/daos_rebuild_common.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -436,6 +437,11 @@ rebuild_io_obj_internal(struct ioreq *req, bool validate, int index) int k; int l; + if (dt_no_punch) { + akey_punch_idx = -1; + dkey_punch_idx = -1; + } + D_ALLOC(large_key, LARGE_KEY_SIZE); if (large_key == NULL) return -DER_NOMEM; diff --git a/src/tests/suite/daos_test.h b/src/tests/suite/daos_test.h index 9e862df8ddb..f220fe34cea 100644 --- a/src/tests/suite/daos_test.h +++ b/src/tests/suite/daos_test.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. 
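The command_utils.py change earlier in this patch ("env_vars", namespace, None) or []) matters presumably because the parameter can resolve to None when the key is present but empty; the 'or []' normalizes that to an empty list before update_from_list() iterates it. A tiny illustration with a plain dict standing in for the test parameters:

# Sketch: why "get(..., None) or []" is safer than "get(..., [])" when the
# stored value itself may be None (params is a stand-in for test.params).
params = {"env_vars": None}

broken = params.get("env_vars", [])        # -> None, later iteration would fail
safe = params.get("env_vars", None) or []  # -> []

print(broken, safe)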
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -49,6 +50,10 @@ /** Server crt group ID */ extern const char *server_group; +/** pool incremental reintegration */ +extern int dt_incr_reint; +extern bool dt_no_punch; + /** Pool service replicas */ extern unsigned int svc_nreplicas; extern const char *dmg_config_file; diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index 0831c799159..8c79dc17680 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2018-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -32,6 +33,10 @@ int dt_obj_class; int dt_redun_lvl; int dt_redun_fac; +/** pool incremental reintegration */ +int dt_incr_reint; +bool dt_no_punch; /* will remove later */ + /* Create or import a single pool with option to store info in arg->pool * or an alternate caller-specified test_pool structure. * ipool (optional): import pool: store info for an existing pool to arg->pool. @@ -349,6 +354,8 @@ test_setup(void **state, unsigned int step, bool multi_rank, daos_prop_t co_props = {0}; struct daos_prop_entry dpp_entry[6] = {0}; struct daos_prop_entry *entry; + daos_prop_t po_props = {0}; + struct daos_prop_entry po_entry[1] = {0}; /* feed a seed for pseudo-random number generator */ gettimeofday(&now, NULL); @@ -395,6 +402,18 @@ test_setup(void **state, unsigned int step, bool multi_rank, arg->pool.destroyed = false; } + /** Look at variables set by test arguments and setup pool props */ + if (dt_incr_reint) { + print_message("\n-------\n" + "Incremental reintegration enabled in test!" + "\n-------\n"); + entry = &po_entry[po_props.dpp_nr]; + entry->dpe_type = DAOS_PROP_PO_REINT_MODE; + entry->dpe_val = DAOS_REINT_MODE_INCREMENTAL; + + po_props.dpp_nr++; + } + /** Look at variables set by test arguments and setup container props */ if (dt_csum_type) { print_message("\n-------\n" @@ -445,11 +464,13 @@ test_setup(void **state, unsigned int step, bool multi_rank, co_props.dpp_nr++; } + if (po_props.dpp_nr > 0) + po_props.dpp_entries = po_entry; if (co_props.dpp_nr > 0) co_props.dpp_entries = dpp_entry; while (!rc && step != arg->setup_state) - rc = test_setup_next_step(state, pool, NULL, &co_props); + rc = test_setup_next_step(state, pool, &po_props, &co_props); if (rc) { D_FREE(arg); diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index a5feb6c3ca5..2f1388bfc05 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. 
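test_setup() above now builds an optional pool property list: the DAOS_PROP_PO_REINT_MODE entry is appended only when incremental reintegration was requested, and the list is handed over only when non-empty. A rough Python sketch of the same conditional construction; the property names here are just labels, not DAOS constants:

# Sketch: append an optional pool property only when the matching flag is
# set, and pass the list along only when it is non-empty.
def build_pool_props(incr_reint_enabled):
    props = []
    if incr_reint_enabled:
        props.append(("reint_mode", "incremental"))
    return props or None

print(build_pool_props(False))  # None -> server defaults
print(build_pool_props(True))   # [('reint_mode', 'incremental')]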
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -7,6 +8,7 @@ #include "dfs_test.h" #include +#include #include #include #include @@ -871,29 +873,50 @@ dfs_test_io_error_code(void **state) test_arg_t *arg = *state; dfs_obj_t *file; daos_event_t ev, *evp; - daos_range_t iod_rgs; + daos_range_t iod_rg; + daos_range_t *iod_rgs; dfs_iod_t iod; d_sg_list_t sgl; d_iov_t iov; - char buf[10]; + char *buf; daos_size_t read_size; + int i; int rc; if (arg->myrank != 0) return; + D_ALLOC_ARRAY(iod_rgs, DAOS_ARRAY_LIST_IO_LIMIT + 1); + D_ALLOC_ARRAY(buf, DAOS_ARRAY_LIST_IO_LIMIT + 1); + rc = dfs_open(dfs_mt, NULL, "io_error", S_IFREG | S_IWUSR | S_IRUSR, O_RDWR | O_CREAT, 0, 0, NULL, &file); assert_int_equal(rc, 0); + /** set an IOD with a large nr count that is not supported */ + iod.iod_nr = DAOS_ARRAY_LIST_IO_LIMIT + 1; + for (i = 0; i < DAOS_ARRAY_LIST_IO_LIMIT + 1; i++) { + iod_rgs[i].rg_idx = i + 2; + iod_rgs[i].rg_len = 1; + } + iod.iod_rgs = iod_rgs; + d_iov_set(&iov, buf, DAOS_ARRAY_LIST_IO_LIMIT + 1); + sgl.sg_nr = 1; + sgl.sg_nr_out = 1; + sgl.sg_iovs = &iov; + rc = dfs_writex(dfs_mt, file, &iod, &sgl, NULL); + assert_int_equal(rc, ENOTSUP); + rc = dfs_readx(dfs_mt, file, &iod, &sgl, &read_size, NULL); + assert_int_equal(rc, ENOTSUP); + /* * set an IOD that has writes more data than sgl to trigger error in * array layer. */ iod.iod_nr = 1; - iod_rgs.rg_idx = 0; - iod_rgs.rg_len = 10; - iod.iod_rgs = &iod_rgs; + iod_rg.rg_idx = 0; + iod_rg.rg_len = 10; + iod.iod_rgs = &iod_rg; d_iov_set(&iov, buf, 5); sgl.sg_nr = 1; sgl.sg_nr_out = 1; @@ -942,6 +965,8 @@ dfs_test_io_error_code(void **state) assert_int_equal(rc, 0); rc = dfs_remove(dfs_mt, NULL, "io_error", 0, NULL); assert_int_equal(rc, 0); + D_FREE(buf); + D_FREE(iod_rgs); } int dfs_test_rc[DFS_TEST_MAX_THREAD_NR]; diff --git a/src/utils/ddb/ddb.c b/src/utils/ddb/ddb.c index 8493852f24f..07b96aa031f 100644 --- a/src/utils/ddb/ddb.c +++ b/src/utils/ddb/ddb.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
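The new dfs_unit_test.c case checks that an I/O descriptor with more ranges than the array list-I/O limit is rejected up front with ENOTSUP for both dfs_writex() and dfs_readx(). A schematic version of such a guard; the limit value and error mapping below are illustrative, not the libdfs implementation:

# Sketch: reject list-I/O requests whose range count exceeds a fixed limit
# (limit and error type are illustrative).
LIST_IO_LIMIT = 256

def check_iod(ranges):
    if len(ranges) > LIST_IO_LIMIT:
        raise NotImplementedError("too many ranges for list I/O")

check_iod([(i, 1) for i in range(10)])                     # accepted
try:
    check_iod([(i, 1) for i in range(LIST_IO_LIMIT + 1)])  # rejected
except NotImplementedError as err:
    print(err)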
 *
 * SPDX-License-Identifier: BSD-2-Clause-Patent
 */
@@ -37,6 +38,7 @@
 #define COMMAND_NAME_DTX_ACT_ABORT "dtx_act_abort"
 #define COMMAND_NAME_FEATURE "feature"
 #define COMMAND_NAME_RM_POOL "rm_pool"
+#define COMMAND_NAME_DTX_ACT_DISCARD_INVALID "dtx_act_discard_invalid"
 
 /* Parse command line options for the 'ls' command */
 static int
@@ -542,55 +544,13 @@ vea_update_option_parse(struct ddb_ctx *ctx, struct vea_update_options *cmd_args
 	return 0;
 }
 
-/* Parse command line options for the 'dtx_act_commit' command */
-static int
-dtx_act_commit_option_parse(struct ddb_ctx *ctx, struct dtx_act_commit_options *cmd_args,
-			    uint32_t argc, char **argv)
-{
-	char *options_short = "";
-	int index = 0;
-	struct option options_long[] = {
-	    { NULL }
-	};
-
-	memset(cmd_args, 0, sizeof(*cmd_args));
-
-	/* Restart getopt */
-	optind = 1;
-	opterr = 0;
-	if (getopt_long(argc, argv, options_short, options_long, &index) != -1) {
-		ddb_printf(ctx, "Unknown option: '%c'\n", optopt);
-		return -DER_INVAL;
-	}
-
-	index = optind;
-	if (argc - index > 0) {
-		cmd_args->path = argv[index];
-		index++;
-	} else {
-		ddb_print(ctx, "Expected argument 'path'\n");
-		return -DER_INVAL;
-	}
-	if (argc - index > 0) {
-		cmd_args->dtx_id = argv[index];
-		index++;
-	} else {
-		ddb_print(ctx, "Expected argument 'dtx_id'\n");
-		return -DER_INVAL;
-	}
-
-	if (argc - index > 0) {
-		ddb_printf(ctx, "Unexpected argument: %s\n", argv[index]);
-		return -DER_INVAL;
-	}
-
-	return 0;
-}
-
-/* Parse command line options for the 'dtx_act_abort' command */
+/**
+ * Parse command line options for the 'dtx_act_commit', 'dtx_act_abort', and
+ * 'dtx_act_discard_invalid' commands.
+ */
 static int
-dtx_act_abort_option_parse(struct ddb_ctx *ctx, struct dtx_act_abort_options *cmd_args,
-			   uint32_t argc, char **argv)
+dtx_act_option_parse(struct ddb_ctx *ctx, struct dtx_act_options *cmd_args, uint32_t argc,
+		     char **argv)
 {
 	char *options_short = "";
 	int index = 0;
@@ -854,13 +814,15 @@ ddb_parse_cmd_args(struct ddb_ctx *ctx, uint32_t argc, char **argv, struct ddb_c
 	}
 	if (same(cmd, COMMAND_NAME_DTX_ACT_COMMIT)) {
 		info->dci_cmd = DDB_CMD_DTX_ACT_COMMIT;
-		return dtx_act_commit_option_parse(ctx, &info->dci_cmd_option.dci_dtx_act_commit,
-						   argc, argv);
+		return dtx_act_option_parse(ctx, &info->dci_cmd_option.dci_dtx_act, argc, argv);
 	}
 	if (same(cmd, COMMAND_NAME_DTX_ACT_ABORT)) {
 		info->dci_cmd = DDB_CMD_DTX_ACT_ABORT;
-		return dtx_act_abort_option_parse(ctx, &info->dci_cmd_option.dci_dtx_act_abort,
-						  argc, argv);
+		return dtx_act_option_parse(ctx, &info->dci_cmd_option.dci_dtx_act, argc, argv);
+	}
+	if (same(cmd, COMMAND_NAME_DTX_ACT_DISCARD_INVALID)) {
+		info->dci_cmd = DDB_CMD_DTX_ACT_DISCARD_INVALID;
+		return dtx_act_option_parse(ctx, &info->dci_cmd_option.dci_dtx_act, argc, argv);
 	}
 	if (same(cmd, COMMAND_NAME_RM_POOL)) {
 		info->dci_cmd = DDB_CMD_RM_POOL;
@@ -1043,11 +1005,15 @@ ddb_run_cmd(struct ddb_ctx *ctx, const char *cmd_str)
 		break;
 
 	case DDB_CMD_DTX_ACT_COMMIT:
-		rc = ddb_run_dtx_act_commit(ctx, &info.dci_cmd_option.dci_dtx_act_commit);
+		rc = ddb_run_dtx_act_commit(ctx, &info.dci_cmd_option.dci_dtx_act);
 		break;
 
 	case DDB_CMD_DTX_ACT_ABORT:
-		rc = ddb_run_dtx_act_abort(ctx, &info.dci_cmd_option.dci_dtx_act_abort);
+		rc = ddb_run_dtx_act_abort(ctx, &info.dci_cmd_option.dci_dtx_act);
+		break;
+
+	case DDB_CMD_DTX_ACT_DISCARD_INVALID:
+		rc = ddb_run_dtx_act_discard_invalid(ctx, &info.dci_cmd_option.dci_dtx_act);
 		break;
 
 	case DDB_CMD_FEATURE:
diff --git a/src/utils/ddb/ddb.h
index a82bb292239..df8cf699d5d 100644
--- a/src/utils/ddb/ddb.h
+++ b/src/utils/ddb/ddb.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -94,29 +95,30 @@ int ddb_init(void); void ddb_fini(void); enum ddb_cmd { - DDB_CMD_UNKNOWN = 0, - DDB_CMD_HELP = 1, - DDB_CMD_QUIT = 2, - DDB_CMD_LS = 3, - DDB_CMD_OPEN = 4, - DDB_CMD_VERSION = 5, - DDB_CMD_CLOSE = 6, - DDB_CMD_SUPERBLOCK_DUMP = 7, - DDB_CMD_VALUE_DUMP = 8, - DDB_CMD_RM = 9, - DDB_CMD_VALUE_LOAD = 10, - DDB_CMD_ILOG_DUMP = 11, - DDB_CMD_ILOG_COMMIT = 12, - DDB_CMD_ILOG_CLEAR = 13, - DDB_CMD_DTX_DUMP = 14, - DDB_CMD_DTX_CMT_CLEAR = 15, - DDB_CMD_SMD_SYNC = 16, - DDB_CMD_VEA_DUMP = 17, - DDB_CMD_VEA_UPDATE = 18, - DDB_CMD_DTX_ACT_COMMIT = 19, - DDB_CMD_DTX_ACT_ABORT = 20, - DDB_CMD_FEATURE = 21, - DDB_CMD_RM_POOL = 22, + DDB_CMD_UNKNOWN = 0, + DDB_CMD_HELP = 1, + DDB_CMD_QUIT = 2, + DDB_CMD_LS = 3, + DDB_CMD_OPEN = 4, + DDB_CMD_VERSION = 5, + DDB_CMD_CLOSE = 6, + DDB_CMD_SUPERBLOCK_DUMP = 7, + DDB_CMD_VALUE_DUMP = 8, + DDB_CMD_RM = 9, + DDB_CMD_VALUE_LOAD = 10, + DDB_CMD_ILOG_DUMP = 11, + DDB_CMD_ILOG_COMMIT = 12, + DDB_CMD_ILOG_CLEAR = 13, + DDB_CMD_DTX_DUMP = 14, + DDB_CMD_DTX_CMT_CLEAR = 15, + DDB_CMD_SMD_SYNC = 16, + DDB_CMD_VEA_DUMP = 17, + DDB_CMD_VEA_UPDATE = 18, + DDB_CMD_DTX_ACT_COMMIT = 19, + DDB_CMD_DTX_ACT_ABORT = 20, + DDB_CMD_FEATURE = 21, + DDB_CMD_RM_POOL = 22, + DDB_CMD_DTX_ACT_DISCARD_INVALID = 23, }; /* option and argument structures for commands that need them */ @@ -177,12 +179,7 @@ struct vea_update_options { char *blk_cnt; }; -struct dtx_act_commit_options { - char *path; - char *dtx_id; -}; - -struct dtx_act_abort_options { +struct dtx_act_options { char *path; char *dtx_id; }; @@ -214,11 +211,10 @@ struct ddb_cmd_info { struct dtx_dump_options dci_dtx_dump; struct dtx_cmt_clear_options dci_dtx_cmt_clear; struct smd_sync_options dci_smd_sync; - struct vea_update_options dci_vea_update; - struct dtx_act_commit_options dci_dtx_act_commit; - struct dtx_act_abort_options dci_dtx_act_abort; + struct vea_update_options dci_vea_update; struct feature_options dci_feature; struct rm_pool_options dci_rm_pool; + struct dtx_act_options dci_dtx_act; } dci_cmd_option; }; @@ -247,15 +243,19 @@ int ddb_run_dtx_cmt_clear(struct ddb_ctx *ctx, struct dtx_cmt_clear_options *opt int ddb_run_smd_sync(struct ddb_ctx *ctx, struct smd_sync_options *opt); int ddb_run_vea_dump(struct ddb_ctx *ctx); int ddb_run_vea_update(struct ddb_ctx *ctx, struct vea_update_options *opt); -int ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_commit_options *opt); -int ddb_run_dtx_act_abort(struct ddb_ctx *ctx, struct dtx_act_abort_options *opt); +int +ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_options *opt); +int +ddb_run_dtx_act_abort(struct ddb_ctx *ctx, struct dtx_act_options *opt); int ddb_run_feature(struct ddb_ctx *ctx, struct feature_options *opt); int ddb_feature_string2flags(struct ddb_ctx *ctx, const char *string, uint64_t *compat_flags, uint64_t *incompat_flags); int - ddb_run_rm_pool(struct ddb_ctx *ctx, struct rm_pool_options *opt); +ddb_run_rm_pool(struct ddb_ctx *ctx, struct rm_pool_options *opt); +int + ddb_run_dtx_act_discard_invalid(struct ddb_ctx *ctx, struct dtx_act_options *opt); void ddb_program_help(struct ddb_ctx *ctx); void ddb_commands_help(struct ddb_ctx *ctx); diff --git a/src/utils/ddb/ddb_commands.c b/src/utils/ddb/ddb_commands.c index 3dd2261f504..fca43b71f57 100644 --- a/src/utils/ddb/ddb_commands.c +++ 
b/src/utils/ddb/ddb_commands.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -886,6 +887,7 @@ ddb_run_vea_update(struct ddb_ctx *ctx, struct vea_update_options *opt) struct dtx_modify_args { struct dv_indexed_tree_path itp; struct dtx_id dti; + bool dti_all; daos_handle_t coh; }; @@ -914,10 +916,12 @@ dtx_modify_init(struct ddb_ctx *ctx, char *path, char *dtx_id_str, struct dtx_mo D_GOTO(error, rc); } - rc = ddb_parse_dtx_id(dtx_id_str, &args->dti); - if (!SUCCESS(rc)) { - ddb_errorf(ctx, "Invalid dtx_id: %s\n", dtx_id_str); - D_GOTO(error, rc); + if (!args->dti_all) { + rc = ddb_parse_dtx_id(dtx_id_str, &args->dti); + if (!SUCCESS(rc)) { + ddb_errorf(ctx, "Invalid dtx_id: %s\n", dtx_id_str); + D_GOTO(error, rc); + } } return 0; @@ -935,7 +939,7 @@ dtx_modify_fini(struct dtx_modify_args *args) } int -ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_commit_options *opt) +ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_options *opt) { struct dtx_modify_args args = {0}; int rc; @@ -964,7 +968,8 @@ ddb_run_dtx_act_commit(struct ddb_ctx *ctx, struct dtx_act_commit_options *opt) return rc; } -int ddb_run_dtx_act_abort(struct ddb_ctx *ctx, struct dtx_act_abort_options *opt) +int +ddb_run_dtx_act_abort(struct ddb_ctx *ctx, struct dtx_act_options *opt) { struct dtx_modify_args args = {0}; int rc; @@ -1075,3 +1080,69 @@ ddb_run_rm_pool(struct ddb_ctx *ctx, struct rm_pool_options *opt) return dv_pool_destroy(opt->path); } + +#define DTI_ALL "all" + +struct dtx_active_entry_discard_invalid_cb_arg { + struct ddb_ctx *ctx; + struct dtx_modify_args *args; +}; + +static int +dtx_active_entry_discard_invalid(struct dv_dtx_active_entry *entry, void *cb_arg) +{ + struct dtx_active_entry_discard_invalid_cb_arg *bundle = cb_arg; + struct ddb_ctx *ctx = bundle->ctx; + struct dtx_modify_args *args = bundle->args; + int discarded = 0; + int rc; + + ddb_printf(ctx, "ID: " DF_DTIF "\n", DP_DTI(&entry->ddtx_id)); + + rc = dv_dtx_active_entry_discard_invalid(args->coh, &entry->ddtx_id, &discarded); + if (SUCCESS(rc)) { + ddb_printf(ctx, "Entry's record(s) discarded: %d\n", discarded); + } else if (rc == -DER_NONEXIST) { + ddb_print(ctx, "No entry found\n"); + rc = 0; + } else { + ddb_errorf(ctx, "Error: " DF_RC "\n", DP_RC(rc)); + } + + return 0; +} + +int +ddb_run_dtx_act_discard_invalid(struct ddb_ctx *ctx, struct dtx_act_options *opt) +{ + struct dtx_modify_args args = {0}; + struct dtx_active_entry_discard_invalid_cb_arg bundle = {.ctx = ctx, .args = &args}; + int rc; + + if (!ctx->dc_write_mode) { + ddb_error(ctx, error_msg_write_mode_only); + return -DER_INVAL; + } + + if (opt->dtx_id != NULL && strcmp(opt->dtx_id, DTI_ALL) == 0) { + args.dti_all = true; + } + + rc = dtx_modify_init(ctx, opt->path, opt->dtx_id, &args); + if (!SUCCESS(rc)) { + return rc; + } + + if (args.dti_all) { + rc = dv_dtx_get_act_table(args.coh, dtx_active_entry_discard_invalid, &bundle); + if (!SUCCESS(rc)) { + return rc; + } + } else { + struct dv_dtx_active_entry entry = {.ddtx_id = args.dti}; + dtx_active_entry_discard_invalid(&entry, &bundle); + } + + dtx_modify_fini(&args); + return rc; +} diff --git a/src/utils/ddb/ddb_vos.c b/src/utils/ddb/ddb_vos.c index fe36ceffed7..ecf6ba5ba3d 100644 --- a/src/utils/ddb/ddb_vos.c +++ b/src/utils/ddb/ddb_vos.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. 
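ddb_run_dtx_act_discard_invalid() above either walks the whole active DTX table (when the id argument is the literal "all") or processes the single parsed id. A simplified sketch of that dispatch, with the table walk and per-entry discard reduced to plain Python callables standing in for the VOS calls:

# Sketch: "all" walks every active entry, otherwise one id is handled
# (entries and the discard callback are stand-ins for the VOS layer).
def discard_invalid(dtx_id, active_entries, discard_one):
    if dtx_id == "all":
        for entry in active_entries:
            discard_one(entry)
    else:
        discard_one(dtx_id)

active = ["id-1", "id-2"]
discard_invalid("all", active, lambda e: print("discard records of", e))
discard_invalid("id-2", active, lambda e: print("discard records of", e))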
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1424,6 +1425,12 @@ dv_dtx_abort_active_entry(daos_handle_t coh, struct dtx_id *dti) return vos_dtx_abort(coh, dti, DAOS_EPOCH_MAX); } +int +dv_dtx_active_entry_discard_invalid(daos_handle_t coh, struct dtx_id *dti, int *discarded) +{ + return vos_dtx_discard_invalid(coh, dti, discarded); +} + int dv_delete(daos_handle_t poh, struct dv_tree_path *vtp) { diff --git a/src/utils/ddb/ddb_vos.h b/src/utils/ddb/ddb_vos.h index e4a2ad26992..790d62f431b 100644 --- a/src/utils/ddb/ddb_vos.h +++ b/src/utils/ddb/ddb_vos.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -190,6 +191,8 @@ int dv_dtx_get_act_table(daos_handle_t coh, dv_dtx_act_handler handler_cb, void int dv_dtx_clear_cmt_table(daos_handle_t coh); int dv_dtx_commit_active_entry(daos_handle_t coh, struct dtx_id *dti); int dv_dtx_abort_active_entry(daos_handle_t coh, struct dtx_id *dti); +int +dv_dtx_active_entry_discard_invalid(daos_handle_t coh, struct dtx_id *dti, int *discarded); /* Sync the smd table with information saved in blobs */ typedef int (*dv_smd_sync_complete)(void *cb_args, uuid_t pool_id, uint32_t vos_id, diff --git a/src/utils/ddb/tests/SConscript b/src/utils/ddb/tests/SConscript index f502847a597..14fa7f0cff5 100644 --- a/src/utils/ddb/tests/SConscript +++ b/src/utils/ddb/tests/SConscript @@ -26,28 +26,44 @@ def scons(): libs = ['vos', 'daos_common_pmem', 'abt', 'gurt', 'uuid', 'bio', 'cart', 'cmocka', 'ddb'] # spdk libraries - libs += ['spdk_event', 'spdk_log'] - libs += ['spdk_bdev', 'spdk_blob', 'spdk_blob_bdev', 'spdk_json'] - libs += ['spdk_nvme', 'spdk_init', 'spdk_thread', 'spdk_log'] - libs += ['spdk_env_dpdk', 'spdk_thread', 'spdk_bdev', 'rte_mempool'] - libs += ['rte_mempool_ring', 'rte_bus_pci', 'rte_pci', 'rte_ring'] - libs += ['rte_mbuf', 'rte_eal', 'rte_kvargs', 'spdk_bdev_aio'] - libs += ['spdk_bdev_nvme', 'spdk_blob', 'spdk_nvme', 'spdk_util'] - libs += ['spdk_json', 'spdk_jsonrpc', 'spdk_rpc', 'spdk_trace'] - libs += ['spdk_sock', 'spdk_log', 'spdk_notify', 'spdk_blob_bdev'] - libs += ['spdk_vmd', 'spdk_event_bdev', 'spdk_init', 'rte_power'] - src = ['ddb_cmd_options_tests.c', - 'ddb_commands_tests.c', - 'ddb_main_tests.c', - 'ddb_parse_tests.c', - 'ddb_path_tests.c', - 'ddb_test_driver.c', - 'ddb_vos_tests.c', - 'ddb_commands_print_tests.c'] - ddb_tests = denv.d_program('ddb_tests', [src], LIBS=libs) + spdk_libs = ['rte_bus_pci', 'rte_eal', 'rte_kvargs', 'rte_mbuf', 'rte_mempool_ring', + 'rte_mempool', 'rte_pci', 'rte_power', 'rte_ring', 'spdk_bdev_aio', + 'spdk_bdev_nvme', 'spdk_bdev', 'spdk_bdev', 'spdk_blob_bdev', 'spdk_blob_bdev', + 'spdk_blob', 'spdk_blob', 'spdk_env_dpdk', 'spdk_event_bdev', 'spdk_event', + 'spdk_init', 'spdk_init', 'spdk_json', 'spdk_json', 'spdk_jsonrpc', 'spdk_log', + 'spdk_log', 'spdk_log', 'spdk_notify', 'spdk_nvme', 'spdk_nvme', 'spdk_rpc', + 'spdk_sock', 'spdk_thread', 'spdk_thread', 'spdk_trace', 'spdk_util', 'spdk_vmd'] + src = ['ddb_cmd_options_tests.c', 'ddb_commands_print_tests.c', 'ddb_commands_tests.c', + 'ddb_main_tests.c', 'ddb_parse_tests.c', 'ddb_path_tests.c', 'ddb_test_driver.c', + 'ddb_vos_tests.c'] + ddb_tests = denv.d_program('ddb_tests', [src], LIBS=libs + spdk_libs) denv.Install('$PREFIX/bin/', ddb_tests) + # Build unit tests + denv = env.Clone() + prereqs.require(denv, 'argobots', 'spdk') + libs = 
['uuid', 'daos_common_pmem', 'gurt', 'vea', 'abt', 'bio', 'cmocka', 'pthread'] + denv.AppendUnique(RPATH_FULL=['$PREFIX/lib64/daos_srv']) + denv.AppendUnique(CPPPATH=[Dir('../').srcnode()]) + denv.AppendUnique(CPPPATH=[Dir('../../../vos/').srcnode()]) + denv.AppendUnique(CPPPATH=[Dir('../../../bio/').srcnode()]) + # Required for dtx_act_discard_invalid tests. + # This function is validated by its respective unit tests. + denv.AppendUnique(LINKFLAGS=['-Wl,--wrap=vos_dtx_discard_invalid']) + + denv.Append(CPPDEFINES=['-DDAOS_PMEM_BUILD']) + vos_src = Glob('../../../vos/*.c') + + mock_src = Glob('../../../dtx/tests/*_mock.c') + # Pylint does not like the exclude keyword below, but it works. + # pylint: disable=unexpected-keyword-arg + ddb_src = Glob('../*.c', exclude=['../ddb_entry.c']) + src = ['ddb_ut.c', 'ddb_vos_ut.c'] + ddb_ut = denv.d_program('ddb_ut', src + vos_src + mock_src + ddb_src, LIBS=libs + spdk_libs) + + denv.Install('$PREFIX/bin/', ddb_ut) + if __name__ == "SCons.Script": scons() diff --git a/src/utils/ddb/tests/ddb_cmd_options_tests.c b/src/utils/ddb/tests/ddb_cmd_options_tests.c index 643815bb629..9481748af0a 100644 --- a/src/utils/ddb/tests/ddb_cmd_options_tests.c +++ b/src/utils/ddb/tests/ddb_cmd_options_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -268,7 +269,7 @@ static void dtx_act_commit_options_parsing(void **state) { struct ddb_cmd_info info = {0}; - struct dtx_act_commit_options *options = &info.dci_cmd_option.dci_dtx_act_commit; + struct dtx_act_options *options = &info.dci_cmd_option.dci_dtx_act; /* test invalid arguments and options */ test_run_inval_cmd("dtx_act_commit", "path", "dtx_id", "extra"); /* too many argument */ @@ -284,7 +285,7 @@ static void dtx_act_abort_options_parsing(void **state) { struct ddb_cmd_info info = {0}; - struct dtx_act_abort_options *options = &info.dci_cmd_option.dci_dtx_act_abort; + struct dtx_act_options *options = &info.dci_cmd_option.dci_dtx_act; /* test invalid arguments and options */ test_run_inval_cmd("dtx_act_abort", "path", "dtx_id", "extra"); /* too many argument */ diff --git a/src/utils/ddb/tests/ddb_commands_tests.c b/src/utils/ddb/tests/ddb_commands_tests.c index ee3b8175d6a..b0fdbf5ad89 100644 --- a/src/utils/ddb/tests/ddb_commands_tests.c +++ b/src/utils/ddb/tests/ddb_commands_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2022-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
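The ddb_ut build above relies on the linker's --wrap option so the unit test can intercept vos_dtx_discard_invalid() with cmocka expectations. For readers coming from the Python side of this tree, the closest analogue is substituting a dependency with a test double; the sketch below uses unittest.mock and is an analogy only, not how the C test works:

# Sketch: stub a dependency during a test, analogous in spirit to linking
# with -Wl,--wrap=vos_dtx_discard_invalid and mocking it in cmocka.
from unittest import mock

def discard_invalid(backend, dtx_id):
    """Code under test: forwards to a lower-level backend call."""
    return backend(dtx_id)

backend = mock.Mock(return_value=0)          # stands in for the wrapped symbol
assert discard_invalid(backend, "id-1") == 0
backend.assert_called_once_with("id-1")
print("stubbed backend verified")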
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -339,7 +340,7 @@ clear_cmt_dtx_cmd_tests(void **state) static void dtx_commit_entry_tests(void **state) { - struct dtx_act_commit_options opt = {0}; + struct dtx_act_options opt = {0}; assert_invalid(ddb_run_dtx_act_commit(&g_ctx, &opt)); opt.path = "[0]/[0]"; @@ -352,7 +353,7 @@ dtx_commit_entry_tests(void **state) static void dtx_abort_entry_tests(void **state) { - struct dtx_act_abort_options opt = {0}; + struct dtx_act_options opt = {0}; assert_invalid(ddb_run_dtx_act_abort(&g_ctx, &opt)); @@ -362,6 +363,27 @@ dtx_abort_entry_tests(void **state) assert_success(ddb_run_dtx_act_abort(&g_ctx, &opt)); } +static void +dtx_act_discard_invalid_tests(void **state) +{ + struct dtx_act_options opt = {0}; + + g_ctx.dc_write_mode = false; + assert_invalid(ddb_run_dtx_act_discard_invalid(&g_ctx, &opt)); + + g_ctx.dc_write_mode = true; + assert_invalid(ddb_run_dtx_act_discard_invalid(&g_ctx, &opt)); + + opt.path = "[0]/[0]"; + assert_invalid(ddb_run_dtx_act_discard_invalid(&g_ctx, &opt)); + + opt.dtx_id = "12345678-1234-1234-1234-123456789012.1234"; + assert_success(ddb_run_dtx_act_discard_invalid(&g_ctx, &opt)); + + opt.dtx_id = "all"; + assert_success(ddb_run_dtx_act_discard_invalid(&g_ctx, &opt)); +} + static void feature_cmd_tests(void **state) { @@ -431,6 +453,7 @@ ddb_commands_tests_run() TEST(process_ilog_cmd_tests), TEST(clear_cmt_dtx_cmd_tests), TEST(dtx_commit_entry_tests), + TEST(dtx_act_discard_invalid_tests), TEST(dtx_abort_entry_tests), TEST(feature_cmd_tests), }; diff --git a/src/utils/ddb/tests/ddb_ut.c b/src/utils/ddb/tests/ddb_ut.c new file mode 100644 index 00000000000..3ea69695d55 --- /dev/null +++ b/src/utils/ddb/tests/ddb_ut.c @@ -0,0 +1,104 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ddb.h" + +int +ddb_vos_tests_run(void); + +struct ddb_test_driver_arguments { + bool dtda_create_vos_file; +}; + +static int +ddb_test_driver_arguments_parse(uint32_t argc, char **argv, struct ddb_test_driver_arguments *args) +{ + struct option program_options[] = {{"create_vos", optional_argument, NULL, 'c'}, {NULL}}; + int index = 0, opt; + + memset(args, 0, sizeof(*args)); + + optind = 1; + opterr = 0; + while ((opt = getopt_long(argc, argv, "c", program_options, &index)) != -1) { + switch (opt) { + case 'c': + args->dtda_create_vos_file = true; + break; + case '?': + printf("'%c' is unknown\n", optopt); + return -DER_INVAL; + default: + return -DER_INVAL; + } + } + + return 0; +} + +static bool +char_in_tests(char a, char *str, uint32_t str_len) +{ + int i; + + if (strlen(str) == 0) /* if there is no filter, always return true */ + return true; + for (i = 0; i < str_len; i++) { + if (a == str[i]) + return true; + } + + return false; +} + +/* + * ----------------------------------------------- + * Execute + * ----------------------------------------------- + */ +int +main(int argc, char *argv[]) +{ + struct ddb_test_driver_arguments args = {0}; + int rc; + + rc = ddb_init(); + if (rc != 0) + return -rc; + + ddb_test_driver_arguments_parse(argc, argv, &args); + + assert_false(args.dtda_create_vos_file); + +#define RUN_TEST_SUIT(c, func) \ + do { \ + if (char_in_tests(c, test_suites, ARRAY_SIZE(test_suites))) \ + rc += func(); \ + } while (0) + + /* filtering suites and tests */ + char test_suites[] = ""; +#if CMOCKA_FILTER_SUPPORTED == 1 /** requires cmocka 1.1.5 */ + cmocka_set_test_filter("*dtx_act_discard_invalid*"); +#endif + RUN_TEST_SUIT('c', ddb_vos_tests_run); + + ddb_fini(); + if (rc > 0) + printf("%d test(s) failed!\n", rc); + else + printf("All tests successful!\n"); + return rc; +} diff --git a/src/utils/ddb/tests/ddb_vos_ut.c b/src/utils/ddb/tests/ddb_vos_ut.c new file mode 100644 index 00000000000..a4f943bd6ad --- /dev/null +++ b/src/utils/ddb/tests/ddb_vos_ut.c @@ -0,0 +1,57 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#include +#include +#include +#include +#include "ddb_vos.h" + +#define COH_COOKIE 0x1515 +#define DTX_ID_PTR ((struct dtx_id *)0x6367) +#define DISCARDED_PTR ((int *)0x9303) + +int +__wrap_vos_dtx_discard_invalid(daos_handle_t coh, struct dtx_id *dti, int *discarded) +{ + assert_int_equal(coh.cookie, COH_COOKIE); + assert_ptr_equal(dti, DTX_ID_PTR); + assert_ptr_equal(discarded, DISCARDED_PTR); + + return mock(); +} + +#define SOME_ERROR (-DER_BAD_CERT) + +static void +dtx_act_discard_invalid_test(void **state) +{ + daos_handle_t coh = {.cookie = COH_COOKIE}; + int rc; + + will_return(__wrap_vos_dtx_discard_invalid, SOME_ERROR); + rc = dv_dtx_active_entry_discard_invalid(coh, DTX_ID_PTR, DISCARDED_PTR); + assert_int_equal(rc, SOME_ERROR); + + will_return(__wrap_vos_dtx_discard_invalid, 0); + rc = dv_dtx_active_entry_discard_invalid(coh, DTX_ID_PTR, DISCARDED_PTR); + assert_int_equal(rc, 0); +} + +#define TEST(x) \ + { \ + #x, x##_test, NULL, NULL \ + } + +const struct CMUnitTest dv_test_cases[] = { + TEST(dtx_act_discard_invalid), +}; + +int +ddb_vos_tests_run() +{ + return cmocka_run_group_tests_name("DDB VOS Interface Unit Tests", dv_test_cases, NULL, + NULL); +} diff --git a/src/vos/evtree.c b/src/vos/evtree.c index 59f8855c3c1..20b803192bd 100644 --- a/src/vos/evtree.c +++ b/src/vos/evtree.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -4088,3 +4089,12 @@ evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats) return rc; } +bool +evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid) +{ + if (evt == NULL || evt->dc_magic != EVT_DESC_MAGIC) { + return false; + } + + return (evt->dc_dtx == dtx_lid); +} diff --git a/src/vos/ilog.c b/src/vos/ilog.c index 1d1d6508087..58e48dffd83 100644 --- a/src/vos/ilog.c +++ b/src/vos/ilog.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -17,8 +18,7 @@ #include "vos_layout.h" #include "vos_ts.h" #include "ilog.h" - -#define ILOG_TREE_ORDER 11 +#include "ilog_internal.h" enum { ILOG_ITER_NONE, @@ -27,27 +27,6 @@ enum { ILOG_ITER_FINI, }; -/** The ilog is split into two parts. If there is one entry, the ilog - * is embedded into the root df struct. If not, a b+tree is used. - * The tree is used more like a set where only the key is used. 
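evt_desc_is_valid() above accepts a descriptor only when it is non-NULL, carries the expected magic, and references the expected local DTX id. The same three checks restated over a toy record; the field names come from the patch, the magic value below is invented:

# Sketch: the validity rules applied by evt_desc_is_valid(), on a toy record.
EVT_DESC_MAGIC = 0x55AA55AA  # invented value; the real constant lives in evt_priv.h

def evt_desc_is_valid(desc, dtx_lid):
    if desc is None or desc.get("dc_magic") != EVT_DESC_MAGIC:
        return False
    return desc.get("dc_dtx") == dtx_lid

print(evt_desc_is_valid(None, 123))                                        # False
print(evt_desc_is_valid({"dc_magic": 0, "dc_dtx": 123}, 123))               # False
print(evt_desc_is_valid({"dc_magic": EVT_DESC_MAGIC, "dc_dtx": 123}, 123))  # True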
- */ - -struct ilog_tree { - umem_off_t it_root; - uint64_t it_embedded; -}; - -struct ilog_array { - /** Current length of array */ - uint32_t ia_len; - /** Allocated length of array */ - uint32_t ia_max_len; - /** Pad to 16 bytes */ - uint64_t ia_pad; - /** Entries in array */ - struct ilog_id ia_id[0]; -}; - struct ilog_array_cache { /** Pointer to entries */ struct ilog_id *ac_entries; @@ -57,15 +36,6 @@ struct ilog_array_cache { uint32_t ac_nr; }; -struct ilog_root { - union { - struct ilog_id lr_id; - struct ilog_tree lr_tree; - }; - uint32_t lr_ts_idx; - uint32_t lr_magic; -}; - struct ilog_context { /** Root pointer */ struct ilog_root *ic_root; @@ -193,14 +163,6 @@ ilog_init(void) return 0; } -/* 4 bit magic number + version */ -#define ILOG_MAGIC 0x00000006 -#define ILOG_MAGIC_BITS 4 -#define ILOG_MAGIC_MASK ((1 << ILOG_MAGIC_BITS) - 1) -#define ILOG_VERSION_INC (1 << ILOG_MAGIC_BITS) -#define ILOG_VERSION_MASK ~(ILOG_VERSION_INC - 1) -#define ILOG_MAGIC_VALID(magic) (((magic) & ILOG_MAGIC_MASK) == ILOG_MAGIC) - static inline uint32_t ilog_mag2ver(uint32_t magic) { if (!ILOG_MAGIC_VALID(magic)) @@ -278,13 +240,6 @@ ilog_tx_end(struct ilog_context *lctx, int rc) return umem_tx_end(lctx->ic_umm, rc); } -static inline bool -ilog_empty(struct ilog_root *root) -{ - return !root->lr_tree.it_embedded && - root->lr_tree.it_root == UMOFF_NULL; -} - static void ilog_addref(struct ilog_context *lctx) { @@ -1621,3 +1576,35 @@ ilog_version_get(daos_handle_t loh) return ilog_mag2ver(lctx->ic_root->lr_magic); } + +bool +ilog_is_valid(struct umem_instance *umm, umem_off_t rec, uint32_t dtx_lid, daos_epoch_t epoch) +{ + struct ilog_root *root = umem_off2ptr(umm, umem_off2offset(rec)); + struct ilog_array *array; + struct ilog_id *id; + + // !ILOG_ASSERT_VALID(ilog) + if (root == NULL || !ILOG_MAGIC_VALID(root->lr_magic)) { + return false; + } + + if (ilog_empty(root)) { + return false; + } + + if (root->lr_tree.it_embedded) { + id = &root->lr_id; + return (id->id_tx_id == dtx_lid && id->id_epoch == epoch); + } + + array = umem_off2ptr(umm, root->lr_tree.it_root); + for (int i = 0; i < array->ia_len; ++i) { + id = &array->ia_id[i]; + if (id->id_tx_id == dtx_lid && id->id_epoch == epoch) { + return true; + } + } + + return false; +} diff --git a/src/vos/ilog.h b/src/vos/ilog.h index 0cc7ceb5c4f..467b6819813 100644 --- a/src/vos/ilog.h +++ b/src/vos/ilog.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -319,4 +320,18 @@ ilog_is_punch(const struct ilog_entry *entry) entry->ie_id.id_update_minor_eph; } +/** Validate the provided ilog. + * + * Note: It is designed for catastrophic recovery. Not to perform at run-time. + * + * \param umm[in] unified memory class instance + * \param rec[in] offset of the ilog + * \param dtx_lid[in] expected local DTX id + * \param epoch[in] expected epoch + * + * \return true if ilog is valid. + **/ +bool +ilog_is_valid(struct umem_instance *umm, umem_off_t rec, uint32_t dtx_lid, daos_epoch_t epoch); + #endif /* __ILOG_H__ */ diff --git a/src/vos/ilog_internal.h b/src/vos/ilog_internal.h new file mode 100644 index 00000000000..e729f76ea4b --- /dev/null +++ b/src/vos/ilog_internal.h @@ -0,0 +1,61 @@ +/** + * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * VOS Object/Key incarnation log + * vos/ilog_internal.h + * + * Author: Jeff Olivier + */ + +#ifndef __ILOG_INTERNAL_H__ +#define __ILOG_INTERNAL_H__ + +/* 4 bit magic number + version */ +#define ILOG_MAGIC 0x00000006 +#define ILOG_MAGIC_BITS 4 +#define ILOG_MAGIC_MASK ((1 << ILOG_MAGIC_BITS) - 1) +#define ILOG_VERSION_INC (1 << ILOG_MAGIC_BITS) +#define ILOG_VERSION_MASK ~(ILOG_VERSION_INC - 1) +#define ILOG_MAGIC_VALID(magic) (((magic)&ILOG_MAGIC_MASK) == ILOG_MAGIC) + +/** The ilog is split into two parts. If there is one entry, the ilog + * is embedded into the root df struct. If not, a b+tree is used. + * The tree is used more like a set where only the key is used. + */ + +struct ilog_tree { + umem_off_t it_root; + uint64_t it_embedded; +}; + +struct ilog_root { + union { + struct ilog_id lr_id; + struct ilog_tree lr_tree; + }; + uint32_t lr_ts_idx; + uint32_t lr_magic; +}; + +static inline bool +ilog_empty(struct ilog_root *root) +{ + return !root->lr_tree.it_embedded && root->lr_tree.it_root == UMOFF_NULL; +} + +struct ilog_array { + /** Current length of array */ + uint32_t ia_len; + /** Allocated length of array */ + uint32_t ia_max_len; + /** Pad to 16 bytes */ + uint64_t ia_pad; + /** Entries in array */ + struct ilog_id ia_id[0]; +}; + +#endif /* __ILOG_INTERNAL_H__ */ diff --git a/src/vos/tests/SConscript b/src/vos/tests/SConscript index 85ec6b80660..1931efa4b34 100644 --- a/src/vos/tests/SConscript +++ b/src/vos/tests/SConscript @@ -30,7 +30,7 @@ def scons(): vos_test_src = ['vos_tests.c', vts_objs, 'vts_pool.c', 'vts_container.c', 'vts_aggregate.c', 'vts_gc.c', 'vts_checksum.c', 'vts_ilog.c', 'vts_array.c', 'vts_pm.c', 'vts_ts.c', 'vts_mvcc.c', - 'vos_cmd.c', 'vts_wal.c'] + 'vos_cmd.c', 'vts_wal.c', 'vts_evtree.c', 'vts_tree.c'] vos_tests = tenv.d_program('vos_tests', vos_test_src, LIBS=libraries) tenv.AppendUnique(CPPPATH=[Dir('../../common/tests').srcnode()]) evt_ctl = tenv.d_program('evt_ctl', ['evt_ctl.c', utest_utils, cmd_parser], LIBS=libraries) diff --git a/src/vos/tests/vos_tests.c b/src/vos/tests/vos_tests.c index 17db53f28c8..41bc56e8f36 100644 --- a/src/vos/tests/vos_tests.c +++ b/src/vos/tests/vos_tests.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -89,6 +90,8 @@ run_all_tests(int keys) failed += run_ilog_tests(cfg_desc_io); failed += run_csum_extent_tests(cfg_desc_io); failed += run_wal_tests(cfg_desc_io); + failed += run_evtree_tests(cfg_desc_io); + failed += run_tree_tests(cfg_desc_io); failed += run_io_test(&type_list[0], ARRAY_SIZE(type_list), keys, cfg_desc_io); diff --git a/src/vos/tests/vts_common.h b/src/vos/tests/vts_common.h index 11529f040b7..a5b72a00d3a 100644 --- a/src/vos/tests/vts_common.h +++ b/src/vos/tests/vts_common.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
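With the root, tree, and array structures now shared through ilog_internal.h, ilog_is_valid() reduces to: reject an invalid or empty root, check the single embedded id, or scan the array for a (tx_id, epoch) match. A condensed sketch of that decision over a dict-based model; the data model is invented and only the lookup order mirrors the patch:

# Sketch: the lookup order used by ilog_is_valid(), over a toy ilog model.
def ilog_is_valid(root, dtx_lid, epoch):
    if root is None or not root.get("magic_ok") or root.get("empty"):
        return False
    if root.get("embedded"):
        return root["id"] == (dtx_lid, epoch)
    return any(entry == (dtx_lid, epoch) for entry in root.get("array", []))

embedded = {"magic_ok": True, "embedded": True, "id": (123, 225)}
tree = {"magic_ok": True, "embedded": False, "array": [(123, 224), (123, 225)]}
print(ilog_is_valid(embedded, 123, 225), ilog_is_valid(tree, 123, 225),
      ilog_is_valid({"magic_ok": False}, 123, 225))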
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -141,6 +142,10 @@ int run_csum_extent_tests(const char *cfg); int run_mvcc_tests(const char *cfg); int run_wal_tests(const char *cfg); int +run_evtree_tests(const char *cfg); +int +run_tree_tests(const char *cfg); +int run_vos_command(const char *arg0, const char *cmd); void diff --git a/src/vos/tests/vts_evtree.c b/src/vos/tests/vts_evtree.c new file mode 100644 index 00000000000..e68b7e8ff1e --- /dev/null +++ b/src/vos/tests/vts_evtree.c @@ -0,0 +1,51 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is part of vos/tests/ + * + * vos/tests/vts_evtree.c + */ +#define D_LOGFAC DD_FAC(tests) + +#include + +#include +#include +#include +#include + +#include "evt_priv.h" + +/* values picked arbitrarily where invalid means not as expected by the caller */ +#define DTX_LID_VALID ((uint32_t)123) +#define DTX_LID_INVALID ((uint32_t)DTX_LID_VALID + 1) + +static const struct evt_desc invalid_magic = {.dc_magic = (EVT_DESC_MAGIC + 1)}; + +static const struct evt_desc invalid_dtx_lid = {.dc_magic = EVT_DESC_MAGIC, + .dc_dtx = DTX_LID_INVALID}; + +static const struct evt_desc valid = {.dc_magic = EVT_DESC_MAGIC, .dc_dtx = DTX_LID_VALID}; + +static void +evt_desc_is_valid_test(void **state) +{ + assert_false(evt_desc_is_valid(NULL, DTX_LID_VALID)); + assert_false(evt_desc_is_valid(&invalid_magic, DTX_LID_VALID)); + assert_false(evt_desc_is_valid(&invalid_dtx_lid, DTX_LID_VALID)); + assert_true(evt_desc_is_valid(&valid, DTX_LID_VALID)); +} + +static const struct CMUnitTest evtree_tests_all[] = { + {"VOS1000: evt_desc_is_valid", evt_desc_is_valid_test, NULL, NULL}, +}; + +int +run_evtree_tests(const char *cfg) +{ + char *test_name = "evtree"; + return cmocka_run_group_tests_name(test_name, evtree_tests_all, NULL, NULL); +} diff --git a/src/vos/tests/vts_ilog.c b/src/vos/tests/vts_ilog.c index c696ff0b487..2d0072efc2c 100644 --- a/src/vos/tests/vts_ilog.c +++ b/src/vos/tests/vts_ilog.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,6 +16,7 @@ #include #include "vts_io.h" #include +#include "ilog_internal.h" #define LOG_FAIL(rc, expected_value, format, ...) \ do { \ @@ -1055,17 +1057,142 @@ ilog_test_discard(void **state) ilog_fetch_finish(&ilents); } +/* values picked arbitrarily where invalid means not as expected by the caller */ +#define DTX_LID_VALID ((uint32_t)123) +#define DTX_LID_INVALID (DTX_LID_VALID + 1) +#define EPOCH_VALID ((daos_epoch_t)225) +#define EPOCH_INVALID (EPOCH_VALID + 1) + +static uint32_t dtx_lid_all[] = {DTX_LID_VALID, DTX_LID_INVALID}; +static uint32_t epoch_all[] = {EPOCH_VALID, EPOCH_INVALID}; + +#define BOOL2STR(x) ((x) ? 
"true" : "false") + +#define ILOG_ARRAY_MAX 3 +#define ILOG_ARRAY_SIZE (sizeof(struct ilog_id) * ILOG_ARRAY_MAX) + +/* all cases of 3-item arrays containing and not containing the valid epoch */ +static struct ilog_id no_valid_epoch1[] = { + {.id_epoch = EPOCH_VALID - 3}, {.id_epoch = EPOCH_VALID - 2}, {.id_epoch = EPOCH_VALID - 1}}; +static struct ilog_id valid_epoch1[] = { + {.id_epoch = EPOCH_VALID - 2}, {.id_epoch = EPOCH_VALID - 1}, {.id_epoch = EPOCH_VALID}}; +static struct ilog_id valid_epoch2[] = { + {.id_epoch = EPOCH_VALID - 1}, {.id_epoch = EPOCH_VALID}, {.id_epoch = EPOCH_VALID + 1}}; +static struct ilog_id valid_epoch3[] = { + {.id_epoch = EPOCH_VALID}, {.id_epoch = EPOCH_VALID + 1}, {.id_epoch = EPOCH_VALID + 2}}; +static struct ilog_id no_valid_epoch2[] = { + {.id_epoch = EPOCH_VALID + 1}, {.id_epoch = EPOCH_VALID + 2}, {.id_epoch = EPOCH_VALID + 3}}; + +static struct ilog_id *no_valid_epoch_all[] = {no_valid_epoch1, no_valid_epoch2}; +static struct ilog_id *valid_epoch_all[] = {valid_epoch1, valid_epoch2, valid_epoch3}; + +static void +ilog_is_valid_test(void **state) +{ + struct umem_instance umm; + umem_off_t rec; + struct ilog_root *root; + struct ilog_array *array; + + struct umem_attr uma = {.uma_id = UMEM_CLASS_VMEM, .uma_pool = NULL}; + + umem_class_init(&uma, &umm); + + /* 1. ILOG rec is a NULL pointer. */ + rec = UMOFF_NULL; + assert_false(ilog_is_valid(&umm, rec, DTX_LID_VALID, EPOCH_VALID)); + + /* 2. Invalid magic. */ + rec = umem_zalloc(&umm, sizeof(struct ilog_root)); + root = umem_off2ptr(&umm, rec); + root->lr_magic = ILOG_MAGIC + 1; + assert_false(ILOG_MAGIC_VALID(root->lr_magic)); + assert_false(ilog_is_valid(&umm, rec, DTX_LID_VALID, EPOCH_VALID)); + + /* Set valid magic for all cases down below. */ + root->lr_magic = ILOG_MAGIC; + assert_true(ILOG_MAGIC_VALID(root->lr_magic)); + + /* 3. Empty ILOG can't reference dtx_lid nor epoch. */ + root->lr_tree.it_embedded = 0; + root->lr_tree.it_root = UMOFF_NULL; + assert_true(ilog_empty(root)); + assert_false(ilog_is_valid(&umm, rec, DTX_LID_VALID, EPOCH_VALID)); + + /* 4. Embedded - all cases */ + root->lr_tree.it_embedded = 1; + for (int i = 0; i < ARRAY_SIZE(dtx_lid_all); ++i) { + root->lr_id.id_tx_id = dtx_lid_all[i]; + for (int j = 0; j < ARRAY_SIZE(epoch_all); ++j) { + root->lr_id.id_epoch = epoch_all[j]; + bool exp = (dtx_lid_all[i] == DTX_LID_VALID && epoch_all[j] == EPOCH_VALID); + bool result = ilog_is_valid(&umm, rec, DTX_LID_VALID, EPOCH_VALID); + if (result != exp) { + fail_msg("ilog_is_valid() result is not as expected %s != %s for " + "{dtx_lid=%u, epoch=%u}", + BOOL2STR(result), BOOL2STR(exp), dtx_lid_all[i], + epoch_all[j]); + } + } + } + + /* Prepare ILOG array for all cases below. */ + root->lr_tree.it_embedded = 0; + root->lr_tree.it_root = umem_zalloc(&umm, sizeof(struct ilog_array) + ILOG_ARRAY_SIZE); + array = umem_off2ptr(&umm, root->lr_tree.it_root); + array->ia_len = ILOG_ARRAY_MAX; + array->ia_max_len = ILOG_ARRAY_MAX; + + /* 5. 
Array - no valid epoch */ + for (int i = 0; i < ARRAY_SIZE(dtx_lid_all); ++i) { + uint32_t dtx_lid = dtx_lid_all[i]; + for (int j = 0; j < ARRAY_SIZE(no_valid_epoch_all); ++j) { + /* prepare an array of ILOG id's with epochs from the template */ + memcpy(array->ia_id, no_valid_epoch_all[j], ILOG_ARRAY_SIZE); + /* fill-in dtx_lid for all of the array's entries */ + for (int k = 0; k < ILOG_ARRAY_MAX; ++k) { + array->ia_id[k].id_tx_id = dtx_lid; + } + if (ilog_is_valid(&umm, rec, DTX_LID_VALID, EPOCH_VALID)) { + fail_msg("ilog_is_valid() result is not as expected true != false " + "using no_valid_epoch_all[%d] and dtx_lid=%u", + j, dtx_lid); + } + } + } + + /* 6. Array - with valid epoch */ + for (int i = 0; i < ARRAY_SIZE(dtx_lid_all); ++i) { + uint32_t dtx_lid = dtx_lid_all[i]; + for (int j = 0; j < ARRAY_SIZE(valid_epoch_all); ++j) { + /* prepare an array of ILOG id's with epochs from the template */ + memcpy(array->ia_id, valid_epoch_all[j], ILOG_ARRAY_SIZE); + /* fill-in dtx_lid for all of the array's entries */ + for (int k = 0; k < ILOG_ARRAY_MAX; ++k) { + array->ia_id[k].id_tx_id = dtx_lid; + } + /* the valid epoch is there so dtx_lid's validity is decisive */ + bool exp = (dtx_lid == DTX_LID_VALID); + bool result = ilog_is_valid(&umm, rec, DTX_LID_VALID, EPOCH_VALID); + if (exp != result) { + fail_msg("ilog_is_valid() result is not as expected %s != %s using " + "valid_epoch_all[%d] and dtx_lid=%u", + BOOL2STR(result), BOOL2STR(exp), j, dtx_lid); + } + } + } + + umem_free(&umm, root->lr_tree.it_root); + umem_free(&umm, rec); +} + static const struct CMUnitTest inc_tests[] = { - { "VOS500.1: VOS incarnation log UPDATE", ilog_test_update, NULL, - NULL}, - { "VOS500.2: VOS incarnation log ABORT test", ilog_test_abort, NULL, - NULL}, - { "VOS500.3: VOS incarnation log PERSIST test", ilog_test_persist, NULL, - NULL}, - { "VOS500.4: VOS incarnation log AGGREGATE test", ilog_test_aggregate, - NULL, NULL}, - { "VOS500.5: VOS incarnation log DISCARD test", ilog_test_discard, - NULL, NULL}, + {"VOS500.1: VOS incarnation log UPDATE", ilog_test_update, NULL, NULL}, + {"VOS500.2: VOS incarnation log ABORT test", ilog_test_abort, NULL, NULL}, + {"VOS500.3: VOS incarnation log PERSIST test", ilog_test_persist, NULL, NULL}, + {"VOS500.4: VOS incarnation log AGGREGATE test", ilog_test_aggregate, NULL, NULL}, + {"VOS500.5: VOS incarnation log DISCARD test", ilog_test_discard, NULL, NULL}, + {"VOS501: ilog_is_valid", ilog_is_valid_test, NULL, NULL}, }; int diff --git a/src/vos/tests/vts_tree.c b/src/vos/tests/vts_tree.c new file mode 100644 index 00000000000..52d48f739e2 --- /dev/null +++ b/src/vos/tests/vts_tree.c @@ -0,0 +1,45 @@ +/** + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is part of vos/tests/ + * + * vos/tests/vts_tree.c + */ +#define D_LOGFAC DD_FAC(tests) + +#include +#include +#include +#include + +#include "vos_internal.h" + +/* values picked arbitrarily where invalid means not as expected by the caller */ +#define DTX_LID_VALID ((uint32_t)123) +#define DTX_LID_INVALID ((uint32_t)DTX_LID_VALID + 1) + +static const struct vos_irec_df invalid_dtx_lid = {.ir_dtx = DTX_LID_INVALID}; + +static const struct vos_irec_df valid = {.ir_dtx = DTX_LID_VALID}; + +static void +vos_irec_is_valid_test(void **state) +{ + assert_false(vos_irec_is_valid(NULL, DTX_LID_VALID)); + assert_false(vos_irec_is_valid(&invalid_dtx_lid, DTX_LID_VALID)); + assert_true(vos_irec_is_valid(&valid, DTX_LID_VALID)); +} + +static const struct CMUnitTest tree_tests_all[] = { + {"VOS1100: vos_irec_is_valid", vos_irec_is_valid_test, NULL, NULL}, +}; + +int +run_tree_tests(const char *cfg) +{ + char *test_name = "tree"; + return cmocka_run_group_tests_name(test_name, tree_tests_all, NULL, NULL); +} diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index 4713971d65c..948b3a2f6d0 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -716,6 +716,14 @@ vos_mod_init(void) d_getenv_bool("DAOS_DKEY_PUNCH_PROPAGATE", &vos_dkey_punch_propagate); D_INFO("DKEY punch propagation is %s\n", vos_dkey_punch_propagate ? "enabled" : "disabled"); + /* + * NOTE: This is used to skip old partially committed DTX records that were generated + * while running DAOS-2.6.3-rc{1,2}. If the user has never run such a version, please + * do NOT set this environment variable. + */ + d_getenv_bool("DAOS_SKIP_OLD_PARTIAL_DTX", &vos_skip_old_partial_dtx); + D_INFO("%s old partially committed DTX records\n", vos_skip_old_partial_dtx ? "Skip" : "Keep"); + vos_agg_gap = VOS_AGG_GAP_DEF; d_getenv_uint("DAOS_VOS_AGG_GAP", &vos_agg_gap); if (vos_agg_gap < VOS_AGG_GAP_MIN || vos_agg_gap > VOS_AGG_GAP_MAX) { diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 5ed24e20909..2127db776c9 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -24,12 +24,6 @@ #define DTX_ACT_BLOB_MAGIC 0x14130a2b #define DTX_CMT_BLOB_MAGIC 0x2502191c -enum { - DTX_UMOFF_ILOG = (1 << 0), - DTX_UMOFF_SVT = (1 << 1), - DTX_UMOFF_EVT = (1 << 2), -}; - #define DTX_UMOFF_TYPES (DTX_UMOFF_ILOG | DTX_UMOFF_SVT | DTX_UMOFF_EVT) #define DTX_INDEX_INVAL (int32_t)(-1) @@ -47,27 +41,7 @@ enum { DAE_EPOCH(dae)); \ } while (0) -static inline void -dtx_type2umoff_flag(umem_off_t *rec, uint32_t type) -{ - uint8_t flag = 0; - - switch (type) { - case DTX_RT_ILOG: - flag = DTX_UMOFF_ILOG; - break; - case DTX_RT_SVT: - flag = DTX_UMOFF_SVT; - break; - case DTX_RT_EVT: - flag = DTX_UMOFF_EVT; - break; - default: - D_ASSERT(0); - } - - umem_off_set_flags(rec, flag); -} +bool vos_skip_old_partial_dtx; static inline uint32_t dtx_umoff_flag2type(umem_off_t umoff) @@ -208,6 +182,7 @@ dtx_act_ent_cleanup(struct vos_container *cont, struct vos_dtx_act_ent *dae, dae->dae_oid_cnt = 0; } + DAE_REC_OFF(dae) = UMOFF_NULL; D_FREE(dae->dae_records); dae->dae_rec_cap = 0; DAE_REC_CNT(dae) = 0; @@ -685,50 +660,61 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab if (dae->dae_records != NULL) { D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT); + D_ASSERT(!UMOFF_IS_NULL(dae_df->dae_rec_off)); - for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; - i >= 0; i--) { - rc = do_dtx_rec_release(umm, cont, dae, - dae->dae_records[i], abort); + for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; i >= 0; i--) { + rc = do_dtx_rec_release(umm, cont, dae, dae->dae_records[i], abort); if (rc != 0) return rc; } + rc = umem_free(umm, dae_df->dae_rec_off); + if (rc != 0) + return rc; + + if (keep_act) { + rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off)); + if (rc != 0) + return rc; + + dae_df->dae_rec_off = UMOFF_NULL; + } + count = DTX_INLINE_REC_CNT; } else { + D_ASSERT(DAE_REC_CNT(dae) <= DTX_INLINE_REC_CNT); + count = DAE_REC_CNT(dae); } for (i = count - 1; i >= 0; i--) { - rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], - abort); - if (rc != 0) - return rc; - } - - if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) { - rc = umem_free(umm, dae_df->dae_rec_off); + rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort); if (rc != 0) return rc; } if (keep_act) { + /* When re-commit partial committed DTX, the count can be zero. */ + if (dae_df->dae_rec_cnt > 0) { + rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_cnt, + sizeof(dae_df->dae_rec_cnt)); + if (rc != 0) + return rc; + + dae_df->dae_rec_cnt = 0; + } + /* * If it is required to keep the active DTX entry, then it must be for partial * commit. Let's mark it as DTE_PARTIAL_COMMITTED. 
*/ - if ((DAE_FLAGS(dae) & DTE_PARTIAL_COMMITTED)) + if (DAE_FLAGS(dae) & DTE_PARTIAL_COMMITTED) return 0; - rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off)); - if (rc != 0) - return rc; - rc = umem_tx_add_ptr(umm, &dae_df->dae_flags, sizeof(dae_df->dae_flags)); if (rc != 0) return rc; - dae_df->dae_rec_off = UMOFF_NULL; dae_df->dae_flags |= DTE_PARTIAL_COMMITTED; return 0; @@ -931,7 +917,7 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t D_FREE(dce); if (rm_cos != NULL && - (rc == 0 || rc == -DER_NONEXIST || (rc == -DER_ALREADY && dae == NULL))) + ((rc == 0 && !keep_act) || rc == -DER_NONEXIST || (rc == -DER_ALREADY && dae == NULL))) *rm_cos = true; return rc; @@ -2706,6 +2692,135 @@ vos_dtx_abort(daos_handle_t coh, struct dtx_id *dti, daos_epoch_t epoch) return rc; } +static void +do_dtx_rec_discard_invalid(struct umem_instance *umm, struct vos_dtx_act_ent *dae, umem_off_t *rec, + int *discarded) +{ + bool valid; + + if (UMOFF_IS_NULL(*rec)) + return; + + switch (dtx_umoff_flag2type(*rec)) { + case DTX_RT_ILOG: { + valid = ilog_is_valid(umm, *rec, DAE_LID(dae), DAE_EPOCH(dae)); + break; + } + case DTX_RT_SVT: { + struct vos_irec_df *svt = umem_off2ptr(umm, *rec); + valid = vos_irec_is_valid(svt, DAE_LID(dae)); + break; + } + case DTX_RT_EVT: { + struct evt_desc *evt = umem_off2ptr(umm, *rec); + valid = evt_desc_is_valid(evt, DAE_LID(dae)); + break; + } + default: + /* On-disk data corruption case. */ + valid = false; + break; + } + + if (!valid) { + *rec = UMOFF_NULL; + *discarded += 1; + } +} + +static int +vos_dtx_discard_invalid_internal(struct vos_container *cont, struct vos_dtx_act_ent *dae, + int *discarded) +{ + struct umem_instance *umm = vos_cont2umm(cont); + int discarded_noninline = 0; + int discarded_inline = 0; + int count = min(DAE_REC_CNT(dae), DTX_INLINE_REC_CNT); + int i; + + /* go through the inlined records */ + for (i = 0; i < count; i++) { + do_dtx_rec_discard_invalid(umm, dae, &DAE_REC_INLINE(dae)[i], &discarded_inline); + } + + if (discarded_inline > 0) { + /* copy the whole array to durable format */ + struct vos_dtx_act_ent_df *dae_df = umem_off2ptr(umm, dae->dae_df_off); + size_t size = sizeof(umem_off_t) * count; + int rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_inline, size); + if (rc != 0) { + return rc; + } + memcpy(&dae_df->dae_rec_inline, &DAE_REC_INLINE(dae), size); + } + + /* go through the non-inlined records if present */ + if (dae->dae_records != NULL) { + D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT); + + count = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; + for (i = 0; i < count; i++) { + do_dtx_rec_discard_invalid(umm, dae, &dae->dae_records[i], + &discarded_noninline); + } + + if (discarded_noninline > 0) { + /* copy the whole array to the durable format */ + size_t size = sizeof(umem_off_t) * count; + void *rec_df = umem_off2ptr(umm, DAE_REC_OFF(dae)); + int rc = umem_tx_add_ptr(umm, rec_df, size); + if (rc != 0) { + return rc; + } + memcpy(rec_df, dae->dae_records, size); + } + } + + *discarded = discarded_inline + discarded_noninline; + + return 0; +} + +int +vos_dtx_discard_invalid(daos_handle_t coh, struct dtx_id *dti, int *discarded) +{ + struct vos_container *cont; + struct vos_dtx_act_ent *dae = NULL; + d_iov_t riov; + d_iov_t kiov; + int rc; + + cont = vos_hdl2cont(coh); + D_ASSERT(cont != NULL); + + D_ASSERT(dti != NULL); + D_ASSERT(discarded != NULL); + + /* lookup the DTX entry */ + d_iov_set(&kiov, dti, sizeof(*dti)); + d_iov_set(&riov, NULL, 0); + rc = 
dbtree_lookup(cont->vc_dtx_active_hdl, &kiov, &riov); + if (rc != 0) { + return rc; + } + dae = riov.iov_buf; + + rc = umem_tx_begin(vos_cont2umm(cont), NULL); + if (rc == 0) { + rc = vos_dtx_discard_invalid_internal(cont, dae, discarded); + if (rc == 0 && *discarded > 0) { + rc = umem_tx_commit(vos_cont2umm(cont)); + } else { + rc = umem_tx_abort(vos_cont2umm(cont), rc); + if (rc == -DER_CANCELED) { + rc = 0; + } + } + } + + return rc; +} + static int vos_dtx_set_flags_one(struct vos_container *cont, struct dtx_id *dti, uint32_t flags) { @@ -3122,6 +3237,9 @@ vos_dtx_act_reindex(struct vos_container *cont) dae->dae_need_release = 1; D_INIT_LIST_HEAD(&dae->dae_link); + if (vos_skip_old_partial_dtx && DAE_FLAGS(dae) & DTE_PARTIAL_COMMITTED) + DAE_REC_CNT(dae) = 0; + if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) { size_t size; int count; diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index acd9e685dd7..b5ada253806 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -149,6 +149,7 @@ extern uint32_t vos_agg_gap; extern unsigned int vos_agg_nvme_thresh; extern bool vos_dkey_punch_propagate; +extern bool vos_skip_old_partial_dtx; static inline uint32_t vos_byte2blkcnt(uint64_t bytes) { @@ -2067,4 +2068,44 @@ int vos_bkt_array_add(struct vos_bkt_array *bkts, uint32_t bkt_id); int vos_bkt_array_pin(struct vos_pool *pool, struct vos_bkt_array *bkts, struct umem_pin_handle **pin_hdl); +/** Validate the provided svt. + * + * Note: It is designed for catastrophic recovery. Not to perform at run-time. + * + * \param svt[in] + * \param dtx_lid[in] local id of the DTX entry the evt is supposed to belong to + * + * \return true if svt is valid. + **/ +bool +vos_irec_is_valid(const struct vos_irec_df *svt, uint32_t dtx_lid); + +enum { + DTX_UMOFF_ILOG = (1 << 0), + DTX_UMOFF_SVT = (1 << 1), + DTX_UMOFF_EVT = (1 << 2), +}; + +static inline void +dtx_type2umoff_flag(umem_off_t *rec, uint32_t type) +{ + uint8_t flag = 0; + + switch (type) { + case DTX_RT_ILOG: + flag = DTX_UMOFF_ILOG; + break; + case DTX_RT_SVT: + flag = DTX_UMOFF_SVT; + break; + case DTX_RT_EVT: + flag = DTX_UMOFF_EVT; + break; + default: + D_ASSERT(0); + } + + umem_off_set_flags(rec, flag); +} + #endif /* __VOS_INTERNAL_H__ */ diff --git a/src/vos/vos_tree.c b/src/vos/vos_tree.c index c7aa8b57f5e..7133557c33c 100644 --- a/src/vos/vos_tree.c +++ b/src/vos/vos_tree.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1354,3 +1355,13 @@ obj_tree_find_attr(unsigned tree_class, int flags) return NULL; } } + +bool +vos_irec_is_valid(const struct vos_irec_df *svt, uint32_t dtx_lid) +{ + if (svt == NULL) { + return false; + } + + return svt->ir_dtx == dtx_lid; +} diff --git a/utils/cq/requirements.txt b/utils/cq/requirements.txt index 20f5c5ba003..317b68ab73b 100644 --- a/utils/cq/requirements.txt +++ b/utils/cq/requirements.txt @@ -3,10 +3,10 @@ pyenchant ## flake8 6 removed --diff option which breaks flake precommit hook. 
## https://github.com/pycqa/flake8/issues/1389 https://github.com/PyCQA/flake8/pull/1720 flake8==7.1.1 -isort==5.13.2 -pylint==3.3.2 +isort==6.0.0 +pylint==3.3.4 yamllint==1.35.1 -codespell==2.3.0 +codespell==2.4.1 # Used by ci/jira_query.py which pip installs it standalone. jira torch>=2.2.0 diff --git a/utils/node_local_test.py b/utils/node_local_test.py index 6279d139582..b99fd54230d 100755 --- a/utils/node_local_test.py +++ b/utils/node_local_test.py @@ -2,6 +2,7 @@ """Node local test (NLT). (C) Copyright 2020-2024 Intel Corporation. +(C) Copyright 2025 Google LLC (C) Copyright 2025 Enakta Labs Ltd SPDX-License-Identifier: BSD-2-Clause-Patent @@ -5529,6 +5530,8 @@ def __init__(self, aft, cmd, env, loc, cwd): delete=False) as log_file: self.log_file = log_file.name self._env['D_LOG_FILE'] = self.log_file + with open(log_file.name, 'w', encoding='utf-8') as lf: + lf.write(f'cmd: {" ".join(cmd)}\n') def __str__(self): cmd_text = ' '.join(self._cmd) diff --git a/utils/rpms/daos.rpmlintrc b/utils/rpms/daos.rpmlintrc index 81301ebb048..32c68e1d41c 100644 --- a/utils/rpms/daos.rpmlintrc +++ b/utils/rpms/daos.rpmlintrc @@ -20,7 +20,7 @@ addFilter("daos-client\.x86_64: E: post(i|u)n-without-ldconfig \/usr\/lib64\/lib addFilter("daos-(client|server)\.x86_64: W: dangerous-command-in-%post(un)? rm") # lots of missing manpages -addFilter("W: no-manual-page-for-binary (cart_ctl|daos_agent|dfuse|self_test|acl_dump_test|agent_tests|crt_launch|daos_debug_set_params|daos_gen_io_conf|daos_perf|daos_racer|daos_run_io_conf|daos_test|dfs_test|dfuse_test|drpc_engine_test|drpc_test|eq_tests|fault_status|hello_drpc|job_tests|jobtest|security_test|daos_firmware|daos_admin|daos_engine|daos_metrics|daos_server|daos_storage_estimator.py|evt_ctl|jump_pl_map|obj_ctl|pl_bench|rdbt|ring_pl_map|smd_ut|bio_ut|vea_stress|vea_ut|vos_perf|vos_tests|dtx_tests|ddb|ddb_tests)") +addFilter("W: no-manual-page-for-binary (cart_ctl|daos_agent|dfuse|self_test|acl_dump_test|agent_tests|crt_launch|daos_debug_set_params|daos_gen_io_conf|daos_perf|daos_racer|daos_run_io_conf|daos_test|dfs_test|dfuse_test|drpc_engine_test|drpc_test|eq_tests|fault_status|hello_drpc|job_tests|jobtest|security_test|daos_firmware|daos_admin|daos_engine|daos_metrics|daos_server|daos_storage_estimator.py|evt_ctl|jump_pl_map|obj_ctl|pl_bench|rdbt|ring_pl_map|smd_ut|bio_ut|vea_stress|vea_ut|vos_perf|vos_tests|dtx_tests|dtx_ut|ddb|ddb_tests|ddb_ut)") addFilter("daos-(server|firmware)\.x86_64: W: non-standard-(u|g)id \/.+ daos_server") diff --git a/utils/rpms/daos.spec b/utils/rpms/daos.spec index 658d07ca939..26942fba956 100644 --- a/utils/rpms/daos.spec +++ b/utils/rpms/daos.spec @@ -16,7 +16,7 @@ Name: daos Version: 2.7.101 -Release: 5%{?relval}%{?dist} +Release: 6%{?relval}%{?dist} Summary: DAOS Storage Engine License: BSD-2-Clause-Patent @@ -555,6 +555,7 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent %files server-tests %doc README.md %{_bindir}/dtx_tests +%{_bindir}/dtx_ut %{_bindir}/evt_ctl %{_bindir}/jump_pl_map %{_bindir}/pl_bench @@ -566,6 +567,7 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent %{_bindir}/vos_tests %{_bindir}/vea_stress %{_bindir}/ddb_tests +%{_bindir}/ddb_ut %{_bindir}/obj_ctl %{_bindir}/vos_perf @@ -600,9 +602,12 @@ getent passwd daos_agent >/dev/null || useradd -s /sbin/nologin -r -g daos_agent # No files in a shim package %changelog -* Mon Jan 20 2025 Cedric Koch-Hofer 2.7.101-5 +* Wed Feb 12 2025 Cedric Koch-Hofer 2.7.101-6 - Add support of the libasan +* Wed 
Jan 22 2025 Jan Michalski 2.7.101-5 +- Add ddb_ut and dtx_ut to the server-tests package + * Fri Dec 20 2024 Jeff Olivier 2.7.101-4 - Switch libfuse3 to libfused diff --git a/utils/utest.yaml b/utils/utest.yaml index df6ae51cf7a..afc8bb9b920 100644 --- a/utils/utest.yaml +++ b/utils/utest.yaml @@ -1,4 +1,5 @@ # (C) Copyright 2023-2024 Intel Corporation. +# (C) Copyright 2025 Hewlett Packard Enterprise Development LP. # # SPDX-License-Identifier: BSD-2-Clause-Patent - name: common @@ -56,6 +57,7 @@ base: "PREFIX" tests: - cmd: ["bin/dtx_tests"] + - cmd: ["bin/dtx_ut"] - name: placement base: "PREFIX" tests: @@ -184,6 +186,7 @@ base: "PREFIX" tests: - cmd: ["bin/ddb_tests"] + - cmd: ["bin/ddb_ut"] - name: Source metadata testing gha: True memcheck: False
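
Note on the validator pattern used by this patch: ilog_is_valid(), vos_irec_is_valid() and evt_desc_is_valid() all answer the same question for do_dtx_rec_discard_invalid() -- does this on-disk record still belong to the DTX entry being repaired? A record is rejected when the resolved pointer is NULL, when its magic/layout tag is wrong, or when its stored DTX local id (and, for the incarnation log, epoch) does not match the active entry. The self-contained sketch below mirrors that shape only; toy_desc, TOY_MAGIC and toy_desc_is_valid() are illustrative stand-ins, not DAOS definitions, and nothing beyond the C standard library is assumed.

/*
 * Minimal sketch of the shared validator shape (NULL -> invalid, wrong magic
 * tag -> invalid, DTX local id mismatch -> invalid).  Toy types only; these
 * are not DAOS structures.
 */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define TOY_MAGIC 0xbeefcafeU

struct toy_desc {
	uint32_t magic;   /* layout tag, in the spirit of dc_magic / lr_magic */
	uint32_t dtx_lid; /* DTX local id, in the spirit of dc_dtx / ir_dtx */
};

static bool
toy_desc_is_valid(const struct toy_desc *desc, uint32_t expected_lid)
{
	if (desc == NULL)
		return false;                 /* dangling reference */
	if (desc->magic != TOY_MAGIC)
		return false;                 /* not the expected record type */
	return desc->dtx_lid == expected_lid; /* owned by another DTX entry? */
}

int
main(void)
{
	struct toy_desc ok    = {.magic = TOY_MAGIC, .dtx_lid = 123};
	struct toy_desc other = {.magic = TOY_MAGIC, .dtx_lid = 124};
	struct toy_desc junk  = {.magic = 0, .dtx_lid = 123};

	assert(toy_desc_is_valid(&ok, 123));
	assert(!toy_desc_is_valid(&other, 123));
	assert(!toy_desc_is_valid(&junk, 123));
	assert(!toy_desc_is_valid(NULL, 123));
	return 0;
}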