DAOS-16969 test: Reduce cleanup operations for metadata.py test (#15779)
Remove the calls to cleanup methods for multiple containers and ior
commands, which can instead be handled by destroying the pool and a
single ior kill command.

Signed-off-by: Phil Henderson <[email protected]>
phender authored Feb 7, 2025
1 parent e24bb62 commit 67bdb65
Showing 3 changed files with 43 additions and 46 deletions.
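
The change consolidates test teardown: rather than registering a cleanup step for every container and every IOR job manager, the test now lets the pool destroy reclaim the containers and registers a single stop of one job manager to kill all ior processes. A minimal sketch of that pattern, with hypothetical names (MiniTest, destroy_pool) standing in for the DAOS test harness:

# Hedged sketch (not DAOS code): cleanup consolidation in miniature. Register
# one callback for the resource that owns the rest; destroying the owner (the
# pool) implicitly reclaims the owned resources (the containers).

class MiniTest:
    def __init__(self):
        self._cleanups = []                       # LIFO stack of callbacks

    def register_cleanup(self, func, **kwargs):
        self._cleanups.append((func, kwargs))

    def tear_down(self):
        while self._cleanups:
            func, kwargs = self._cleanups.pop()   # most recent first
            func(**kwargs)

def destroy_pool(pool):
    # Destroying the pool reclaims every container inside it, so no
    # per-container cleanup callbacks are needed.
    print(f"destroying pool {pool} (and all of its containers)")

def stop_job_manager(job_manager):
    # One kill stops every remote ior process at once.
    print(f"stopping all ior processes via {job_manager}")

test = MiniTest()
test.register_cleanup(destroy_pool, pool="pool-A")
test.register_cleanup(stop_job_manager, job_manager="clush-ior")
test.tear_down()   # stops ior first, then destroys the pool

Running the stack last-in-first-out mirrors the harness ordering the diff relies on: the ior stop registered during the test runs before the pool destroy.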
80 changes: 35 additions & 45 deletions src/tests/ftest/server/metadata.py
@@ -1,5 +1,6 @@
 """
 (C) Copyright 2019-2024 Intel Corporation.
+(C) Copyright 2025 Hewlett Packard Enterprise Development LP
 
 SPDX-License-Identifier: BSD-2-Clause-Patent
 """
@@ -9,7 +10,7 @@
 from avocado.core.exceptions import TestFail
 from exception_utils import CommandFailure
 from ior_utils import IorCommand
-from job_manager_utils import get_job_manager
+from job_manager_utils import get_job_manager, stop_job_manager
 from thread_manager import ThreadManager

@@ -63,29 +64,13 @@ class ObjectMetadata(TestWithServers):
     def __init__(self, *args, **kwargs):
         """Initialize a TestWithServers object."""
         super().__init__(*args, **kwargs)
-        self.ior_managers = []
 
         # Minimum number of containers that should be able to be created
         self.created_containers_min = self.params.get("created_cont_min", "/run/metadata/*")
 
         # Number of created containers that should not be possible
         self.created_containers_limit = self.params.get("created_cont_max", "/run/metadata/*")
 
-    def pre_tear_down(self):
-        """Tear down steps to optionally run before tearDown().
-
-        Returns:
-            list: a list of error strings to report at the end of tearDown().
-
-        """
-        error_list = []
-        if self.ior_managers:
-            self.test_log.info("Stopping IOR job managers")
-            error_list = self._stop_managers(self.ior_managers, "IOR job manager")
-        else:
-            self.log.debug("no pre-teardown steps defined")
-        return error_list
-
     def create_pool(self, svc_ops_enabled=True):
         """Create a pool and display the svc ranks.
@@ -284,29 +269,26 @@ def metadata_fillup(self, svc_ops_enabled=True):
                     # Keep track of the number of sequential no space container
                     # create errors. Once the max has been reached stop the loop.
                     if status:
+                        if in_failure:
+                            self.log.info(
+                                "Container: %d - [no space -> available] creation successful after %d"
+                                " sequential 'no space' error(s) ", loop + 1, sequential_fail_counter)
+                            in_failure = False
                         sequential_fail_counter = 0
                     else:
                         sequential_fail_counter += 1
+                        if not in_failure:
+                            self.log.info(
+                                "Container: %d - [available -> no space] detected new sequential "
+                                "'no space' error", loop + 1)
+                            in_failure = True
 
                     if sequential_fail_counter >= sequential_fail_max:
                         self.log.info(
-                            "Container %d - %d/%d sequential no space "
-                            "container create errors", sequential_fail_counter,
-                            sequential_fail_max, loop)
+                            "Container %d - [no space limit] reached %d/%d sequential 'no space' "
+                            "errors", loop + 1, sequential_fail_counter, sequential_fail_max)
                         break
 
-                    if status and in_failure:
-                        self.log.info(
-                            "Container: %d - no space -> available "
-                            "transition, sequential no space failures: %d",
-                            loop, sequential_fail_counter)
-                        in_failure = False
-                    elif not status and not in_failure:
-                        self.log.info(
-                            "Container: %d - available -> no space "
-                            "transition, sequential no space failures: %d",
-                            loop, sequential_fail_counter)
-                        in_failure = True
             except TestFail as error:
                 self.log.error(str(error))
                 self.fail("fail (unexpected container create error)")
@@ -320,17 +302,17 @@ def metadata_fillup(self, svc_ops_enabled=True):
                 self.created_containers_min)
             self.fail("Created too few containers")
         self.log.info(
-            "Successfully created %d / %d containers)", len(self.container), loop)
+            "Successfully created %d containers in %d loops)", len(self.container), loop + 1)
 
         # Phase 2 clean up some containers (expected to succeed)
-        msg = "Cleaning up {} containers after pool is full.".format(num_cont_to_destroy)
+        msg = (f"Cleaning up {num_cont_to_destroy}/{len(self.container)} containers after pool "
+               "is full.")
         self.log_step(msg)
         if not self.destroy_num_containers(num_cont_to_destroy):
             self.fail("Fail (unexpected container destroy error)")
 
-        # Do not destroy containers in teardown (destroy pool while metadata rdb is full)
-        for container in self.container:
-            container.skip_cleanup()
+        # The remaining containers are not directly destroyed in teardown due to
+        # 'register_cleanup: False' test yaml entry. They are handled by the pool destroy.
         self.log.info("Leaving pool metadata rdb full (containers will not be destroyed)")
         self.log.info("Test passed")

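Phase 2 above destroys only a handful of containers, and those destroys are expected to succeed even with the metadata rdb full, since each one frees rdb entries; the remaining containers are deliberately left for the pool destroy. A standalone sketch of that shape (phase_two is hypothetical, not the test itself):

# Hedged sketch: destroy a few containers while the pool is full, leave the
# rest for the pool destroy to reclaim in one operation.

def phase_two(containers, num_cont_to_destroy):
    destroyed = 0
    for container in list(containers[:num_cont_to_destroy]):
        containers.remove(container)   # each destroy frees rdb entries
        destroyed += 1
    # The survivors are intentionally NOT destroyed one by one; destroying
    # the pool reclaims them all while the rdb is still full.
    return destroyed == num_cont_to_destroy

containers = [f"cont-{n}" for n in range(10)]
assert phase_two(containers, 3)
assert len(containers) == 7            # left for the pool destroy
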
@@ -469,6 +451,7 @@ def test_metadata_server_restart(self):
         self.create_pool()
         files_per_thread = 400
         total_ior_threads = 5
+        ior_managers = []
 
         processes = self.params.get("slots", "/run/ior/clientslots/*")

@@ -487,19 +470,26 @@ def test_metadata_server_restart(self):
                 ior_cmd.flags.value = self.params.get("ior{}flags".format(operation), "/run/ior/*")
 
                 # Define the job manager for the IOR command
-                self.ior_managers.append(
+                ior_managers.append(
                     get_job_manager(self, "Clush", ior_cmd))
-                env = ior_cmd.get_default_env(str(self.ior_managers[-1]))
-                self.ior_managers[-1].assign_hosts(self.hostlist_clients, self.workdir, None)
-                self.ior_managers[-1].assign_processes(processes)
-                self.ior_managers[-1].assign_environment(env)
-                self.ior_managers[-1].verbose = False
+                env = ior_cmd.get_default_env(str(ior_managers[-1]))
+                ior_managers[-1].assign_hosts(self.hostlist_clients, self.workdir, None)
+                ior_managers[-1].assign_processes(processes)
+                ior_managers[-1].assign_environment(env)
+                ior_managers[-1].verbose = False
+
+                # Disable cleanup methods for all ior commands.
+                ior_managers[-1].register_cleanup_method = None
 
                 # Add a thread for these IOR arguments
                 thread_manager.add(
-                    test=self, manager=self.ior_managers[-1], loops=files_per_thread)
+                    test=self, manager=ior_managers[-1], loops=files_per_thread)
                 self.log.info("Created %s thread %s", operation, index)
 
+            # Manually add one cleanup method for all ior threads
+            if operation == "write":
+                self.register_cleanup(stop_job_manager, job_manager=ior_managers[0])
+
             # Launch the IOR threads
             self.log.info("Launching %d IOR %s threads", thread_manager.qty, operation)
             failed_thread_count = thread_manager.check_run()
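
Note that only one stop_job_manager cleanup is registered even though five job managers are created per operation: every Clush-launched manager runs the same ior binary on the same client hosts, so a single pkill-style stop is assumed to take down all ior processes at once (the "single ior kill command" from the commit message). A toy model of that assumption (FakeJobManager is hypothetical):

# Hedged sketch: many managers, one shared per-host process population, one
# kill. Stopping any one manager clears every ior process it can see.

from dataclasses import dataclass, field

@dataclass
class FakeJobManager:
    hosts: list
    started: list = field(default_factory=list)   # shared process table

    def run(self, name):
        self.started.append(name)

    def kill(self):
        # Emulates a pkill-style stop: clears every ior process on the
        # hosts, regardless of which manager started it.
        self.started.clear()

processes = []                                    # one shared process table
managers = [FakeJobManager(["client-1"], processes) for _ in range(5)]
for index, manager in enumerate(managers):
    manager.run(f"ior-{index}")

managers[0].kill()                                # one kill stops them all
assert not processes                              # nothing left to clean up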
6 changes: 6 additions & 0 deletions src/tests/ftest/server/metadata.yaml
@@ -1,11 +1,13 @@
 hosts:
   test_servers: 4
   test_clients: 1
+
 timeouts:
   test_metadata_fillup_svc_ops_disabled: 400
   test_metadata_fillup_svc_ops_enabled: 400
   test_metadata_addremove: 1300
+  test_metadata_server_restart: 500
 
 server_config:
   name: daos_server
   engines_per_host: 2
@@ -52,16 +54,20 @@ pool:
   # properties: svc_ops_entry_age:150
   # properties: svc_ops_entry_age:300
   # properties: svc_ops_entry_age:600
+
 container:
   control_method: API
   silent: true
+  register_cleanup: False
+
 ior:
   clientslots:
     slots: 1
   dfs_destroy: false
   iorwriteflags: "-w -W -k -G 1"
   iorreadflags: "-r -R -G 1"
   dfs_oclass: "SX"
+
 metadata:
   mean_percent: 1
   num_addremove_loops: 4
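
The new 'register_cleanup: False' entry tells the container fixture not to queue a per-container destroy at creation time, leaving reclamation to the pool destroy. A sketch of how such a yaml gate might be consulted (create_container is hypothetical, not the DAOS harness; assumes PyYAML):

# Hedged sketch: gate per-container teardown on a yaml flag.

import yaml  # PyYAML assumed available

config = yaml.safe_load("""
container:
  control_method: API
  silent: true
  register_cleanup: False
""")

def create_container(test_cleanups, config):
    container = {"label": "cont0"}
    # Only queue a per-container destroy when the yaml asks for it; otherwise
    # the eventual pool destroy reclaims the container.
    if config["container"].get("register_cleanup", True):
        test_cleanups.append(("destroy_container", container["label"]))
    return container

cleanups = []
create_container(cleanups, config)
assert cleanups == []   # the pool destroy will handle it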
3 changes: 2 additions & 1 deletion src/tests/ftest/util/command_utils.py
@@ -1,5 +1,6 @@
 """
 (C) Copyright 2018-2024 Intel Corporation.
+(C) Copyright 2025 Hewlett Packard Enterprise Development LP
 
 SPDX-License-Identifier: BSD-2-Clause-Patent
 """
@@ -478,7 +479,7 @@ def get_params(self, test):
         super().get_params(test)
         for namespace in ['/run/client/*', self.namespace]:
             if namespace is not None:
-                self.env.update_from_list(test.params.get("env_vars", namespace, []))
+                self.env.update_from_list(test.params.get("env_vars", namespace, None) or [])
 
     def _get_new(self):
         """Get a new object based upon this one.