Skip to content

Commit

Permalink
DAOS-16969 test: Reduce cleanup operations for metadata.py test (#15779) (#15859)
Browse files Browse the repository at this point in the history

Remove the calling of cleanup methods for multiple containers and ior
commands that can be handled by destroying the pool and a single ior
kill command.

Signed-off-by: Phil Henderson <[email protected]>
  • Loading branch information
phender authored Feb 10, 2025
1 parent fcef1e1 commit e9f1697
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 46 deletions.
80 changes: 35 additions & 45 deletions src/tests/ftest/server/metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2019-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -9,7 +10,7 @@
from avocado.core.exceptions import TestFail
from exception_utils import CommandFailure
from ior_utils import IorCommand
from job_manager_utils import get_job_manager
from job_manager_utils import get_job_manager, stop_job_manager
from thread_manager import ThreadManager


Expand Down Expand Up @@ -63,29 +64,13 @@ class ObjectMetadata(TestWithServers):
def __init__(self, *args, **kwargs):
"""Initialize a TestWithServers object."""
super().__init__(*args, **kwargs)
self.ior_managers = []

# Minimum number of containers that should be able to be created
self.created_containers_min = self.params.get("created_cont_min", "/run/metadata/*")

# Number of created containers that should not be possible
self.created_containers_limit = self.params.get("created_cont_max", "/run/metadata/*")

def pre_tear_down(self):
"""Tear down steps to optionally run before tearDown().
Returns:
list: a list of error strings to report at the end of tearDown().
"""
error_list = []
if self.ior_managers:
self.test_log.info("Stopping IOR job managers")
error_list = self._stop_managers(self.ior_managers, "IOR job manager")
else:
self.log.debug("no pre-teardown steps defined")
return error_list

def create_pool(self, svc_ops_enabled=True):
"""Create a pool and display the svc ranks.
Expand Down Expand Up @@ -284,29 +269,26 @@ def metadata_fillup(self, svc_ops_enabled=True):
# Keep track of the number of sequential no space container
# create errors. Once the max has been reached stop the loop.
if status:
if in_failure:
self.log.info(
"Container: %d - [no space -> available] creation successful after %d"
" sequential 'no space' error(s) ", loop + 1, sequential_fail_counter)
in_failure = False
sequential_fail_counter = 0
else:
sequential_fail_counter += 1
if not in_failure:
self.log.info(
"Container: %d - [available -> no space] detected new sequential "
"'no space' error", loop + 1)
in_failure = True

if sequential_fail_counter >= sequential_fail_max:
self.log.info(
"Container %d - %d/%d sequential no space "
"container create errors", sequential_fail_counter,
sequential_fail_max, loop)
"Container %d - [no space limit] reached %d/%d sequential 'no space' "
"errors", loop + 1, sequential_fail_counter, sequential_fail_max)
break

if status and in_failure:
self.log.info(
"Container: %d - no space -> available "
"transition, sequential no space failures: %d",
loop, sequential_fail_counter)
in_failure = False
elif not status and not in_failure:
self.log.info(
"Container: %d - available -> no space "
"transition, sequential no space failures: %d",
loop, sequential_fail_counter)
in_failure = True

except TestFail as error:
self.log.error(str(error))
self.fail("fail (unexpected container create error)")
Expand All @@ -320,17 +302,17 @@ def metadata_fillup(self, svc_ops_enabled=True):
self.created_containers_min)
self.fail("Created too few containers")
self.log.info(
"Successfully created %d / %d containers)", len(self.container), loop)
"Successfully created %d containers in %d loops)", len(self.container), loop + 1)

# Phase 2 clean up some containers (expected to succeed)
msg = "Cleaning up {} containers after pool is full.".format(num_cont_to_destroy)
msg = (f"Cleaning up {num_cont_to_destroy}/{len(self.container)} containers after pool "
"is full.")
self.log_step(msg)
if not self.destroy_num_containers(num_cont_to_destroy):
self.fail("Fail (unexpected container destroy error)")

# Do not destroy containers in teardown (destroy pool while metadata rdb is full)
for container in self.container:
container.skip_cleanup()
# The remaining containers are not directly destroyed in teardown due to
# 'register_cleanup: False' test yaml entry. They are handled by the pool destroy.
self.log.info("Leaving pool metadata rdb full (containers will not be destroyed)")
self.log.info("Test passed")

Expand Down Expand Up @@ -469,6 +451,7 @@ def test_metadata_server_restart(self):
self.create_pool()
files_per_thread = 400
total_ior_threads = 5
ior_managers = []

processes = self.params.get("slots", "/run/ior/clientslots/*")

Expand All @@ -487,19 +470,26 @@ def test_metadata_server_restart(self):
ior_cmd.flags.value = self.params.get("ior{}flags".format(operation), "/run/ior/*")

# Define the job manager for the IOR command
self.ior_managers.append(
ior_managers.append(
get_job_manager(self, "Clush", ior_cmd))
env = ior_cmd.get_default_env(str(self.ior_managers[-1]))
self.ior_managers[-1].assign_hosts(self.hostlist_clients, self.workdir, None)
self.ior_managers[-1].assign_processes(processes)
self.ior_managers[-1].assign_environment(env)
self.ior_managers[-1].verbose = False
env = ior_cmd.get_default_env(str(ior_managers[-1]))
ior_managers[-1].assign_hosts(self.hostlist_clients, self.workdir, None)
ior_managers[-1].assign_processes(processes)
ior_managers[-1].assign_environment(env)
ior_managers[-1].verbose = False

# Disable cleanup methods for all ior commands.
ior_managers[-1].register_cleanup_method = None

# Add a thread for these IOR arguments
thread_manager.add(
test=self, manager=self.ior_managers[-1], loops=files_per_thread)
test=self, manager=ior_managers[-1], loops=files_per_thread)
self.log.info("Created %s thread %s", operation, index)

# Manually add one cleanup method for all ior threads
if operation == "write":
self.register_cleanup(stop_job_manager, job_manager=ior_managers[0])

# Launch the IOR threads
self.log.info("Launching %d IOR %s threads", thread_manager.qty, operation)
failed_thread_count = thread_manager.check_run()
Expand Down
6 changes: 6 additions & 0 deletions src/tests/ftest/server/metadata.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
hosts:
test_servers: 4
test_clients: 1

timeouts:
test_metadata_fillup_svc_ops_disabled: 400
test_metadata_fillup_svc_ops_enabled: 400
test_metadata_addremove: 1300
test_metadata_server_restart: 500

server_config:
name: daos_server
engines_per_host: 2
Expand Down Expand Up @@ -53,16 +55,20 @@ pool:
# properties: svc_ops_entry_age:150
# properties: svc_ops_entry_age:300
# properties: svc_ops_entry_age:600

container:
control_method: API
silent: true
register_cleanup: False

ior:
clientslots:
slots: 1
dfs_destroy: false
iorwriteflags: "-w -W -k -G 1"
iorreadflags: "-r -R -G 1"
dfs_oclass: "SX"

metadata:
mean_percent: 1
num_addremove_loops: 4
Expand Down
3 changes: 2 additions & 1 deletion src/tests/ftest/util/command_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2018-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -478,7 +479,7 @@ def get_params(self, test):
super().get_params(test)
for namespace in ['/run/client/*', self.namespace]:
if namespace is not None:
self.env.update_from_list(test.params.get("env_vars", namespace, []))
self.env.update_from_list(test.params.get("env_vars", namespace, None) or [])

def _get_new(self):
"""Get a new object based upon this one.
Expand Down

0 comments on commit e9f1697

Please sign in to comment.