Skip to content

Commit

Permalink
Merge pull request #850 from casparvl/use_local_reframe_config_file
Browse files Browse the repository at this point in the history
Replace the use of a ReFrame template config file for a manually created one
  • Loading branch information
laraPPr authored Jan 29, 2025
2 parents a9f42cc + 896f2d4 commit 902a20e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 100 deletions.
7 changes: 7 additions & 0 deletions bot/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ EESSI_SOFTWARE_SUBDIR_OVERRIDE=${EESSI_SOFTWARE_SUBDIR_OVERRIDE:-${CPU_TARGET}}
export EESSI_SOFTWARE_SUBDIR_OVERRIDE
echo "bot/test.sh: EESSI_SOFTWARE_SUBDIR_OVERRIDE='${EESSI_SOFTWARE_SUBDIR_OVERRIDE}'"

# determine accelerator target (if any) from .architecture in ${JOB_CFG_FILE}
# Assign first and export separately, so that the exit status of cfg_get_value
# is not masked by 'export' (ShellCheck SC2155); this is the same
# assign-then-export pattern used for EESSI_OS_TYPE below.
EESSI_ACCELERATOR_TARGET=$(cfg_get_value "architecture" "accelerator")
export EESSI_ACCELERATOR_TARGET
echo "bot/test.sh: EESSI_ACCELERATOR_TARGET='${EESSI_ACCELERATOR_TARGET}'"

# get EESSI_OS_TYPE from .architecture.os_type in ${JOB_CFG_FILE} (default: linux)
EESSI_OS_TYPE=$(cfg_get_value "architecture" "os_type")
export EESSI_OS_TYPE=${EESSI_OS_TYPE:-linux}
Expand Down Expand Up @@ -219,6 +223,9 @@ declare -a TEST_SUITE_ARGS=()
# Add --generic when the software subdirectory override ends in /generic
if [[ "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" =~ .*/generic$ ]]; then
    TEST_SUITE_ARGS+=("--generic")
fi
# Forward the shared filesystem path, but only when SHARED_FS_PATH is non-empty
[[ -z "${SHARED_FS_PATH}" ]] || TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")
# [[ ! -z ${BUILD_LOGS_DIR} ]] && TEST_SUITE_ARGS+=("--build-logs-dir" "${BUILD_LOGS_DIR}")
# [[ ! -z ${SHARED_FS_PATH} ]] && TEST_SUITE_ARGS+=("--shared-fs-path" "${SHARED_FS_PATH}")

Expand Down
57 changes: 0 additions & 57 deletions reframe_config_bot.py.tmpl

This file was deleted.

83 changes: 40 additions & 43 deletions test_suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -132,56 +132,42 @@ else
fi

# Configure ReFrame, see https://www.eessi.io/docs/test-suite/installation-configuration
export RFM_CONFIG_FILES=$TOPDIR/reframe_config_bot.py
export RFM_CONFIG_FILE_TEMPLATE=$TOPDIR/reframe_config_bot.py.tmpl
# The site hosting the bot is expected to provide RFM_CONFIG_FILES, pointing at the ReFrame
# config file that matches the bot config.
# See https://gitlab.com/eessi/support/-/issues/114#note_2293660921
if [[ -z "${RFM_CONFIG_FILES}" ]]; then
    # Without an explicit setting, the only fallback is ${shared_fs_path}/reframe_config.py
    [[ -n "${shared_fs_path}" ]] || fatal_error "Environment variable 'shared_fs_path' was expected, but was not set"
    RFM_CONFIG_FILES="${shared_fs_path}/reframe_config.py"
    export RFM_CONFIG_FILES
    if [[ ! -f "${RFM_CONFIG_FILES}" ]]; then
        # No config file at the fallback location: tell the deployer how to provide one
        err_msg="Please put a ReFrame configuration file in ${shared_fs_path}/reframe_config.py"
        err_msg+=" or set RFM_CONFIG_FILES in the environment of this bot instance to point to a valid"
        err_msg+=" ReFrame configuration file that matches the bot config."
        err_msg+=" For more information, see https://gitlab.com/eessi/support/-/issues/114#note_2293660921"
        fatal_error "${err_msg}"
    fi
fi
export RFM_CHECK_SEARCH_PATH=$TESTSUITEPREFIX/eessi/testsuite/tests
export RFM_CHECK_SEARCH_RECURSIVE=1
export RFM_PREFIX=$PWD/reframe_runs

echo "Configured reframe with the following environment variables:"
env | grep "RFM_"

# The /sys inside the container is not the same as the /sys of the host
# We want to extract the memory limit from the cgroup on the host (which is typically set by SLURM).
# Thus, bot/test.sh bind-mounts the host's /sys/fs/cgroup into /hostsys/fs/cgroup
# and that's the prefix we use to extract the memory limit from
cgroup_v1_mem_limit="/hostsys/fs/cgroup/memory/$(</proc/self/cpuset)/memory.limit_in_bytes"
cgroup_v2_mem_limit="/hostsys/fs/cgroup/$(</proc/self/cpuset)/memory.max"
if [ -f "$cgroup_v1_mem_limit" ]; then
echo "Getting memory limit from file $cgroup_v1_mem_limit"
cgroup_mem_bytes=$(cat "$cgroup_v1_mem_limit")
elif [ -f "$cgroup_v2_mem_limit" ]; then
echo "Getting memory limit from file $cgroup_v2_mem_limit"
cgroup_mem_bytes=$(cat "$cgroup_v2_mem_limit")
if [ "$cgroup_mem_bytes" = 'max' ]; then
# In cgroupsv2, the memory.max file may contain 'max', meaning the group can use the full system memory
# Here, we get the system memory from /proc/meminfo. Units are supposedly always in kB, but let's match them too
cgroup_mem_kilobytes=$(grep -oP 'MemTotal:\s+\K\d+(?=\s+kB)' /proc/meminfo)
if [[ $? -ne 0 ]] || [[ -z "$cgroup_mem_kilobytes" ]]; then
fatal_error "Failed to get memory limit from /proc/meminfo"
fi
cgroup_mem_bytes=$(("$cgroup_mem_kilobytes"*1024))
fi
else
fatal_error "Both files ${cgroup_v1_mem_limit} and ${cgroup_v2_mem_limit} couldn't be found. Failed to get the memory limit from the current cgroup"
# Build the ReFrame partition name: EESSI_SOFTWARE_SUBDIR with every '/' replaced by '_',
# followed (when an accelerator target is set) by '_' and that target, with '/' replaced likewise
REFRAME_PARTITION_NAME="${EESSI_SOFTWARE_SUBDIR//\//_}"
if [[ -n "${EESSI_ACCELERATOR_TARGET}" ]]; then
    REFRAME_PARTITION_NAME+="_${EESSI_ACCELERATOR_TARGET//\//_}"
fi
if [[ $? -eq 0 ]]; then
# Convert to MiB
cgroup_mem_mib=$(("$cgroup_mem_bytes"/(1024*1024)))
else
fatal_error "Failed to get the memory limit in bytes from the current cgroup"
fi
echo "Detected available memory: ${cgroup_mem_mib} MiB"
echo "Constructed partition name based on EESSI_SOFTWARE_SUBDIR and EESSI_ACCELERATOR_TARGET: ${REFRAME_PARTITION_NAME}"

cp ${RFM_CONFIG_FILE_TEMPLATE} ${RFM_CONFIG_FILES}
echo "Replacing memory limit in the ReFrame config file with the detected CGROUP memory limit: ${cgroup_mem_mib} MiB"
sed -i "s/__MEM_PER_NODE__/${cgroup_mem_mib}/g" $RFM_CONFIG_FILES
RFM_PARTITION="${SLURM_JOB_PARTITION}"
echo "Replacing partition name in the template ReFrame config file: ${RFM_PARTITION}"
sed -i "s/__RFM_PARTITION__/${RFM_PARTITION}/g" $RFM_CONFIG_FILES
# Set the reframe system name, including partition
export RFM_SYSTEM="BotBuildTests:${REFRAME_PARTITION_NAME}"

echo "Configured reframe with the following environment variables:"
env | grep "RFM_"

# Make debugging easier by printing the final config file:
echo "Final config file (after replacements):"
echo "ReFrame config file used:"
cat "${RFM_CONFIG_FILES}"

# Workaround for https://github.com/EESSI/software-layer/pull/467#issuecomment-1973341966
Expand Down Expand Up @@ -217,7 +203,18 @@ else
fatal_error "Failed to extract names of tests to run: ${REFRAME_NAME_ARGS}"
exit ${test_selection_exit_code}
fi
export REFRAME_ARGS="--tag CI --tag 1_node --nocolor ${REFRAME_NAME_ARGS}"
# People deploying the bot can override the scale tag and the CI tag through the environment;
# the defaults below only apply when the variable is unset or empty
: "${REFRAME_SCALE_TAG:=--tag 1_node}"
: "${REFRAME_CI_TAG:=--tag CI}"
# Bot deployers can pass extra ReFrame arguments through the environment (default: none)
: "${REFRAME_ADDITIONAL_ARGS:=}"
REFRAME_ARGS="${REFRAME_CI_TAG} ${REFRAME_SCALE_TAG} ${REFRAME_ADDITIONAL_ARGS} --nocolor ${REFRAME_NAME_ARGS}"
export REFRAME_ARGS

# List the tests we want to run
echo "Listing tests: reframe ${REFRAME_ARGS} --list"
Expand Down

0 comments on commit 902a20e

Please sign in to comment.