diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index ca6fed71d5..201ae6f8cb 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -17,11 +17,6 @@ display_help() { echo " --skip-cuda-install - disable installing a full CUDA SDK in the host_injections prefix (e.g. in CI)" } -# Function to check if a command exists -function command_exists() { - command -v "$1" >/dev/null 2>&1 -} - function copy_build_log() { # copy specified build log to specified directory, with some context added build_log=${1} @@ -159,8 +154,13 @@ fi # are: # - .lmod/lmodrc.lua # - .lmod/SitePackage.lua +# # We run scripts to create them if they don't exist or if the scripts have been # changed in the PR. +# +# (TODO do we need to change the path if we have sub-directories for +# accelerators? And would we need different scripts for creating lua files under +# different directories?) # Set base directory for software and for Lmod config files _eessi_software_path=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE} @@ -256,6 +256,12 @@ if command_exists "nvidia-smi"; then ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh fi +# Install extra software that is needed (e.g., for providing a custom ctypes +# library when needed) +cd ${TOPDIR}/scripts/extra +./install_extra_packages.sh --temp-dir /tmp/temp --easystack eessi-2023.06-extra-packages.yml +cd ${TOPDIR} + # use PR patch file to determine in which easystack files stuff was added changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing') if [ -z "${changed_easystacks}" ]; then diff --git a/_replace_files.txt b/_replace_files.txt new file mode 100644 index 0000000000..bb29d2ea26 --- /dev/null +++ b/_replace_files.txt @@ -0,0 +1 @@ +__EESSI_SOFTWARE_PATH__/Python/3.11.3-GCCcore-12.3.0/lib/python3.11/ctypes/util.py replacement_files/ctypes/util.py diff --git a/bot/build.sh 
b/bot/build.sh index 6e835cb6aa..9a48b5df16 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -266,15 +266,69 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR} BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}") BUILD_STEP_ARGS+=("--storage" "${STORAGE}") # add options required to handle NVIDIA support -BUILD_STEP_ARGS+=("--nvidia" "all") +if command_exists "nvidia-smi"; then + echo "Command 'nvidia-smi' found, using available GPU" + BUILD_STEP_ARGS+=("--nvidia" "all") +else + echo "No 'nvidia-smi' found, no available GPU but allowing overriding this check" + BUILD_STEP_ARGS+=("--nvidia" "install") +fi +# Retain location for host injections so we don't reinstall CUDA +# (Always need to run the driver installation as available driver may change) if [[ ! -z ${SHARED_FS_PATH} ]]; then BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") fi -# Don't run the Lmod GPU driver check when doing builds (may not have a GPU, and it's not relevant for vanilla builds anyway) -echo "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'" -export EESSI_OVERRIDE_GPU_CHECK=1 -echo "EESSI_OVERRIDE_GPU_CHECK='${EESSI_OVERRIDE_GPU_CHECK}'" +# replace some files using lower_dirs mechanism +# - read replacements from replace_files.txt +# each line has the format __EESSI_SOFTWARE_PATH__/some_path relative_path +# /cvmfs/repo_name/versions/repo_version/software/os_type/software_dir/some_path +# - for each replacement do +# - check if the target exists in the repository +# - create directory for replacement +# - copy target into directory +rm -f ADD_LOWER_DIRS +if [[ -f "replace_files.txt" ]]; then + LOWER_DIRS="${STORAGE}/lower_dirs" + mkdir -p "${LOWER_DIRS}" + echo "LOWER_DIRS: '${LOWER_DIRS}'" + + repo_name=${EESSI_CVMFS_REPO_OVERRIDE} + repo_version=${EESSI_VERSION_OVERRIDE} + os_type=${EESSI_OS_TYPE} + software_subdir_override=${EESSI_SOFTWARE_SUBDIR_OVERRIDE} + 
software_path="/cvmfs/${repo_name}/versions/${repo_version}/software/${os_type}/${software_subdir_override}/software" + + cat replace_files.txt | while read replace_spec; do + echo "replace_spec: '${replace_spec}'" + target=$(echo "${replace_spec}" | cut -f1 -d' ') + target_full_path=$(echo "${target}" | sed -e "s+__EESSI_SOFTWARE_PATH__+${software_path}+") + replace=$(echo "${replace_spec}" | cut -f2 -d' ') + echo "target: '${target}'" + echo "target_full_path: '${target_full_path}'" + echo "replace: '${replace}'" + if [[ -f ${replace} ]]; then + echo "replacement file exists" + target_lower_path=$(echo "${target_full_path}" | cut -f4- -d/) + echo "target_lower_path: '${target_lower_path}'" + target_lower_dir=$(dirname ${target_lower_path}) + echo "target_lower_dir: '${target_lower_dir}'" + mkdir -p ${LOWER_DIRS}/${target_lower_dir} + cp -a ${replace} ${LOWER_DIRS}/${target_lower_dir}/. + ls -lisa ${LOWER_DIRS}/${target_lower_dir} + touch ADD_LOWER_DIRS + else + echo "replacement file does NOT exist; ignoring replacement" + fi + done +fi +echo "LOWER_DIRS: '${LOWER_DIRS}'" +if [[ -f ADD_LOWER_DIRS ]]; then + BUILD_STEP_ARGS+=("--lower-dirs" "${LOWER_DIRS}") + echo "Added '--lower-dirs ${LOWER_DIRS}' to build step arguments" +else + echo "Nothing to be added for LOWER_DIRS" +fi # create tmp file for output of build step build_outerr=$(mktemp build.outerr.XXXX) diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index 20c4098b8f..585c86ba8f 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -120,6 +120,7 @@ -- simpleName is a module in packagesList -- get the full host_injections path local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') + -- build final path where the software should be installed local packageEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. 
"/easybuild" local packageDirExists = isDir(packageEasyBuildDir) diff --git a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml index 4f31c4dd08..089e4b8278 100644 --- a/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml +++ b/easystacks/pilot.nessi.no/2023.06/eessi-2023.06-eb-4.9.1-2023a.yml @@ -55,3 +55,12 @@ easyconfigs: - PyTorch-2.1.2-foss-2023a-CUDA-12.1.1.eb: options: cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0 + #- PyTorch-bundle-2.1.2-foss-2023a-CUDA-12.1.1.eb: + # # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20484 + # options: + # from-pr: 20484 + # cuda-compute-capabilities: 6.0,6.1,7.0,7.5,8.0,8.6,8.9,9.0 + - SentencePiece-0.2.0-GCC-12.3.0.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/20484 + options: + from-pr: 20484 diff --git a/eb_hooks.py b/eb_hooks.py index e4a957acdc..e7999ddb16 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -5,6 +5,7 @@ import easybuild.tools.environment as env from easybuild.easyblocks.generic.configuremake import obtain_config_guess +from easybuild.easyblocks.python import EXTS_FILTER_PYTHON_PACKAGES from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS from easybuild.tools.build_log import EasyBuildError, print_msg from easybuild.tools.config import build_option, update_build_option @@ -311,6 +312,27 @@ def parse_hook_qt5_check_qtwebengine_disable(ec, eprefix): raise EasyBuildError("Qt5-specific hook triggered for non-Qt5 easyconfig?!") +def parse_hook_sentencepiece_disable_tcmalloc_aarch64(ec, eprefix): + """ + Disable using TCMalloc + """ + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + if ec.name == 'SentencePiece' and ec.version in ['0.2.0'] and cpu_target == CPU_TARGET_AARCH64_GENERIC: + # find right setting to change/update + print_msg("parse_hook for SentencePiece: OLD '%s'", ec['components']) + new_components = [] + for item in 
ec['components']:
+            if item[2]['easyblock'] == 'CMakeMake':
+                new_item = item[2]
+                new_item['configopts'] = '-DSPM_ENABLE_TCMALLOC=OFF'
+                new_components.append((item[0], item[1], new_item))
+            else:
+                new_components.append(item)
+        ec['components'] = new_components
+        print_msg("parse_hook for SentencePiece: NEW '%s'", ec['components'])
+    elif ec.name != 'SentencePiece':  # other SentencePiece versions/CPU targets are left untouched
+        raise EasyBuildError("SentencePiece-specific hook triggered for non-SentencePiece easyconfig?!")
+
 def parse_hook_ucx_eprefix(ec, eprefix):
     """Make UCX aware of compatibility layer via additional configuration options."""
     if ec.name == 'UCX':
@@ -349,6 +371,30 @@ def parse_hook_lammps_remove_deps_for_CI_aarch64(ec, *args, **kwargs):
         raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!")
 
 
+def parse_hook_librosa_custom_ctypes(ec, *args, **kwargs):
+    """
+    Prefix the exts_filter of the 'soundfile' extension with EBPYTHONPREFIXES pointing at custom_ctypes 1.2
+    """
+    if ec.name == 'librosa' and ec.version in ('0.10.1',):
+        ec_dict = ec.asdict()
+        eessi_software_path = get_eessi_envvar('EESSI_SOFTWARE_PATH')
+        custom_ctypes_path = os.path.join(eessi_software_path, "software", "custom_ctypes", "1.2")
+        ebpythonprefixes = "EBPYTHONPREFIXES=%s" % custom_ctypes_path
+        exts_list_new = []
+        for item in ec_dict['exts_list']:
+            if item[0] == 'soundfile':
+                ext_dict = item[2]
+                ext_dict['exts_filter'] = (ebpythonprefixes + ' ' + EXTS_FILTER_PYTHON_PACKAGES[0],
+                                           EXTS_FILTER_PYTHON_PACKAGES[1])
+                exts_list_new.append((item[0], item[1], ext_dict))
+            else:
+                exts_list_new.append(item)
+        ec['exts_list'] = exts_list_new
+        print_msg("New exts_list: '%s'", ec['exts_list'])
+    else:
+        raise EasyBuildError("librosa/0.10.1-specific hook triggered for non-librosa/0.10.1 easyconfig?!")
+
+
 def pre_prepare_hook_highway_handle_test_compilation_issues(self, *args, **kwargs):
     """
     Solve issues with compiling or running the tests on both
@@ -646,6 +692,12 @@ def pre_single_extension_testthat(ext, *args, **kwargs):
         ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g'
inst/include/testthat/vendor/catch.h && " +def pre_sanitycheck_hook(self, *args, **kwargs): + """Main pre-sanity-check hook: trigger custom functions based on software name.""" + if self.name in PRE_SANITYCHECK_HOOKS: + PRE_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs) + + def post_sanitycheck_hook(self, *args, **kwargs): """Main post-sanity-check hook: trigger custom functions based on software name.""" if self.name in POST_SANITYCHECK_HOOKS: @@ -693,6 +745,23 @@ def replace_non_distributable_files_with_symlinks(log, install_dir, package, all symlink(host_inj_path, full_path) +def pre_sanitycheck_sentence_piece_ld_preload_aarch64(self, *args, **kwargs): + """ + Use LD_PRELOAD before sanity check to work around + error 'cannot allocate memory in static TLS block' + """ + return + cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') + + if self.name == 'SentencePiece' and self.version in ['0.2.0'] and cpu_target == CPU_TARGET_AARCH64_GENERIC: + ebrootgperftools = os.getenv('EBROOTGPERFTOOLS') + lib_tcmalloc_minimal = os.path.join(ebrootgperftools, 'lib64', 'libtcmalloc_minimal.so') + env.setvar('LD_PRELOAD', lib_tcmalloc_minimal) + print_msg("Set LD_PRELOAD env var to '%s'", os.getenv('LD_PRELOAD')) + else: + raise EasyBuildError("SentencePiece-specific hook triggered for non-SentencePiece easyconfig?!") + + def post_sanitycheck_cuda(self, *args, **kwargs): """ Remove files from CUDA installation that we are not allowed to ship, @@ -852,6 +921,35 @@ def inject_gpu_property(ec): return ec +def pre_module_hook(self, *args, **kwargs): + """Main pre-module-check hook: trigger custom functions based on software name.""" + if self.name in PRE_MODULE_HOOKS: + PRE_MODULE_HOOKS[self.name](self, *args, **kwargs) + + +def pre_module_hook_librosa_augment_modluafooter(self, *args, **kwargs): + """ + Add EBPYTHONPREFIXES to modluafooter + """ + if self.name == 'librosa' and self.version == '0.10.1': + eessi_software_path = get_eessi_envvar('EESSI_SOFTWARE_PATH') + 
custom_ctypes_path = os.path.join(eessi_software_path, "software", "custom_ctypes", "1.2") + key = 'modluafooter' + values = ['prepend_path("EBPYTHONPREFIXES","%s")' % (custom_ctypes_path)] + print_msg("Adding '%s' to modluafooter", values[0]) + if not key in self.cfg: + self.cfg[key] = '\n'.join(values) + else: + new_value = self.cfg[key] + for value in values: + if not value in new_value: + new_value = '\n'.join([new_value, value]) + self.cfg[key] = new_value + print_msg("Full modluafooter is '%s'", self.cfg[key]) + else: + raise EasyBuildError("librosa/0.10.1-specific hook triggered for non-librosa/0.10.1 easyconfig?!") + + PARSE_HOOKS = { 'casacore': parse_hook_casacore_disable_vectorize, 'CGAL': parse_hook_cgal_toolchainopts_precise, @@ -859,10 +957,12 @@ def inject_gpu_property(ec): 'GPAW': parse_hook_gpaw_harcoded_path, 'ImageMagick': parse_hook_imagemagick_add_dependency, 'LAMMPS': parse_hook_lammps_remove_deps_for_CI_aarch64, + 'librosa': parse_hook_librosa_custom_ctypes, 'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors, 'Pillow-SIMD' : parse_hook_Pillow_SIMD_harcoded_paths, 'pybind11': parse_hook_pybind11_replace_catch2, 'Qt5': parse_hook_qt5_check_qtwebengine_disable, + 'SentencePiece': parse_hook_sentencepiece_disable_tcmalloc_aarch64, 'UCX': parse_hook_ucx_eprefix, } @@ -904,8 +1004,16 @@ def inject_gpu_property(ec): 'numpy': post_single_extension_numpy, } +PRE_SANITYCHECK_HOOKS = { + 'SentencePiece': pre_sanitycheck_sentence_piece_ld_preload_aarch64, +} + POST_SANITYCHECK_HOOKS = { 'CUDA': post_sanitycheck_cuda, 'cuDNN': post_sanitycheck_cudnn, 'cuTENSOR': post_sanitycheck_cutensor, } + +PRE_MODULE_HOOKS = { + 'librosa': pre_module_hook_librosa_augment_modluafooter, +} diff --git a/eessi_container.sh b/eessi_container.sh index a9405b6d8e..e6bb13cbe7 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -477,6 +477,11 @@ if [[ ${SETUP_NVIDIA} -eq 1 ]]; then mkdir -p ${EESSI_USR_LOCAL_CUDA} 
BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda" [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" + if [[ "${NVIDIA_MODE}" == "install" ]] ; then + # No GPU so we need to "trick" Lmod to allow us to load CUDA modules even without a CUDA driver + # (this variable means EESSI_OVERRIDE_GPU_CHECK=1 will be set inside the container) + export SINGULARITYENV_EESSI_OVERRIDE_GPU_CHECK=1 + fi fi fi diff --git a/replacement_files/ctypes/util.py b/replacement_files/ctypes/util.py new file mode 100644 index 0000000000..b4cb4becb0 --- /dev/null +++ b/replacement_files/ctypes/util.py @@ -0,0 +1,379 @@ +import os +import shutil +import subprocess +import sys + +# find_library(name) returns the pathname of a library, or None. +if os.name == "nt": + + def _get_build_version(): + """Return the version of MSVC that was used to build Python. + + For Python 2.3 and up, the version number is included in + sys.version. For earlier versions, assume the compiler is MSVC 6. + """ + # This function was copied from Lib/distutils/msvccompiler.py + prefix = "MSC v." + i = sys.version.find(prefix) + if i == -1: + return 6 + i = i + len(prefix) + s, rest = sys.version[i:].split(" ", 1) + majorVersion = int(s[:-2]) - 6 + if majorVersion >= 13: + majorVersion += 1 + minorVersion = int(s[2:3]) / 10.0 + # I don't think paths are affected by minor version in version 6 + if majorVersion == 6: + minorVersion = 0 + if majorVersion >= 6: + return majorVersion + minorVersion + # else we don't know what version of the compiler this is + return None + + def find_msvcrt(): + """Return the name of the VC runtime dll""" + version = _get_build_version() + if version is None: + # better be safe than sorry + return None + if version <= 6: + clibname = 'msvcrt' + elif version <= 13: + clibname = 'msvcr%d' % (version * 10) + else: + # CRT is no longer directly loadable. See issue23606 for the + # discussion about alternative approaches. 
+ return None + + # If python was built with in debug mode + import importlib.machinery + if '_d.pyd' in importlib.machinery.EXTENSION_SUFFIXES: + clibname += 'd' + return clibname+'.dll' + + def find_library(name): + if name in ('c', 'm'): + return find_msvcrt() + # See MSDN for the REAL search order. + for directory in os.environ['PATH'].split(os.pathsep): + fname = os.path.join(directory, name) + if os.path.isfile(fname): + return fname + if fname.lower().endswith(".dll"): + continue + fname = fname + ".dll" + if os.path.isfile(fname): + return fname + return None + +elif os.name == "posix" and sys.platform == "darwin": + from ctypes.macholib.dyld import dyld_find as _dyld_find + def find_library(name): + possible = ['lib%s.dylib' % name, + '%s.dylib' % name, + '%s.framework/%s' % (name, name)] + for name in possible: + try: + return _dyld_find(name) + except ValueError: + continue + return None + +elif sys.platform.startswith("aix"): + # AIX has two styles of storing shared libraries + # GNU auto_tools refer to these as svr4 and aix + # svr4 (System V Release 4) is a regular file, often with .so as suffix + # AIX style uses an archive (suffix .a) with members (e.g., shr.o, libssl.so) + # see issue#26439 and _aix.py for more details + + from ctypes._aix import find_library + +elif os.name == "posix": + # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump + import re, tempfile + + def _is_elf(filename): + "Return True if the given file is an ELF file" + elf_header = b'\x7fELF' + with open(filename, 'br') as thefile: + return thefile.read(4) == elf_header + + def _findLib_gcc(name): + # Run GCC's linker with the -t (aka --trace) option and examine the + # library name it prints out. The GCC command will fail because we + # haven't supplied a proper program with main(), but that does not + # matter. 
+ expr = os.fsencode(r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name)) + + c_compiler = shutil.which('gcc') + if not c_compiler: + c_compiler = shutil.which('cc') + if not c_compiler: + # No C compiler available, give up + return None + + temp = tempfile.NamedTemporaryFile() + try: + args = [c_compiler, '-Wl,-t', '-o', temp.name, '-l' + name] + + env = dict(os.environ) + env['LC_ALL'] = 'C' + env['LANG'] = 'C' + try: + proc = subprocess.Popen(args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + env=env) + except OSError: # E.g. bad executable + return None + with proc: + trace = proc.stdout.read() + finally: + try: + temp.close() + except FileNotFoundError: + # Raised if the file was already removed, which is the normal + # behaviour of GCC if linking fails + pass + res = re.findall(expr, trace) + if not res: + return None + + for file in res: + # Check if the given file is an elf file: gcc can report + # some files that are linker scripts and not actual + # shared objects. See bpo-41976 for more details + if not _is_elf(file): + continue + return os.fsdecode(file) + + + if sys.platform == "sunos5": + # use /usr/ccs/bin/dump on solaris + def _get_soname(f): + if not f: + return None + + try: + proc = subprocess.Popen(("/usr/ccs/bin/dump", "-Lpv", f), + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + except OSError: # E.g. command not found + return None + with proc: + data = proc.stdout.read() + res = re.search(br'\[.*\]\sSONAME\s+([^\s]+)', data) + if not res: + return None + return os.fsdecode(res.group(1)) + else: + def _get_soname(f): + # assuming GNU binutils / ELF + if not f: + return None + objdump = shutil.which('objdump') + if not objdump: + # objdump is not available, give up + return None + + try: + proc = subprocess.Popen((objdump, '-p', '-j', '.dynamic', f), + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + except OSError: # E.g. 
bad executable + return None + with proc: + dump = proc.stdout.read() + res = re.search(br'\sSONAME\s+([^\s]+)', dump) + if not res: + return None + return os.fsdecode(res.group(1)) + + if sys.platform.startswith(("freebsd", "openbsd", "dragonfly")): + + def _num_version(libname): + # "libxyz.so.MAJOR.MINOR" => [ MAJOR, MINOR ] + parts = libname.split(b".") + nums = [] + try: + while parts: + nums.insert(0, int(parts.pop())) + except ValueError: + pass + return nums or [sys.maxsize] + + def find_library(name): + ename = re.escape(name) + expr = r':-l%s\.\S+ => \S*/(lib%s\.\S+)' % (ename, ename) + expr = os.fsencode(expr) + + try: + proc = subprocess.Popen(('/sbin/ldconfig', '-r'), + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + except OSError: # E.g. command not found + data = b'' + else: + with proc: + data = proc.stdout.read() + + res = re.findall(expr, data) + if not res: + return _get_soname(_findLib_gcc(name)) + res.sort(key=_num_version) + return os.fsdecode(res[-1]) + + elif sys.platform == "sunos5": + + def _findLib_crle(name, is64): + if not os.path.exists('/usr/bin/crle'): + return None + + env = dict(os.environ) + env['LC_ALL'] = 'C' + + if is64: + args = ('/usr/bin/crle', '-64') + else: + args = ('/usr/bin/crle',) + + paths = None + try: + proc = subprocess.Popen(args, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + env=env) + except OSError: # E.g. 
bad executable + return None + with proc: + for line in proc.stdout: + line = line.strip() + if line.startswith(b'Default Library Path (ELF):'): + paths = os.fsdecode(line).split()[4] + + if not paths: + return None + + for dir in paths.split(":"): + libfile = os.path.join(dir, "lib%s.so" % name) + if os.path.exists(libfile): + return libfile + + return None + + def find_library(name, is64 = False): + return _get_soname(_findLib_crle(name, is64) or _findLib_gcc(name)) + + else: + + def _findSoname_ldconfig(name): + import struct + if struct.calcsize('l') == 4: + machine = os.uname().machine + '-32' + else: + machine = os.uname().machine + '-64' + mach_map = { + 'x86_64-64': 'libc6,x86-64', + 'ppc64-64': 'libc6,64bit', + 'sparc64-64': 'libc6,64bit', + 's390x-64': 'libc6,64bit', + 'ia64-64': 'libc6,IA-64', + } + abi_type = mach_map.get(machine, 'libc6') + + # XXX assuming GLIBC's ldconfig (with option -p) + regex = r'\s+(lib%s\.[^\s]+)\s+\(%s' + regex = os.fsencode(regex % (re.escape(name), abi_type)) + try: + with subprocess.Popen(['/sbin/ldconfig', '-p'], + stdin=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + stdout=subprocess.PIPE, + env={'LC_ALL': 'C', 'LANG': 'C'}) as p: + res = re.search(regex, p.stdout.read()) + if res: + return os.fsdecode(res.group(1)) + except OSError: + pass + + def _findLib_ld(name): + # See issue #9998 for why this is needed + expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name) + cmd = ['ld', '-t'] + libpath = os.environ.get('LD_LIBRARY_PATH') + if libpath: + for d in libpath.split(':'): + cmd.extend(['-L', d]) + cmd.extend(['-o', os.devnull, '-l%s' % name]) + result = None + try: + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True) + out, _ = p.communicate() + res = re.findall(expr, os.fsdecode(out)) + for file in res: + # Check if the given file is an elf file: gcc can report + # some files that are linker scripts and not actual + # shared objects. 
See bpo-41976 for more details
+                    if not _is_elf(file):
+                        continue
+                    return os.fsdecode(file)
+            except Exception:
+                pass  # result will be None
+            return result
+
+        def find_library(name):
+            # See issue #9998; lib/soname are None when gcc/objdump cannot resolve the library
+            lib = _findLib_gcc(name)
+            soname = _get_soname(lib)
+            return _findSoname_ldconfig(name) or \
+                (os.path.join(os.path.dirname(lib), soname) if lib and soname else None) or \
+                _get_soname(_findLib_ld(name))  # absolute path when gcc found the file, else soname via ld
+
+################################################################
+# test code
+
+def test():
+    from ctypes import cdll
+    if os.name == "nt":
+        print(cdll.msvcrt)
+        print(cdll.load("msvcrt"))
+        print(find_library("msvcrt"))
+
+    if os.name == "posix":
+        # find and load_version
+        print(find_library("m"))
+        print(find_library("c"))
+        print(find_library("bz2"))
+
+        # load
+        if sys.platform == "darwin":
+            print(cdll.LoadLibrary("libm.dylib"))
+            print(cdll.LoadLibrary("libcrypto.dylib"))
+            print(cdll.LoadLibrary("libSystem.dylib"))
+            print(cdll.LoadLibrary("System.framework/System"))
+        # issue-26439 - fix broken test call for AIX
+        elif sys.platform.startswith("aix"):
+            from ctypes import CDLL
+            if sys.maxsize < 2**32:
+                print(f"Using CDLL(name, os.RTLD_MEMBER): {CDLL('libc.a(shr.o)', os.RTLD_MEMBER)}")
+                print(f"Using cdll.LoadLibrary(): {cdll.LoadLibrary('libc.a(shr.o)')}")
+                # librpm.so is only available as 32-bit shared library
+                print(find_library("rpm"))
+                print(cdll.LoadLibrary("librpm.so"))
+            else:
+                print(f"Using CDLL(name, os.RTLD_MEMBER): {CDLL('libc.a(shr_64.o)', os.RTLD_MEMBER)}")
+                print(f"Using cdll.LoadLibrary(): {cdll.LoadLibrary('libc.a(shr_64.o)')}")
+            print(f"crypt\t:: {find_library('crypt')}")
+            print(f"crypt\t:: {cdll.LoadLibrary(find_library('crypt'))}")
+            print(f"crypto\t:: {find_library('crypto')}")
+            print(f"crypto\t:: {cdll.LoadLibrary(find_library('crypto'))}")
+        else:
+            print(cdll.LoadLibrary("libm.so"))
+            print(cdll.LoadLibrary("libcrypt.so"))
+            print(find_library("crypt"))
+
+if __name__ == "__main__":
+    test()
diff
--git a/run_in_compat_layer_env.sh b/run_in_compat_layer_env.sh index 393956a0c1..cc2cdae034 100755 --- a/run_in_compat_layer_env.sh +++ b/run_in_compat_layer_env.sh @@ -26,12 +26,12 @@ fi if [ ! -z ${EESSI_VERSION_OVERRIDE} ]; then INPUT="export EESSI_VERSION_OVERRIDE=${EESSI_VERSION_OVERRIDE}; ${INPUT}" fi -if [ ! -z ${http_proxy} ]; then - INPUT="export http_proxy=${http_proxy}; ${INPUT}" -fi if [ ! -z ${EESSI_OVERRIDE_GPU_CHECK} ]; then INPUT="export EESSI_OVERRIDE_GPU_CHECK=${EESSI_OVERRIDE_GPU_CHECK}; ${INPUT}" fi +if [ ! -z ${http_proxy} ]; then + INPUT="export http_proxy=${http_proxy}; ${INPUT}" +fi if [ ! -z ${https_proxy} ]; then INPUT="export https_proxy=${https_proxy}; ${INPUT}" fi diff --git a/scripts/extra/custom_ctypes-1.2.eb b/scripts/extra/custom_ctypes-1.2.eb new file mode 100644 index 0000000000..35be6dcc41 --- /dev/null +++ b/scripts/extra/custom_ctypes-1.2.eb @@ -0,0 +1,29 @@ +## +# This is a contribution from the NESSI project +# Homepage: https://documentation.sigma2.no +# +# Authors:: Thomas Roeblitz +# License:: GPL-2.0-only +# +## + +easyblock = 'Tarball' + +name = 'custom_ctypes' +version = '1.2' + +homepage = 'https://github.com/ComputeCanada/custom_ctypes' +description = """custum_ctypes is a small Python package to fix the discovery of libraries with Python's ctypes module. 
It changes the behavior of find_library to return absolute paths to shared objects rather than just the names."""
+
+toolchain = SYSTEM
+
+source_urls = ['https://github.com/ComputeCanada/custom_ctypes/archive/refs/tags']
+sources = ['%(version)s.tar.gz']
+checksums = ['3b30ce633c6a329169f2b10ff24b8eaaeef3fa208a66cdacdb53c22f02a88d9b']
+
+sanity_check_paths = {
+    'files': ['README.md'],
+    'dirs': ['lib'],
+}
+
+moduleclass = 'lib'
diff --git a/scripts/extra/eessi-2023.06-extra-packages.yml b/scripts/extra/eessi-2023.06-extra-packages.yml
new file mode 100644
index 0000000000..22670ec7a3
--- /dev/null
+++ b/scripts/extra/eessi-2023.06-extra-packages.yml
@@ -0,0 +1,2 @@
+easyconfigs:
+  - custom_ctypes-1.2.eb
diff --git a/scripts/extra/install_extra_packages.sh b/scripts/extra/install_extra_packages.sh
new file mode 100755
index 0000000000..ccd2890864
--- /dev/null
+++ b/scripts/extra/install_extra_packages.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+
+# This script can be used to install extra packages under ${EESSI_SOFTWARE_PATH}
+
+# some logging
+echo ">>> Running ${BASH_SOURCE}"
+
+# Initialise our bash functions
+TOPDIR=$(dirname "$(realpath "${BASH_SOURCE}")")
+source "${TOPDIR}"/../utils.sh
+
+# Function to display help message
+show_help() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "Options:"
+    echo " --help Display this help message"
+    echo " -e, --easystack EASYSTACKFILE Easystack file which specifies easyconfigs to be installed."
+    echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary"
+    echo " storage during the installation"
+}
+
+# Initialize variables
+TEMP_DIR=
+EASYSTACK_FILE=
+
+# Parse command-line options
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --help)
+            show_help
+            exit 0
+            ;;
+        -e|--easystack)
+            if [ -n "$2" ]; then
+                EASYSTACK_FILE="$2"
+                shift 2
+            else
+                echo "Error: Argument required for $1"
+                show_help
+                exit 1
+            fi
+            ;;
+        -t|--temp-dir)
+            if [ -n "$2" ]; then
+                TEMP_DIR="$2"
+                shift 2
+            else
+                echo "Error: Argument required for $1"
+                show_help
+                exit 1
+            fi
+            ;;
+        *)
+            show_help
+            fatal_error "Error: Unknown option: $1"
+            ;;
+    esac
+done
+
+if [[ -z ${EASYSTACK_FILE} ]]; then
+    show_help
+    fatal_error "Error: need to specify easystack file"
+fi
+
+# Make sure NESSI is initialised
+check_eessi_initialised
+
+# As an installation location just use $EESSI_SOFTWARE_PATH
+export NESSI_CVMFS_INSTALL=${EESSI_SOFTWARE_PATH}
+
+# we need a directory we can use for temporary storage
+if [[ -z "${TEMP_DIR}" ]]; then
+    tmpdir=$(mktemp -d)
+else
+    mkdir -p "${TEMP_DIR}"
+    tmpdir=$(mktemp -d --tmpdir="${TEMP_DIR}" extra.XXX)
+fi
+# verify the result for both branches: mktemp can fail in either location
+if [[ ! -d "${tmpdir}" ]]; then
+    fatal_error "Could not create directory ${tmpdir}"
+fi
+echo "Created temporary directory '${tmpdir}'"
+export WORKING_DIR=${tmpdir}
+
+# load EasyBuild
+ml EasyBuild
+
+# load NESSI-extend/2023.06-easybuild
+ml NESSI-extend/2023.06-easybuild
+
+eb --show-config
+
+# run the installation and remember its exit status, so that the cleanup
+# below cannot mask a failed build (the status of 'rm' would be returned otherwise)
+eb --easystack "${EASYSTACK_FILE}" --robot
+eb_exit_code=$?
+
+# clean up tmpdir
+rm -rf "${tmpdir}"
+
+exit ${eb_exit_code}
diff --git a/scripts/gpu_support/nvidia/copy_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/copy_nvidia_host_libraries.sh
new file mode 100755
index 0000000000..ebc428a50d
--- /dev/null
+++ b/scripts/gpu_support/nvidia/copy_nvidia_host_libraries.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+# This script links host libraries related to GPU drivers to a location where
+# they can be found by the EESSI linker
+
+# Initialise our bash functions
+TOPDIR=$(dirname $(realpath $BASH_SOURCE))
+source "$TOPDIR"/../../utils.sh
+
+# We rely on ldconfig to give us the location of the libraries on the host
+command_name="ldconfig"
+# We cannot use a version of ldconfig that's being shipped under CVMFS
+exclude_prefix="/cvmfs"
+
+found_paths=()
+# Always attempt to use /sbin/ldconfig
+if [ -x "/sbin/$command_name" ]; then
+    found_paths+=("/sbin/$command_name")
+fi
+IFS=':' read -ra path_dirs <<< "$PATH"
+for dir in "${path_dirs[@]}"; do
+    if [ "$dir" = "/sbin" ]; then
+        continue # we've already checked for $command_name in /sbin, don't need to do it twice
+    fi
+    if [[ ! "$dir" =~ ^$exclude_prefix ]]; then
+        if [ -x "$dir/$command_name" ]; then
+            found_paths+=("$dir/$command_name")
+        fi
+    fi
+done
+
+if [ ${#found_paths[@]} -gt 0 ]; then
+    echo "Found $command_name in the following locations:"
+    printf -- "- %s\n" "${found_paths[@]}"
+    echo "Using first version"
+    host_ldconfig=${found_paths[0]}
+else
+    error="$command_name not found in PATH or only found in paths starting with $exclude_prefix."
+ fatal_error "$error" +fi + +# Make sure EESSI is initialised (doesn't matter what version) +check_eessi_initialised + +# Find the CUDA version of the host CUDA drivers +# (making sure that this can still work inside prefix environment inside a container) +export LD_LIBRARY_PATH=/.singularity.d/libs:$LD_LIBRARY_PATH +nvidia_smi_command="nvidia-smi --query-gpu=driver_version --format=csv,noheader" +if $nvidia_smi_command > /dev/null; then + host_driver_version=$($nvidia_smi_command | tail -n1) + echo_green "Found NVIDIA GPU driver version ${host_driver_version}" + # If the first worked, this should work too + host_cuda_version=$(nvidia-smi -q --display=COMPUTE | grep CUDA | awk 'NF>1{print $NF}') + echo_green "Found host CUDA version ${host_cuda_version}" +else + error="Failed to successfully execute\n $nvidia_smi_command\n" + fatal_error "$error" +fi + +# Let's make sure the driver libraries are not already in place +link_drivers=1 + +# first make sure that target of host_injections variant symlink is an existing directory +host_injections_target=$(realpath -m ${EESSI_CVMFS_REPO}/host_injections) +if [ ! -d ${host_injections_target} ]; then + create_directory_structure ${host_injections_target} +fi + +host_injections_nvidia_dir="${EESSI_CVMFS_REPO}/host_injections/nvidia/${EESSI_CPU_FAMILY}" +host_injection_driver_dir="${host_injections_nvidia_dir}/host" +host_injection_driver_version_file="$host_injection_driver_dir/driver_version.txt" +if [ -e "$host_injection_driver_version_file" ]; then + if grep -q "$host_driver_version" "$host_injection_driver_version_file"; then + echo_green "The host GPU driver libraries (v${host_driver_version}) have already been linked! (based on ${host_injection_driver_version_file})" + link_drivers=0 + else + # There's something there but it is out of date + echo_yellow "Cleaning out outdated symlinks" + rm $host_injection_driver_dir/* + if [ $? -ne 0 ]; then + error="Unable to remove files under '$host_injection_driver_dir'." 
+ fatal_error "$error" + fi + fi +fi + +drivers_linked=0 +if [ "$link_drivers" -eq 1 ]; then + if ! create_directory_structure "${host_injection_driver_dir}" ; then + fatal_error "No write permissions to directory ${host_injection_driver_dir}" + fi + cd ${host_injection_driver_dir} + # Need a small temporary space to hold a couple of files + temp_dir=$(mktemp -d) + echo "temp_dir: '${temp_dir}'" + + # Gather libraries on the host (_must_ be host ldconfig) + $host_ldconfig -p | awk '{print $NF}' > "$temp_dir"/libs.txt + # Allow for the fact that we may be in a container so the CUDA libs might be in there + ls /.singularity.d/libs/* >> "$temp_dir"/libs.txt 2>/dev/null + + # Leverage singularity to find the full list of libraries we should be linking to + echo_yellow "Downloading latest version of nvliblist.conf from Apptainer to ${temp_dir}/nvliblist.conf" + curl --silent --output "$temp_dir"/nvliblist.conf https://raw.githubusercontent.com/apptainer/apptainer/main/etc/nvliblist.conf + + # Make symlinks to all the interesting libraries + grep '.so$' "$temp_dir"/nvliblist.conf | xargs -i grep {} "$temp_dir"/libs.txt | xargs -i cp -a {} ${host_injection_driver_dir}/. 
+
+    # Inject driver and CUDA versions into dir
+    echo "$host_driver_version" > driver_version.txt
+    echo "$host_cuda_version" > cuda_version.txt
+    drivers_linked=1
+
+    # Remove the temporary directory when done
+    rm -r "$temp_dir"
+fi
+
+# Make latest symlink for NVIDIA drivers
+cd "$host_injections_nvidia_dir"
+symlink="latest"
+if [ -L "$symlink" ]; then
+    # Unless the drivers have been installed, leave the symlink alone
+    if [ "$drivers_linked" -eq 1 ]; then
+        ln -sfn host latest  # -n: replace the link itself rather than creating 'host' inside its target
+    fi
+else
+    # No link exists yet
+    ln -s host latest
+fi
+
+# Make sure the libraries can be found by the EESSI linker
+host_injection_linker_dir=${EESSI_EPREFIX/versions/host_injections}
+if [ -L "$host_injection_linker_dir/lib" ]; then
+    target_path=$(readlink -f "$host_injection_linker_dir/lib")
+    if [ "$target_path" != "$(readlink -f "$host_injections_nvidia_dir/latest")" ]; then  # compare canonical paths
+        cd "$host_injection_linker_dir"
+        ln -sfn "$host_injections_nvidia_dir/latest" lib  # -n: replace the existing link itself
+    fi
+else
+    create_directory_structure "$host_injection_linker_dir"
+    cd "$host_injection_linker_dir"
+    ln -s "$host_injections_nvidia_dir/latest" lib
+fi
+
+echo_green "Host NVIDIA GPU drivers linked successfully for EESSI"
diff --git a/scripts/utils.sh b/scripts/utils.sh
index b2be3f6221..962decd20e 100644
--- a/scripts/utils.sh
+++ b/scripts/utils.sh
@@ -78,6 +78,11 @@ function create_directory_structure() {
     return $return_code
 }
 
+# Function to check if a command exists
+function command_exists() {
+    command -v "$1" >/dev/null 2>&1
+}
+
 function get_path_for_tool {
     tool_name=$1
     tool_envvar_name=$2