diff --git a/auto_detect.py b/auto_detect.py index dd82437..5653395 100644 --- a/auto_detect.py +++ b/auto_detect.py @@ -3,6 +3,7 @@ import subprocess import re import logging +import math def get_cpu_info(logger): @@ -10,12 +11,58 @@ def get_cpu_info(logger): 'freq' : None, 'threads': None, 'cores': None, - #'tdp': None, + 'tdp': None, 'mem': None, - #'make': None, + 'make': None, 'chips': None } + try: + file_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/name' + with open(file_path, 'r', encoding='UTF-8') as file: + domain_name = file.read().strip() + if domain_name != 'package-0': + raise RuntimeError(f"Domain /sys/class/powercap/intel-rapl/intel-rapl:0/name was not package-0, but {domain_name}") + + file_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/constraint_0_name' + with open(file_path, 'r', encoding='UTF-8') as file: + constraint_name = file.read().strip() + if constraint_name != 'long_term': + raise RuntimeError(f"Constraint /sys/class/powercap/intel-rapl/intel-rapl:0/constraint_0_name was not long_term, but {constraint_name}") + + file_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/constraint_0_max_power_uw' + with open(file_path, 'r', encoding='UTF-8') as file: + tdp = file.read() + data['tdp'] = int(tdp) / 1_000_000 + + logger.info('Found TDP: %d W', data['tdp']) + #pylint: disable=broad-except + except Exception as err: + logger.info('Exception: %s', err) + logger.info('Could not read RAPL powercapping info from /sys/class/powercap/intel-rapl') + + try: + file_paths = { + 1: '/sys/class/powercap/intel-rapl/intel-rapl:0/name', + 2: '/sys/class/powercap/intel-rapl/intel-rapl:1/name', + 3: '/sys/class/powercap/intel-rapl/intel-rapl:2/name', + 4: '/sys/class/powercap/intel-rapl/intel-rapl:3/name', + 5: '/sys/class/powercap/intel-rapl/intel-rapl:4/name', + 6: '/sys/class/powercap/intel-rapl/intel-rapl:5/name', + } + for chips, file_path in file_paths.items(): + with open(file_path, 'r', encoding='UTF-8') as file: + domain_name = 
file.read().strip() + if domain_name != f"package-{chips-1}": + raise RuntimeError(f"Domain {file_path} was not package-{chips-1}, but {domain_name}") + logger.info('Found Sockets: %d', chips) + data['chips'] = chips + #pylint: disable=broad-except + except Exception as err: + logger.info('Exception: %s', err) + logger.info('Could not find (additional) chips info under file path. Most likely reached final chip. continuing ...') + + try: cpuinfo = subprocess.check_output('lscpu', encoding='UTF-8') match = re.search(r'On-line CPU\(s\) list:\s*(0-)?(\d+)', cpuinfo) @@ -25,14 +72,16 @@ def get_cpu_info(logger): else: logger.info('Could not find Threads. Setting to None') + # this will overwrite info we have from RAPL socket discovery, as we + # deem lscpu more reliable match = re.search(r'Socket\(s\):\s*(\d+)', cpuinfo) if match: data['chips'] = int(match.group(1)) - logger.info('Found Sockets: %d', data['chips']) + logger.info('Found Sockets: %d (will take precedence if not 0)', data['chips']) else: - logger.info('Could not find Chips/Sockets. Setting to None') + logger.info('Could not find Chips/Sockets via lscpu') - if data['threads']: + if data['threads'] and data['chips']: match = re.search(r'Thread\(s\) per core:\s*(\d+)', cpuinfo) if match: threads_per_core = int(match.group(1)) @@ -48,6 +97,16 @@ def get_cpu_info(logger): else: logger.info('Could not find Frequency. Setting to None') + match = re.search(r'Model name:.*Intel\(R\)', cpuinfo) + if match: + data['make'] = 'intel' + logger.info('Found Make: %s', data['make']) + + match = re.search(r'Model name:.*AMD ', cpuinfo) + if match: + data['make'] = 'amd' + logger.info('Found Make: %s', data['make']) + # we currently do not match for architecture, as this info is provided nowhere @@ -79,12 +138,13 @@ def get_cpu_info(logger): logger.info('/proc/cpuinfo not accesible on system. Could not check for Base Frequency info. 
Setting value to None.') + try: meminfo = subprocess.check_output(['cat', '/proc/meminfo'], encoding='UTF-8', stderr=subprocess.DEVNULL) match = re.search(r'MemTotal:\s*(\d+) kB', meminfo) if match: - data['mem'] = round(int(match.group(1)) / 1024 / 1024) - logger.info('Found Memory: %d', data['mem']) + data['mem'] = math.ceil(int(match.group(1)) / 1024 / 1024) + logger.info('Found Memory: %d GB', data['mem']) else: logger.info('Could not find Memory. Setting to None') #pylint: disable=broad-except @@ -99,4 +159,4 @@ def get_cpu_info(logger): logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.INFO) - get_cpu_info(logger) + print(get_cpu_info(logger)) diff --git a/xgb.py b/xgb.py index 3f12b4c..d0165ba 100644 --- a/xgb.py +++ b/xgb.py @@ -34,7 +34,7 @@ def train_model(cpu_chips, Z): X = X[Z.columns] # only select the supplied columns from the command line - logger.info('Model will be trained on the following variables: %s', X.columns.values) + logger.info('Model will be trained on the following columns and restrictions: \n%s', Z) # params = { # 'max_depth': 10, @@ -130,20 +130,24 @@ def interpolate_predictions(predictions): args_dict = args.__dict__.copy() del args_dict['silent'] del args_dict['auto'] + del args_dict['energy'] # did the user supply any of the auto detectable arguments? - if len(args_dict) == 0 or args.auto: - logger.info('No arguments where supplied, or auto mode was forced. Running auto detect on the sytem') + if not any(args_dict.values()) or args.auto: + logger.info('No arguments where supplied, or auto mode was forced. 
Running auto detect on the sytem.') data = auto_detect.get_cpu_info(logger) logger.info('The following data was auto detected: %s', data) - args.cpu_freq = data['freq'] - args.cpu_threads = data['threads'] - args.cpu_cores = data['cores'] - args.ram = data['mem'] - args.cpu_chips = data['chips'] + # only overwrite values that were not already supplied + args.cpu_freq = args.cpu_freq or data['freq'] + args.cpu_threads = args.cpu_threads or data['threads'] + args.cpu_cores = args.cpu_cores or data['cores'] + args.tdp = args.tdp or data['tdp'] + args.ram = args.ram or data['mem'] + args.cpu_make = args.cpu_make or data['make'] + args.cpu_chips = args.cpu_chips or data['chips'] # set default. We do this here and not in argparse, so we can check if anything was supplied at all if not args.vhost_ratio: