Skip to content

Commit

Permalink
Added TDP and make detection; Fixed --auto mode when no parameter supplied
Browse files Browse the repository at this point in the history
  • Loading branch information
ArneTR committed Dec 17, 2023
1 parent 37090c1 commit 21252c9
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 16 deletions.
76 changes: 68 additions & 8 deletions auto_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,66 @@
import subprocess
import re
import logging
import math

def get_cpu_info(logger):

data = {
'freq' : None,
'threads': None,
'cores': None,
#'tdp': None,
'tdp': None,
'mem': None,
#'make': None,
'make': None,
'chips': None
}

try:
file_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/name'
with open(file_path, 'r', encoding='UTF-8') as file:
domain_name = file.read().strip()
if domain_name != 'package-0':
raise RuntimeError(f"Domain /sys/class/powercap/intel-rapl/intel-rapl:0/name was not package-0, but {domain_name}")

file_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/constraint_0_name'
with open(file_path, 'r', encoding='UTF-8') as file:
constraint_name = file.read().strip()
if constraint_name != 'long_term':
raise RuntimeError(f"Constraint /sys/class/powercap/intel-rapl/intel-rapl:0/constraint_0_name was not long_term, but {constraint_name}")

file_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/constraint_0_max_power_uw'
with open(file_path, 'r', encoding='UTF-8') as file:
tdp = file.read()
data['tdp'] = int(tdp) / 1_000_000

logger.info('Found TDP: %d W', data['tdp'])
#pylint: disable=broad-except
except Exception as err:
logger.info('Exception: %s', err)
logger.info('Could not read RAPL powercapping info from /sys/class/powercap/intel-rapl')

try:
file_paths = {
1: '/sys/class/powercap/intel-rapl/intel-rapl:0/name',
2: '/sys/class/powercap/intel-rapl/intel-rapl:1/name',
3: '/sys/class/powercap/intel-rapl/intel-rapl:2/name',
4: '/sys/class/powercap/intel-rapl/intel-rapl:3/name',
5: '/sys/class/powercap/intel-rapl/intel-rapl:4/name',
6: '/sys/class/powercap/intel-rapl/intel-rapl:5/name',
}
for chips, file_path in file_paths.items():
with open(file_path, 'r', encoding='UTF-8') as file:
domain_name = file.read().strip()
if domain_name != f"package-{chips-1}":
raise RuntimeError(f"Domain {file_path} was not package-{chips-1}, but {domain_name}")
logger.info('Found Sockets: %d', chips)
data['chips'] = chips
#pylint: disable=broad-except
except Exception as err:
logger.info('Exception: %s', err)
logger.info('Could not find (additional) chips info under file path. Most likely reached final chip. continuing ...')


try:
cpuinfo = subprocess.check_output('lscpu', encoding='UTF-8')
match = re.search(r'On-line CPU\(s\) list:\s*(0-)?(\d+)', cpuinfo)
Expand All @@ -25,14 +72,16 @@ def get_cpu_info(logger):
else:
logger.info('Could not find Threads. Setting to None')

# this will overwrite info we have from RAPL socket discovery, as we
# deem lscpu more reliable
match = re.search(r'Socket\(s\):\s*(\d+)', cpuinfo)
if match:
data['chips'] = int(match.group(1))
logger.info('Found Sockets: %d', data['chips'])
logger.info('Found Sockets: %d (will take precedence if not 0)', data['chips'])
else:
logger.info('Could not find Chips/Sockets. Setting to None')
logger.info('Could not find Chips/Sockets via lscpu')

if data['threads']:
if data['threads'] and data['chips']:
match = re.search(r'Thread\(s\) per core:\s*(\d+)', cpuinfo)
if match:
threads_per_core = int(match.group(1))
Expand All @@ -48,6 +97,16 @@ def get_cpu_info(logger):
else:
logger.info('Could not find Frequency. Setting to None')

match = re.search(r'Model name:.*Intel\(R\)', cpuinfo)
if match:
data['make'] = 'intel'
logger.info('Found Make: %s', data['make'])

match = re.search(r'Model name:.*AMD ', cpuinfo)
if match:
data['make'] = 'amd'
logger.info('Found Make: %s', data['make'])


# we currently do not match for architecture, as this info is provided nowhere

Expand Down Expand Up @@ -79,12 +138,13 @@ def get_cpu_info(logger):
logger.info('/proc/cpuinfo not accesible on system. Could not check for Base Frequency info. Setting value to None.')



try:
meminfo = subprocess.check_output(['cat', '/proc/meminfo'], encoding='UTF-8', stderr=subprocess.DEVNULL)
match = re.search(r'MemTotal:\s*(\d+) kB', meminfo)
if match:
data['mem'] = round(int(match.group(1)) / 1024 / 1024)
logger.info('Found Memory: %d', data['mem'])
data['mem'] = math.ceil(int(match.group(1)) / 1024 / 1024)
logger.info('Found Memory: %d GB', data['mem'])
else:
logger.info('Could not find Memory. Setting to None')
#pylint: disable=broad-except
Expand All @@ -99,4 +159,4 @@ def get_cpu_info(logger):
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

get_cpu_info(logger)
print(get_cpu_info(logger))
20 changes: 12 additions & 8 deletions xgb.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def train_model(cpu_chips, Z):

X = X[Z.columns] # only select the supplied columns from the command line

logger.info('Model will be trained on the following variables: %s', X.columns.values)
logger.info('Model will be trained on the following columns and restrictions: \n%s', Z)

# params = {
# 'max_depth': 10,
Expand Down Expand Up @@ -130,20 +130,24 @@ def interpolate_predictions(predictions):
args_dict = args.__dict__.copy()
del args_dict['silent']
del args_dict['auto']
del args_dict['energy']

# did the user supply any of the auto detectable arguments?
if len(args_dict) == 0 or args.auto:
logger.info('No arguments where supplied, or auto mode was forced. Running auto detect on the sytem')
if not any(args_dict.values()) or args.auto:
logger.info('No arguments where supplied, or auto mode was forced. Running auto detect on the sytem.')

data = auto_detect.get_cpu_info(logger)

logger.info('The following data was auto detected: %s', data)

args.cpu_freq = data['freq']
args.cpu_threads = data['threads']
args.cpu_cores = data['cores']
args.ram = data['mem']
args.cpu_chips = data['chips']
# only overwrite not already supplied values
args.cpu_freq = args.cpu_freq or data['freq']
args.cpu_threads = args.cpu_threads or data['threads']
args.cpu_cores = args.cpu_cores or data['cores']
args.tdp = args.tdp or data['tdp']
args.ram = args.ram or data['mem']
args.cpu_make = args.cpu_make or data['make']
args.cpu_chips = args.cpu_chips or data['chips']

# set default. We do this here and not in argparse, so we can check if anything was supplied at all
if not args.vhost_ratio:
Expand Down

0 comments on commit 21252c9

Please sign in to comment.