From fe2cf94dfa89dd3cc788a3cefa35505fefd01fb7 Mon Sep 17 00:00:00 2001 From: ShyamB97 Date: Thu, 3 Oct 2024 21:24:44 +0200 Subject: [PATCH] attempt plotting code cleanup --- tools/basic_functions.py | 198 +------------- tools/basic_functions_performance.py | 395 +++++++++++++++++---------- 2 files changed, 259 insertions(+), 334 deletions(-) diff --git a/tools/basic_functions.py b/tools/basic_functions.py index fbb819e..bd2fd08 100644 --- a/tools/basic_functions.py +++ b/tools/basic_functions.py @@ -1,27 +1,19 @@ import os +import pathlib import json import re -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import pathlib import struct +import pandas as pd + +from datetime import datetime as dt from urllib.parse import urljoin, urlencode from urllib.request import urlopen from urllib.error import URLError, HTTPError from http.client import HTTPResponse -from datetime import datetime as dt -from dateutil.parser import parse -from tabulate import tabulate - from rich import print -color_list = ['red', 'blue', 'green', 'cyan', 'orange', 'navy', 'magenta', 'lime', 'purple', 'hotpink', 'olive', 'salmon', 'teal', 'darkblue', 'darkgreen', 'darkcyan', 'darkorange', 'deepskyblue', 'darkmagenta', 'sienna', 'chocolate'] -linestyle_list = ['solid', 'dotted', 'dashed', 'dashdot','solid', 'dotted', 'dashed', 'dashdot'] -marker_list = ['s','o','.','p','P','^','<','>','*','+','x','X','d','D','h','H'] not_alma9_os = ['np04srv008', 'np04srv010', 'np04srv014', 'np04srv023', 'np04onl003', 'np04srv007', 'np04srv009', 'np04crt001'] @@ -84,12 +76,6 @@ def create_filename(test_args : dict, test_num : int) -> str: ]) -def directory(input_dir): - for dir_path in input_dir: - if not os.path.exists(dir_path): - os.makedirs(dir_path) - - def current_time(): now = dt.now() current_dnt = now.strftime('%Y-%m-%d %H:%M:%S') @@ -569,7 +555,7 @@ def process_files(input_dir, process_pcm_files=False, process_uprof_files=False, def break_file_name(file): - 
return file.split("/")[-1].split('-') + return file.split(".")[0].split("/")[-1].split('-') def sanitize_label(label): @@ -628,180 +614,6 @@ def convert_to_24_hour_format(time_str): return time_24_hour -def convert(s): - return list(map(lambda x: x, s)) - - -def get_column_val(df, columns, labels, file): - val = [] - label = [] - info = break_file_name(file) - - for j, (columns_j, label_j) in enumerate(zip(columns, labels)): - if columns_j in ['NewTime', 'Timestamp']: - continue - elif columns_j in ['Socket0 L2 Cache Hits']: - Y_tmp = df['Socket0 L2 Cache Misses'].div(df[columns_j]+df['Socket0 L2 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 L3 Cache Hits']: - Y_tmp = df['Socket0 L3 Cache Misses'].div(df[columns_j]+df['Socket0 L3 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket1 L2 Cache Hits']: - Y_tmp = df['Socket1 L2 Cache Misses'].div(df[columns_j]+df['Socket1 L2 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket1 L3 Cache Hits']: - Y_tmp = df['Socket1 L3 Cache Misses'].div(df[columns_j]+df['Socket1 L3 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L2 Access (pti) Socket0']: - Y_tmp = df['L2 Miss (pti) Socket0'].div(df[columns_j]).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L2 Access (pti) Socket1']: - Y_tmp = df['L2 Miss (pti) Socket1'].div(df[columns_j]).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L2 Access (pti) Socket1.1']: - Y_tmp = df['L2 Miss (pti) 
Socket1.1'].div(df[columns_j]).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses', 'L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1', 'Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses', 'L3 Miss % Socket0', 'L3 Miss % Socket1', 'Ave L3 Miss Latency Socket0', 'Ave L3 Miss Latency Socket1']: - Y_tmp = df[columns_j].div(1) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L3 Miss Socket0', 'L3 Miss Socket1', 'L3 Miss Socket1.1']: - Y_tmp = df[columns_j].div(1000000000) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth']: - Y_tmp = df[columns_j].div(1000) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction']: - Y_tmp = df[columns_j].mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Package Joules Consumed Socket0 Energy Consumption', 'Package Joules Consumed Socket1 Energy Consumption']: - #Y_tmp = df[columns_j] - 40 - Y_tmp = df[columns_j] - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['IRA Socket0', 'IRA Socket1']: - Y_tmp = df['Utilization (%) Socket1'].mul(0) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - else: - Y = df[columns_j].values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - - return val, label - - -def cpupining_info(file, var): - with open(file, 'r') as f: - data_cpupins = json.load(f) - info_daq_application = 
json.dumps(data_cpupins['daq_application'][f'--name {var}'], skipkeys = True, allow_nan = True) - data_list = json.loads(info_daq_application) - - return data_list - - -def core_utilization(input_dir, file): - CPU_plot, User_plot = [], [] - - data_frame = pd.read_csv(f'{input_dir}{file}') - - print(data_frame) - - maxV = data_frame['CPU'].max() - minV = data_frame['CPU'].min() - - for j in range(minV, maxV + 1): - CPU_plot.append(j) - df = data_frame.loc[data_frame['CPU'] == j] - User_max = df['user (%)'].max() - User_plot.append(User_max) - - return CPU_plot, User_plot - - -def parse_cpu_cores(cpu_cores_i): - ranges = re.split(r',|-', cpu_cores_i) - cpu_cores = [] - for item in ranges: - if '-' in item: - start, end = map(int, item.split('-')) - cpu_cores.extend(range(start, end + 1)) - else: - cpu_cores.append(int(item)) - return cpu_cores - - -def extract_table_data(input_dir, file_core, data_list, emu_mode=False): - pinning_table, cpu_core_table, cpu_core_table_format, cpu_utilization_table, cpu_utilization_maximum_table, max_tmp = [], [], [], [], [], [] - cpu_core, cpu_utilization = core_utilization(input_dir, file_core) - denominator, sum_utilization = 0, 0 - - # Process data_list, and extract 'threads' sub-dictionary, and other data entries - for data_i, value_i in data_list.items(): - if data_i == 'threads': - for threads_i, cpu_cores_i in value_i.items(): - if emu_mode: - if threads_i in ['fakeprod-1..', 'fakeprod-2..', 'consumer-1..', 'consumer-2..', 'recording-1..', 'recording-2..', 'consumer-0', 'tpset-0', 'cleanup-0', 'recording-0', 'postproc-0-1..', 'postproc-0-2..']: - pinning_table.append(threads_i) - cpu_core_table.append(cpu_cores_i) - else: - pass - - else: - if threads_i in ['fakeprod-1..', 'fakeprod-2..']: - pass - else: - pinning_table.append(threads_i) - cpu_core_table.append(cpu_cores_i) - - else: - pinning_table.append(data_i) - cpu_core_table.append(value_i) - - # Calculate averages for each CPU core configuration - for cpu_cores_i in 
cpu_core_table: - try: - cpu_cores = parse_cpu_cores(cpu_cores_i) - cpu_core_table_format.append(cpu_cores) - except ValueError: - print(f'Check the format of the cpu pinning file. The [#,#] will not work.') - - for core_i in cpu_cores: - denominator += 1 - sum_utilization += cpu_utilization[core_i] - max_tmp.append(cpu_utilization[core_i]) - - utilization_average = round((sum_utilization / denominator), 2) - cpu_utilization_table.append(utilization_average) - cpu_utilization_maximum_table.append(max(max_tmp)) - denominator, sum_utilization = 0, 0 # Reset variables for the next iteration - - return pinning_table, cpu_core_table, cpu_utilization_maximum_table - - def output_file_check(input_dir, file, output_dir, chunk_size): try: with open('{}/{}.out'.format(input_dir, file), 'rb') as f: diff --git a/tools/basic_functions_performance.py b/tools/basic_functions_performance.py index 7be57eb..b2c94a6 100644 --- a/tools/basic_functions_performance.py +++ b/tools/basic_functions_performance.py @@ -1,154 +1,179 @@ -from basic_functions import * -from fpdf import FPDF -from fpdf.enums import XPos, YPos -from PIL import Image +import os +import json +import re + from warnings import warn -pcm_columns_list_0 = ['C0 Core C-state residency', 'Socket0 Memory Bandwidth', - 'Socket0 Instructions Per Cycle', 'Socket0 Instructions Retired Any (Million)', - 'Socket0 L2 Cache Misses', 'Socket0 L2 Cache Hits', - 'Socket0 L3 Cache Misses', 'Socket0 L3 Cache Hits'] -pcm_columns_list_1 = ['C0 Core C-state residency', 'Socket1 Memory Bandwidth', - 'Socket1 Instructions Per Cycle', 'Socket1 Instructions Retired Any (Million)', - 'Socket1 L2 Cache Misses', 'Socket1 L2 Cache Hits', - 'Socket1 L3 Cache Misses', 'Socket1 L3 Cache Hits'] -uprof_columns_list_0 = [' Utilization (%) Socket0', 'Total Mem Bw (GB/s) Socket0', - 'IPC (Sys + User) Socket0', 'IRA Socket0', #<------------- we don't have this (IRA) data - 'L2 Miss (pti) Socket0', 'L2 Access (pti) Socket0', - 'L3 Miss Socket0', 'L3 Miss % 
Socket0'] -uprof_columns_list_1 = ['Utilization (%) Socket1', 'Total Mem Bw (GB/s) Socket1', - 'IPC (Sys + User) Socket1', 'IRA Socket1', #<------------- we don't have this (IRA) data - 'L2 Miss (pti) Socket1', 'L2 Access (pti) Socket1', - 'L3 Miss Socket1', 'L3 Miss % Socket1'] -label_names = ['CPU Utilization (%)', 'Memory Bandwidth (GB/sec)', - 'Instructions Per Cycle', 'Instructions Retired Any (Million)', - 'L2 Cache Misses (Million)', 'L2 Cache [Misses/Accesses] (%)', - 'L3 Cache Misses (Million)', 'L3 Cache [Misses/Accesses] (%)'] +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd + +from fpdf import FPDF +from fpdf.enums import XPos, YPos + +from basic_functions import break_file_name, current_time, load_json + +from rich import print + +color_list = ['red', 'blue', 'green', 'cyan', 'orange', 'navy', 'magenta', 'lime', 'purple', 'hotpink', 'olive', 'salmon', 'teal', 'darkblue', 'darkgreen', 'darkcyan', 'darkorange', 'deepskyblue', 'darkmagenta', 'sienna', 'chocolate'] +linestyle_list = ['solid', 'dotted', 'dashed', 'dashdot','solid', 'dotted', 'dashed', 'dashdot'] +marker_list = ['s','o','.','p','P','^','<','>','*','+','x','X','d','D','h','H'] + +def pcm_columns_list(socket : int): + return { + 'CPU Utilization (%)' : 'C0 Core C-state residency', + 'Memory Bandwidth (GB/sec)' : f'Socket{socket} Memory Bandwidth', + 'Instructions Per Cycle' : f'Socket{socket} Instructions Per Cycle', + 'Instructions Retired Any (Million)' : f'Socket{socket} Instructions Retired Any (Million)', + 'L2 Cache Misses (Million)' : f'Socket{socket} L2 Cache Misses', + 'L2 Cache [Misses/Accesses] (%)' : f'Socket{socket} L2 Cache Hits', + 'L3 Cache Misses (Million)' : f'Socket{socket} L3 Cache Misses', + 'L3 Cache [Misses/Accesses] (%)' : f'Socket{socket} L3 Cache Hits' + } + + +def uprof_columns_list(socket : int): + return { + 'CPU Utilization (%)' : f' Utilization (%) Socket{socket}', + 'Memory Bandwidth (GB/sec)' : f'Total Mem Bw (GB/s) Socket{socket}', + 
'Instructions Per Cycle' : f'IPC (Sys + User) Socket{socket}', + 'Instructions Retired Any (Million)' : f'IRA Socket{socket}', #<------------- we don't have this (IRA) data + 'L2 Cache Misses (Million)' : f'L2 Miss (pti) Socket{socket}', + 'L2 Cache [Misses/Accesses] (%)' : f'L2 Access (pti) Socket{socket}', + 'L3 Cache Misses (Million)' : f'L3 Miss Socket{socket}', + 'L3 Cache [Misses/Accesses] (%)' : f'L3 Miss % Socket{socket}' + } + label_columns = ['Socket0','Socket1'] -def plot_vars_comparison(input_dir, output_dir, all_files, pdf_name): - X_plot, Y_plot_0, Y_plot_1, label_plot_0, label_plot_1 = [], [], [], [], [] - - for i, file_i in enumerate(all_files): - info = break_file_name(file_i) - data_frame = pd.read_csv(f'{input_dir}{file_i}') - X_plot.append(data_frame['NewTime'].values.tolist()) - Y_tmp_0, Y_tmp_1, label_tmp_0, label_tmp_1 = [], [], [], [] - - if info[0]=='grafana': - for k, (columns_pcm_0, columns_pcm_1) in enumerate(zip(pcm_columns_list_0, pcm_columns_list_1)): - Y_0, label_0 = get_column_val(data_frame, [columns_pcm_0], [label_columns[0]], file_i) - Y_1, label_1 = get_column_val(data_frame, [columns_pcm_1], [label_columns[1]], file_i) - Y_tmp_0.append(Y_0) - label_tmp_0.append(label_0) - Y_tmp_1.append(Y_1) - label_tmp_1.append(label_1) +def percentage(num : float, den : float) -> float: + return 100 * num / den + + +def dict_rev(d : dict) -> dict: + return {v : k for k , v in d.items()} + + +def get_column_val(df, columns, labels, file): + val = [] + label = [] + info = break_file_name(file) + + for (columns_j, label_j) in zip(columns, labels): + if columns_j in ['NewTime', 'Timestamp']: + continue + elif columns_j in ['Socket0 L2 Cache Hits', 'Socket0 L3 Cache Hits', 'Socket1 L2 Cache Hits', 'Socket1 L3 Cache Hits']: + socket = columns_j.split("Socket")[1][0] + cache = columns_j.split(" Cache")[0][-1] + Y = percentage(df[f"Socket{socket} L{cache} Cache Misses"], df[f"Socket{socket} L{cache} Cache Hits"] + df[f"Socket{socket} L{cache} Cache 
Misses"])
+        elif columns_j in ['L2 Access (pti) Socket0', 'L2 Access (pti) Socket1', 'L2 Access (pti) Socket1.1']:
+            cache = columns_j.split("L")[1][0]
+            socket = columns_j.split("Socket")[1]
+            Y = percentage(df[f'L{cache} Miss (pti) Socket{socket}'], df[f'L{cache} Access (pti) Socket{socket}'])
+        elif columns_j in ['Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses', 'L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1', 'Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses', 'L3 Miss % Socket0', 'L3 Miss % Socket1', 'Ave L3 Miss Latency Socket0', 'Ave L3 Miss Latency Socket1']:
+            Y = df[columns_j]
+        elif columns_j in ['L3 Miss Socket0', 'L3 Miss Socket1', 'L3 Miss Socket1.1']:
+            Y = df[columns_j].div(1_000_000_000)
+        elif columns_j in ['Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth']:
+            Y = df[columns_j].div(1000)
+        elif columns_j in ['Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction']:
+            Y = df[columns_j].mul(100)
+        elif columns_j in ['Package Joules Consumed Socket0 Energy Consumption', 'Package Joules Consumed Socket1 Energy Consumption']:
+            Y = df[columns_j]
+        elif columns_j in ['IRA Socket0', 'IRA Socket1']:
+            Y = df['Utilization (%) Socket1'].mul(0)
         else:
-            for k, (columns_uprof_0, columns_uprof_1) in enumerate(zip(uprof_columns_list_0, uprof_columns_list_1)):
-                Y_0, label_0 = get_column_val(data_frame, [columns_uprof_0], [label_columns[0]], file_i)
-                Y_1, label_1 = get_column_val(data_frame, [columns_uprof_1], [label_columns[1]], file_i)
-                Y_tmp_0.append(Y_0)
-                label_tmp_0.append(label_0)
-                Y_tmp_1.append(Y_1)
-                label_tmp_1.append(label_1)
+            Y = df[columns_j]
+        val.append(Y.values)
+        label.append(f'{info[1]} {info[5]} {info[2]} {label_j}')
-        Y_plot_0.append(Y_tmp_0)
-        label_plot_0.append(label_tmp_0)
-        Y_plot_1.append(Y_tmp_1)
-        label_plot_1.append(label_tmp_1)
+    return val, label
+
+def plot(ax : plt.Axes, x : list, y : list, x_label : str, y_label : str, colour : str, label : str, linestyle : str):
+    ax.plot(x, y, 
color=colour, label=label, linestyle=linestyle) + ax.set_ylabel(y_label) + ax.set_xlabel(x_label) + + ax.grid(which='major', color='gray', linestyle='dashed') + ax.legend(loc='upper left') + return + +def plot_vars_comparison(output_dir, grafana_data : list[str], pdf_name): + X_plot = [] + y_plot = {} + for file_i in grafana_data: + info = break_file_name(file_i) + data_frame = pd.read_csv(file_i) + X_plot.append(data_frame['NewTime'].values) + + if info[0] == "grafana": + column_generator = pcm_columns_list + else: + column_generator = uprof_columns_list + + y = {} + for s in range(2): + y_tmp = {} + for c in column_generator(s).values(): + v, _ = get_column_val(data_frame, [c], [label_columns[s]], file_i) + y_tmp[c] = v + plot_label = f"{info[2]} Socket{s}" + y[plot_label] = y_tmp + y_plot[f"{info[1]} {info[5]}"] = y + # Here we make the plot: matplotlib.rcParams['font.family'] = 'DejaVu Serif' rows=cols=2 rows_cols = rows*cols - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - #axs[3].axis('off') - - for i in range(len(Y_plot_0)): #number of files or tests - for j in range(len(Y_plot_0[i])): #number of metrix - if j < rows_cols: - label0_ij0 = re.sub('_', ' ', label_plot_0[i][j][0]) - axs[j].plot(X_plot[i], Y_plot_0[i][j][0], color=color_list[i], label=label0_ij0, linestyle=linestyle_list[0]) - axs[j].set_ylabel(f'{label_names[j]}') - axs[j].set_xlabel('Time (min)') - axs[j].grid(which='major', color='gray', linestyle='dashed') - axs[j].legend(loc='upper left') - else: - pass - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig0_{pdf_name}_results_socket0.png') - print(f'{output_dir}/Fig0_{pdf_name}_results_socket0.png') - plt.close() - - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - - for i in range(len(Y_plot_0)): - for j in range(len(Y_plot_0[i])): - if j < rows_cols: - pass - else: - label0_ij0 = re.sub('_', ' ', label_plot_0[i][j][0]) - 
axs[j-rows_cols].plot(X_plot[i], Y_plot_0[i][j][0], color=color_list[i], label=label0_ij0, linestyle=linestyle_list[0]) - axs[j-rows_cols].set_ylabel(f'{label_names[j]}') - axs[j-rows_cols].set_xlabel('Time (min)') - axs[j-rows_cols].grid(which='major', color='gray', linestyle='dashed') - axs[j-rows_cols].legend(loc='upper left') - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png') - print(f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png') - plt.close() - - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - - for i in range(len(Y_plot_1)): - for j in range(len(Y_plot_1[i])): - if j < rows_cols: - label1_ij0 = re.sub('_', ' ', label_plot_1[i][j][0]) - axs[j].plot(X_plot[i], Y_plot_1[i][j][0], color=color_list[i], label=label1_ij0, linestyle=linestyle_list[0]) - axs[j].set_ylabel(f'{label_names[j]}') - axs[j].set_xlabel('Time (min)') - axs[j].grid(which='major', color='gray', linestyle='dashed') - axs[j].legend(loc='upper left') - else: - pass - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig2_{pdf_name}_results_socket1.png') - print(f'{output_dir}/Fig2_{pdf_name}_results_socket1.png') - plt.close() - - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - - for i in range(len(Y_plot_1)): - for j in range(len(Y_plot_1[i])): - if j < rows_cols: - pass - else: - label1_ij0 = re.sub('_', ' ', label_plot_1[i][j][0]) - axs[j-rows_cols].plot(X_plot[i], Y_plot_1[i][j][0], color=color_list[i], label=label1_ij0, linestyle=linestyle_list[0]) - axs[j-rows_cols].set_ylabel(f'{label_names[j]}') - axs[j-rows_cols].set_xlabel('Time (min)') - axs[j-rows_cols].grid(which='major', color='gray', linestyle='dashed') - axs[j-rows_cols].legend(loc='upper left') - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png') - print(f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png') - 
plt.close() + fig_num = 0 + + for s in range(2): + y_labels = {**dict_rev(pcm_columns_list(s)), **dict_rev(uprof_columns_list(s))} + _, axs = plt.subplots(rows, cols, figsize=(18, 8)) + plt.style.use('default') + axs = axs.flatten() + for i, (test, data) in enumerate(y_plot.items()): + for k in data: + if f"Socket{s}" in k: break + for j, (name, metric) in enumerate(data[k].items()): + if j < rows_cols: + plot(axs[j], X_plot[i], metric[0], "Time (min)", y_labels[name], color_list[i], (test + " " + k).replace("_", " "), linestyle_list[0]) + else: + pass + + plt.tight_layout() + out = f'{output_dir}/Fig{fig_num}_{pdf_name}_results_socket{s}.png' + plt.savefig(out) + print(out) + plt.close() + fig_num += 1 + + for s in range(2): + y_labels = {**dict_rev(pcm_columns_list(s)), **dict_rev(uprof_columns_list(s))} + _, axs = plt.subplots(rows, cols, figsize=(18, 8)) + plt.style.use('default') + axs = axs.flatten() + for i, (test, data) in enumerate(y_plot.items()): + for k in data: + if f"Socket{s}" in k: break + for j, (name, metric) in enumerate(data[k].items()): + if j < rows_cols: + pass + else: + plot(axs[j - rows_cols], X_plot[i], metric[0], "Time (min)", y_labels[name], color_list[i], (test + " " + k).replace("_", " "), linestyle_list[0]) + + plt.tight_layout() + out = f'{output_dir}/Fig{fig_num}_{pdf_name}_results_cache_socket{s}.png' + plt.savefig(out) + print(out) + plt.close() + fig_num += 1 + return + def create_report_performance(input_dir, output_dir, all_files, times : list[list], readout_name, daqconf_files, core_utilization_files, parent_folder_dir, print_info=True, pdf_name='performance_report', repin_threads_file=[None], comment=['TBA']): - directory([input_dir, output_dir]) # Open pdf file pdf = FPDF() @@ -204,18 +229,18 @@ def create_report_performance(input_dir, output_dir, all_files, times : list[lis pdf.ln(10) #-------------------------------------------- FIGURES START ------------------------------------------------ - 
plot_vars_comparison(input_dir, output_dir, all_files, pdf_name) + plot_vars_comparison(output_dir, all_files, pdf_name) if info[3] == '0' or info[3] == '01': pdf.image(f'{output_dir}/Fig0_{pdf_name}_results_socket0.png', w=180) pdf.write(5, 'Figure. Socket0 results of the tests ran using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), Instructions Per Cycle, Instructions Retired Any (Million).') pdf.ln(10) - pdf.image(f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png', w=180) + pdf.image(f'{output_dir}/Fig2_{pdf_name}_results_cache_socket0.png', w=180) pdf.write(5, 'Figure. Socket0 results of the tests ran using the metrics L2 Cache Misses (Million), L2 Cache [Misses/Hits] (%), L3 Cache Misses (Million), and L3 Cache [Misses/Hits] (%).') pdf.ln(10) if info[3] == '1' or info[3] == '01': - pdf.image(f'{output_dir}/Fig2_{pdf_name}_results_socket1.png', w=180) + pdf.image(f'{output_dir}/Fig1_{pdf_name}_results_socket1.png', w=180) pdf.write(5, 'Figure. Socket1 results of the tests ran using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), Instructions Per Cycle, Instructions Retired Any (Million).') pdf.ln(10) pdf.image(f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png', w=180) @@ -239,6 +264,94 @@ def create_report_performance(input_dir, output_dir, all_files, times : list[lis print(f'The report was create and saved to {output_dir}/{pdf_name}.pdf') +def cpupining_info(file, var): + with open(file, 'r') as f: + data_cpupins = json.load(f) + info_daq_application = json.dumps(data_cpupins['daq_application'][f'--name {var}'], skipkeys = True, allow_nan = True) + data_list = json.loads(info_daq_application) + + return data_list + + +def core_utilization(input_dir, file): + CPU_plot, User_plot = [], [] + + data_frame = pd.read_csv(f'{input_dir}{file}') + + print(data_frame) + + maxV = data_frame['CPU'].max() + minV = data_frame['CPU'].min() + + for j in range(minV, maxV + 1): + CPU_plot.append(j) + df = data_frame.loc[data_frame['CPU'] 
== j] + User_max = df['user (%)'].max() + User_plot.append(User_max) + + return CPU_plot, User_plot + + +def parse_cpu_cores(cpu_cores_i): + ranges = re.split(r',|-', cpu_cores_i) + cpu_cores = [] + for item in ranges: + if '-' in item: + start, end = map(int, item.split('-')) + cpu_cores.extend(range(start, end + 1)) + else: + cpu_cores.append(int(item)) + return cpu_cores + + +def extract_table_data(input_dir, file_core, data_list, emu_mode=False): + pinning_table, cpu_core_table, cpu_core_table_format, cpu_utilization_table, cpu_utilization_maximum_table, max_tmp = [], [], [], [], [], [] + cpu_core, cpu_utilization = core_utilization(input_dir, file_core) + denominator, sum_utilization = 0, 0 + + # Process data_list, and extract 'threads' sub-dictionary, and other data entries + for data_i, value_i in data_list.items(): + if data_i == 'threads': + for threads_i, cpu_cores_i in value_i.items(): + if emu_mode: + if threads_i in ['fakeprod-1..', 'fakeprod-2..', 'consumer-1..', 'consumer-2..', 'recording-1..', 'recording-2..', 'consumer-0', 'tpset-0', 'cleanup-0', 'recording-0', 'postproc-0-1..', 'postproc-0-2..']: + pinning_table.append(threads_i) + cpu_core_table.append(cpu_cores_i) + else: + pass + + else: + if threads_i in ['fakeprod-1..', 'fakeprod-2..']: + pass + else: + pinning_table.append(threads_i) + cpu_core_table.append(cpu_cores_i) + + else: + pinning_table.append(data_i) + cpu_core_table.append(value_i) + + # Calculate averages for each CPU core configuration + for cpu_cores_i in cpu_core_table: + try: + cpu_cores = parse_cpu_cores(cpu_cores_i) + cpu_core_table_format.append(cpu_cores) + except ValueError: + print(f'Check the format of the cpu pinning file. 
The [#,#] will not work.') + + for core_i in cpu_cores: + denominator += 1 + sum_utilization += cpu_utilization[core_i] + max_tmp.append(cpu_utilization[core_i]) + + utilization_average = round((sum_utilization / denominator), 2) + cpu_utilization_table.append(utilization_average) + cpu_utilization_maximum_table.append(max(max_tmp)) + denominator, sum_utilization = 0, 0 # Reset variables for the next iteration + + return pinning_table, cpu_core_table, cpu_utilization_maximum_table + + def daqconf_info(file_daqconf, file_core, input_dir, var, pdf, if_pdf=False, repin_threads_file=False): applist = load_json(file_daqconf) @@ -275,7 +388,7 @@ def daqconf_info(file_daqconf, file_core, input_dir, var, pdf, if_pdf=False, rep pdf.cell(0, 10, f'Table of CPU core pins information of {var_i}.') pdf.ln(10) else: - warn("Cannot cpu pinning parse file, path must be absolute") + warn("Cannot parse cpu pinning file, path must be absolute") def table_cpupins(columns_data, pdf, if_pdf=False):