From fe2cf94dfa89dd3cc788a3cefa35505fefd01fb7 Mon Sep 17 00:00:00 2001 From: ShyamB97 Date: Thu, 3 Oct 2024 21:24:44 +0200 Subject: [PATCH] attempt plotting code cleanup --- tools/basic_functions.py | 198 +------------- tools/basic_functions_performance.py | 395 +++++++++++++++++---------- 2 files changed, 259 insertions(+), 334 deletions(-) diff --git a/tools/basic_functions.py b/tools/basic_functions.py index fbb819e..bd2fd08 100644 --- a/tools/basic_functions.py +++ b/tools/basic_functions.py @@ -1,27 +1,19 @@ import os +import pathlib import json import re -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import pathlib import struct +import pandas as pd + +from datetime import datetime as dt from urllib.parse import urljoin, urlencode from urllib.request import urlopen from urllib.error import URLError, HTTPError from http.client import HTTPResponse -from datetime import datetime as dt -from dateutil.parser import parse -from tabulate import tabulate - from rich import print -color_list = ['red', 'blue', 'green', 'cyan', 'orange', 'navy', 'magenta', 'lime', 'purple', 'hotpink', 'olive', 'salmon', 'teal', 'darkblue', 'darkgreen', 'darkcyan', 'darkorange', 'deepskyblue', 'darkmagenta', 'sienna', 'chocolate'] -linestyle_list = ['solid', 'dotted', 'dashed', 'dashdot','solid', 'dotted', 'dashed', 'dashdot'] -marker_list = ['s','o','.','p','P','^','<','>','*','+','x','X','d','D','h','H'] not_alma9_os = ['np04srv008', 'np04srv010', 'np04srv014', 'np04srv023', 'np04onl003', 'np04srv007', 'np04srv009', 'np04crt001'] @@ -84,12 +76,6 @@ def create_filename(test_args : dict, test_num : int) -> str: ]) -def directory(input_dir): - for dir_path in input_dir: - if not os.path.exists(dir_path): - os.makedirs(dir_path) - - def current_time(): now = dt.now() current_dnt = now.strftime('%Y-%m-%d %H:%M:%S') @@ -569,7 +555,7 @@ def process_files(input_dir, process_pcm_files=False, process_uprof_files=False, def break_file_name(file): - 
return file.split("/")[-1].split('-') + return file.split(".")[0].split("/")[-1].split('-') def sanitize_label(label): @@ -628,180 +614,6 @@ def convert_to_24_hour_format(time_str): return time_24_hour -def convert(s): - return list(map(lambda x: x, s)) - - -def get_column_val(df, columns, labels, file): - val = [] - label = [] - info = break_file_name(file) - - for j, (columns_j, label_j) in enumerate(zip(columns, labels)): - if columns_j in ['NewTime', 'Timestamp']: - continue - elif columns_j in ['Socket0 L2 Cache Hits']: - Y_tmp = df['Socket0 L2 Cache Misses'].div(df[columns_j]+df['Socket0 L2 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 L3 Cache Hits']: - Y_tmp = df['Socket0 L3 Cache Misses'].div(df[columns_j]+df['Socket0 L3 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket1 L2 Cache Hits']: - Y_tmp = df['Socket1 L2 Cache Misses'].div(df[columns_j]+df['Socket1 L2 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket1 L3 Cache Hits']: - Y_tmp = df['Socket1 L3 Cache Misses'].div(df[columns_j]+df['Socket1 L3 Cache Misses']).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L2 Access (pti) Socket0']: - Y_tmp = df['L2 Miss (pti) Socket0'].div(df[columns_j]).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L2 Access (pti) Socket1']: - Y_tmp = df['L2 Miss (pti) Socket1'].div(df[columns_j]).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L2 Access (pti) Socket1.1']: - Y_tmp = df['L2 Miss (pti) 
Socket1.1'].div(df[columns_j]).mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses', 'L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1', 'Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses', 'L3 Miss % Socket0', 'L3 Miss % Socket1', 'Ave L3 Miss Latency Socket0', 'Ave L3 Miss Latency Socket1']: - Y_tmp = df[columns_j].div(1) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['L3 Miss Socket0', 'L3 Miss Socket1', 'L3 Miss Socket1.1']: - Y_tmp = df[columns_j].div(1000000000) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth']: - Y_tmp = df[columns_j].div(1000) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction']: - Y_tmp = df[columns_j].mul(100) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['Package Joules Consumed Socket0 Energy Consumption', 'Package Joules Consumed Socket1 Energy Consumption']: - #Y_tmp = df[columns_j] - 40 - Y_tmp = df[columns_j] - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - elif columns_j in ['IRA Socket0', 'IRA Socket1']: - Y_tmp = df['Utilization (%) Socket1'].mul(0) - Y = Y_tmp.values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - else: - Y = df[columns_j].values.tolist() - val.append(Y) - label.append(f'{info[1]} {info[5]} {info[2]} {label_j}') - - return val, label - - -def cpupining_info(file, var): - with open(file, 'r') as f: - data_cpupins = json.load(f) - info_daq_application = 
json.dumps(data_cpupins['daq_application'][f'--name {var}'], skipkeys = True, allow_nan = True) - data_list = json.loads(info_daq_application) - - return data_list - - -def core_utilization(input_dir, file): - CPU_plot, User_plot = [], [] - - data_frame = pd.read_csv(f'{input_dir}{file}') - - print(data_frame) - - maxV = data_frame['CPU'].max() - minV = data_frame['CPU'].min() - - for j in range(minV, maxV + 1): - CPU_plot.append(j) - df = data_frame.loc[data_frame['CPU'] == j] - User_max = df['user (%)'].max() - User_plot.append(User_max) - - return CPU_plot, User_plot - - -def parse_cpu_cores(cpu_cores_i): - ranges = re.split(r',|-', cpu_cores_i) - cpu_cores = [] - for item in ranges: - if '-' in item: - start, end = map(int, item.split('-')) - cpu_cores.extend(range(start, end + 1)) - else: - cpu_cores.append(int(item)) - return cpu_cores - - -def extract_table_data(input_dir, file_core, data_list, emu_mode=False): - pinning_table, cpu_core_table, cpu_core_table_format, cpu_utilization_table, cpu_utilization_maximum_table, max_tmp = [], [], [], [], [], [] - cpu_core, cpu_utilization = core_utilization(input_dir, file_core) - denominator, sum_utilization = 0, 0 - - # Process data_list, and extract 'threads' sub-dictionary, and other data entries - for data_i, value_i in data_list.items(): - if data_i == 'threads': - for threads_i, cpu_cores_i in value_i.items(): - if emu_mode: - if threads_i in ['fakeprod-1..', 'fakeprod-2..', 'consumer-1..', 'consumer-2..', 'recording-1..', 'recording-2..', 'consumer-0', 'tpset-0', 'cleanup-0', 'recording-0', 'postproc-0-1..', 'postproc-0-2..']: - pinning_table.append(threads_i) - cpu_core_table.append(cpu_cores_i) - else: - pass - - else: - if threads_i in ['fakeprod-1..', 'fakeprod-2..']: - pass - else: - pinning_table.append(threads_i) - cpu_core_table.append(cpu_cores_i) - - else: - pinning_table.append(data_i) - cpu_core_table.append(value_i) - - # Calculate averages for each CPU core configuration - for cpu_cores_i in 
cpu_core_table: - try: - cpu_cores = parse_cpu_cores(cpu_cores_i) - cpu_core_table_format.append(cpu_cores) - except ValueError: - print(f'Check the format of the cpu pinning file. The [#,#] will not work.') - - for core_i in cpu_cores: - denominator += 1 - sum_utilization += cpu_utilization[core_i] - max_tmp.append(cpu_utilization[core_i]) - - utilization_average = round((sum_utilization / denominator), 2) - cpu_utilization_table.append(utilization_average) - cpu_utilization_maximum_table.append(max(max_tmp)) - denominator, sum_utilization = 0, 0 # Reset variables for the next iteration - - return pinning_table, cpu_core_table, cpu_utilization_maximum_table - - def output_file_check(input_dir, file, output_dir, chunk_size): try: with open('{}/{}.out'.format(input_dir, file), 'rb') as f: diff --git a/tools/basic_functions_performance.py b/tools/basic_functions_performance.py index 7be57eb..b2c94a6 100644 --- a/tools/basic_functions_performance.py +++ b/tools/basic_functions_performance.py @@ -1,154 +1,179 @@ -from basic_functions import * -from fpdf import FPDF -from fpdf.enums import XPos, YPos -from PIL import Image +import os +import json +import re + from warnings import warn -pcm_columns_list_0 = ['C0 Core C-state residency', 'Socket0 Memory Bandwidth', - 'Socket0 Instructions Per Cycle', 'Socket0 Instructions Retired Any (Million)', - 'Socket0 L2 Cache Misses', 'Socket0 L2 Cache Hits', - 'Socket0 L3 Cache Misses', 'Socket0 L3 Cache Hits'] -pcm_columns_list_1 = ['C0 Core C-state residency', 'Socket1 Memory Bandwidth', - 'Socket1 Instructions Per Cycle', 'Socket1 Instructions Retired Any (Million)', - 'Socket1 L2 Cache Misses', 'Socket1 L2 Cache Hits', - 'Socket1 L3 Cache Misses', 'Socket1 L3 Cache Hits'] -uprof_columns_list_0 = [' Utilization (%) Socket0', 'Total Mem Bw (GB/s) Socket0', - 'IPC (Sys + User) Socket0', 'IRA Socket0', #<------------- we don't have this (IRA) data - 'L2 Miss (pti) Socket0', 'L2 Access (pti) Socket0', - 'L3 Miss Socket0', 'L3 Miss % 
Socket0'] -uprof_columns_list_1 = ['Utilization (%) Socket1', 'Total Mem Bw (GB/s) Socket1', - 'IPC (Sys + User) Socket1', 'IRA Socket1', #<------------- we don't have this (IRA) data - 'L2 Miss (pti) Socket1', 'L2 Access (pti) Socket1', - 'L3 Miss Socket1', 'L3 Miss % Socket1'] -label_names = ['CPU Utilization (%)', 'Memory Bandwidth (GB/sec)', - 'Instructions Per Cycle', 'Instructions Retired Any (Million)', - 'L2 Cache Misses (Million)', 'L2 Cache [Misses/Accesses] (%)', - 'L3 Cache Misses (Million)', 'L3 Cache [Misses/Accesses] (%)'] +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd + +from fpdf import FPDF +from fpdf.enums import XPos, YPos + +from basic_functions import break_file_name, current_time, load_json + +from rich import print + +color_list = ['red', 'blue', 'green', 'cyan', 'orange', 'navy', 'magenta', 'lime', 'purple', 'hotpink', 'olive', 'salmon', 'teal', 'darkblue', 'darkgreen', 'darkcyan', 'darkorange', 'deepskyblue', 'darkmagenta', 'sienna', 'chocolate'] +linestyle_list = ['solid', 'dotted', 'dashed', 'dashdot','solid', 'dotted', 'dashed', 'dashdot'] +marker_list = ['s','o','.','p','P','^','<','>','*','+','x','X','d','D','h','H'] + +def pcm_columns_list(socket : int): + return { + 'CPU Utilization (%)' : 'C0 Core C-state residency', + 'Memory Bandwidth (GB/sec)' : f'Socket{socket} Memory Bandwidth', + 'Instructions Per Cycle' : f'Socket{socket} Instructions Per Cycle', + 'Instructions Retired Any (Million)' : f'Socket{socket} Instructions Retired Any (Million)', + 'L2 Cache Misses (Million)' : f'Socket{socket} L2 Cache Misses', + 'L2 Cache [Misses/Accesses] (%)' : f'Socket{socket} L2 Cache Hits', + 'L3 Cache Misses (Million)' : f'Socket{socket} L3 Cache Misses', + 'L3 Cache [Misses/Accesses] (%)' : f'Socket{socket} L3 Cache Hits' + } + + +def uprof_columns_list(socket : int): + return { + 'CPU Utilization (%)' : f' Utilization (%) Socket{socket}', + 'Memory Bandwidth (GB/sec)' : f'Total Mem Bw (GB/s) Socket{socket}', + 
'Instructions Per Cycle' : f'IPC (Sys + User) Socket{socket}', + 'Instructions Retired Any (Million)' : f'IRA Socket{socket}', #<------------- we don't have this (IRA) data + 'L2 Cache Misses (Million)' : f'L2 Miss (pti) Socket{socket}', + 'L2 Cache [Misses/Accesses] (%)' : f'L2 Access (pti) Socket{socket}', + 'L3 Cache Misses (Million)' : f'L3 Miss Socket{socket}', + 'L3 Cache [Misses/Accesses] (%)' : f'L3 Miss % Socket{socket}' + } + label_columns = ['Socket0','Socket1'] -def plot_vars_comparison(input_dir, output_dir, all_files, pdf_name): - X_plot, Y_plot_0, Y_plot_1, label_plot_0, label_plot_1 = [], [], [], [], [] - - for i, file_i in enumerate(all_files): - info = break_file_name(file_i) - data_frame = pd.read_csv(f'{input_dir}{file_i}') - X_plot.append(data_frame['NewTime'].values.tolist()) - Y_tmp_0, Y_tmp_1, label_tmp_0, label_tmp_1 = [], [], [], [] - - if info[0]=='grafana': - for k, (columns_pcm_0, columns_pcm_1) in enumerate(zip(pcm_columns_list_0, pcm_columns_list_1)): - Y_0, label_0 = get_column_val(data_frame, [columns_pcm_0], [label_columns[0]], file_i) - Y_1, label_1 = get_column_val(data_frame, [columns_pcm_1], [label_columns[1]], file_i) - Y_tmp_0.append(Y_0) - label_tmp_0.append(label_0) - Y_tmp_1.append(Y_1) - label_tmp_1.append(label_1) +def percentage(num : float, den : float) -> float: + return 100 * num / den + + +def dict_rev(d : dict) -> dict: + return {v : k for k , v in d.items()} + + +def get_column_val(df, columns, labels, file): + val = [] + label = [] + info = break_file_name(file) + + for (columns_j, label_j) in zip(columns, labels): + if columns_j in ['NewTime', 'Timestamp']: + continue + elif columns_j in ['Socket0 L2 Cache Hits', 'Socket0 L3 Cache Hits', 'Socket1 L2 Cache Hits', 'Socket1 L3 Cache Hits']: + socket = columns_j.split("Socket")[1][0] + cache = columns_j.split(" Cache")[0][-1] + Y = percentage(df[f"Socket{socket} L{cache} Cache Misses"], df[f"Socket{socket} L{cache} Cache Hits"] + df[f"Socket{socket} L{cache} Cache 
Misses"])
+        elif columns_j in ['L2 Access (pti) Socket0', 'L2 Access (pti) Socket1', 'L2 Access (pti) Socket1.1']:
+            cache = columns_j.split("L")[1][0]
+            socket = columns_j.split("Socket")[1]
+            Y = percentage(df[f'L{cache} Miss (pti) Socket{socket}'], df[f'L{cache} Access (pti) Socket{socket}'])
+        elif columns_j in ['Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses', 'L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1', 'Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses', 'L3 Miss % Socket0', 'L3 Miss % Socket1', 'Ave L3 Miss Latency Socket0', 'Ave L3 Miss Latency Socket1']:
+            Y = df[columns_j]
+        elif columns_j in ['L3 Miss Socket0', 'L3 Miss Socket1', 'L3 Miss Socket1.1']:
+            Y = df[columns_j].div(1_000_000_000)
+        elif columns_j in ['Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth']:
+            Y = df[columns_j].div(1000)
+        elif columns_j in ['Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction']:
+            Y = df[columns_j].mul(100)
+        elif columns_j in ['Package Joules Consumed Socket0 Energy Consumption', 'Package Joules Consumed Socket1 Energy Consumption']:
+            Y = df[columns_j]
+        elif columns_j in ['IRA Socket0', 'IRA Socket1']:
+            Y = df['Utilization (%) Socket1'].mul(0)
         else:
-            for k, (columns_uprof_0, columns_uprof_1) in enumerate(zip(uprof_columns_list_0, uprof_columns_list_1)):
-                Y_0, label_0 = get_column_val(data_frame, [columns_uprof_0], [label_columns[0]], file_i)
-                Y_1, label_1 = get_column_val(data_frame, [columns_uprof_1], [label_columns[1]], file_i)
-                Y_tmp_0.append(Y_0)
-                label_tmp_0.append(label_0)
-                Y_tmp_1.append(Y_1)
-                label_tmp_1.append(label_1)
+            Y = df[columns_j]
+        val.append(Y.values)
+        label.append(f'{info[1]} {info[5]} {info[2]} {label_j}')
-        Y_plot_0.append(Y_tmp_0)
-        label_plot_0.append(label_tmp_0)
-        Y_plot_1.append(Y_tmp_1)
-        label_plot_1.append(label_tmp_1)
+    return val, label
+
+def plot(ax : plt.Axes, x : list, y : list, x_label : str, y_label : str, colour : str, label : str, linestyle : str):
+    ax.plot(x, y, 
color=colour, label=label, linestyle=linestyle) + ax.set_ylabel(y_label) + ax.set_xlabel(x_label) + + ax.grid(which='major', color='gray', linestyle='dashed') + ax.legend(loc='upper left') + return + +def plot_vars_comparison(output_dir, grafana_data : list[str], pdf_name): + X_plot = [] + y_plot = {} + for file_i in grafana_data: + info = break_file_name(file_i) + data_frame = pd.read_csv(file_i) + X_plot.append(data_frame['NewTime'].values) + + if info[0] == "grafana": + column_generator = pcm_columns_list + else: + column_generator = uprof_columns_list + + y = {} + for s in range(2): + y_tmp = {} + for c in column_generator(s).values(): + v, _ = get_column_val(data_frame, [c], [label_columns[s]], file_i) + y_tmp[c] = v + plot_label = f"{info[2]} Socket{s}" + y[plot_label] = y_tmp + y_plot[f"{info[1]} {info[5]}"] = y + # Here we make the plot: matplotlib.rcParams['font.family'] = 'DejaVu Serif' rows=cols=2 rows_cols = rows*cols - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - #axs[3].axis('off') - - for i in range(len(Y_plot_0)): #number of files or tests - for j in range(len(Y_plot_0[i])): #number of metrix - if j < rows_cols: - label0_ij0 = re.sub('_', ' ', label_plot_0[i][j][0]) - axs[j].plot(X_plot[i], Y_plot_0[i][j][0], color=color_list[i], label=label0_ij0, linestyle=linestyle_list[0]) - axs[j].set_ylabel(f'{label_names[j]}') - axs[j].set_xlabel('Time (min)') - axs[j].grid(which='major', color='gray', linestyle='dashed') - axs[j].legend(loc='upper left') - else: - pass - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig0_{pdf_name}_results_socket0.png') - print(f'{output_dir}/Fig0_{pdf_name}_results_socket0.png') - plt.close() - - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - - for i in range(len(Y_plot_0)): - for j in range(len(Y_plot_0[i])): - if j < rows_cols: - pass - else: - label0_ij0 = re.sub('_', ' ', label_plot_0[i][j][0]) - 
axs[j-rows_cols].plot(X_plot[i], Y_plot_0[i][j][0], color=color_list[i], label=label0_ij0, linestyle=linestyle_list[0]) - axs[j-rows_cols].set_ylabel(f'{label_names[j]}') - axs[j-rows_cols].set_xlabel('Time (min)') - axs[j-rows_cols].grid(which='major', color='gray', linestyle='dashed') - axs[j-rows_cols].legend(loc='upper left') - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png') - print(f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png') - plt.close() - - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - - for i in range(len(Y_plot_1)): - for j in range(len(Y_plot_1[i])): - if j < rows_cols: - label1_ij0 = re.sub('_', ' ', label_plot_1[i][j][0]) - axs[j].plot(X_plot[i], Y_plot_1[i][j][0], color=color_list[i], label=label1_ij0, linestyle=linestyle_list[0]) - axs[j].set_ylabel(f'{label_names[j]}') - axs[j].set_xlabel('Time (min)') - axs[j].grid(which='major', color='gray', linestyle='dashed') - axs[j].legend(loc='upper left') - else: - pass - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig2_{pdf_name}_results_socket1.png') - print(f'{output_dir}/Fig2_{pdf_name}_results_socket1.png') - plt.close() - - fig, axs = plt.subplots(rows, cols, figsize=(18, 8)) - plt.style.use('default') - axs = axs.flatten() - - for i in range(len(Y_plot_1)): - for j in range(len(Y_plot_1[i])): - if j < rows_cols: - pass - else: - label1_ij0 = re.sub('_', ' ', label_plot_1[i][j][0]) - axs[j-rows_cols].plot(X_plot[i], Y_plot_1[i][j][0], color=color_list[i], label=label1_ij0, linestyle=linestyle_list[0]) - axs[j-rows_cols].set_ylabel(f'{label_names[j]}') - axs[j-rows_cols].set_xlabel('Time (min)') - axs[j-rows_cols].grid(which='major', color='gray', linestyle='dashed') - axs[j-rows_cols].legend(loc='upper left') - - plt.tight_layout() - plt.savefig(f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png') - print(f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png') - 
plt.close() + fig_num = 0 + + for s in range(2): + y_labels = {**dict_rev(pcm_columns_list(s)), **dict_rev(uprof_columns_list(s))} + _, axs = plt.subplots(rows, cols, figsize=(18, 8)) + plt.style.use('default') + axs = axs.flatten() + for i, (test, data) in enumerate(y_plot.items()): + for k in data: + if f"Socket{s}" in k: break + for j, (name, metric) in enumerate(data[k].items()): + if j < rows_cols: + plot(axs[j], X_plot[i], metric[0], "Time (min)", y_labels[name], color_list[i], (test + " " + k).replace("_", " "), linestyle_list[0]) + else: + pass + + plt.tight_layout() + out = f'{output_dir}/Fig{fig_num}_{pdf_name}_results_socket{s}.png' + plt.savefig(out) + print(out) + plt.close() + fig_num += 1 + + for s in range(2): + y_labels = {**dict_rev(pcm_columns_list(s)), **dict_rev(uprof_columns_list(s))} + _, axs = plt.subplots(rows, cols, figsize=(18, 8)) + plt.style.use('default') + axs = axs.flatten() + for i, (test, data) in enumerate(y_plot.items()): + for k in data: + if f"Socket{s}" in k: break + for j, (name, metric) in enumerate(data[k].items()): + if j < rows_cols: + pass + else: + plot(axs[j - rows_cols], X_plot[i], metric[0], "Time (min)", y_labels[name], color_list[i], (test + " " + k).replace("_", " "), linestyle_list[0]) + + plt.tight_layout() + out = f'{output_dir}/Fig{fig_num}_{pdf_name}_results_cache_socket{s}.png' + plt.savefig(out) + print(out) + plt.close() + fig_num += 1 + return + def create_report_performance(input_dir, output_dir, all_files, times : list[list], readout_name, daqconf_files, core_utilization_files, parent_folder_dir, print_info=True, pdf_name='performance_report', repin_threads_file=[None], comment=['TBA']): - directory([input_dir, output_dir]) # Open pdf file pdf = FPDF() @@ -204,18 +229,18 @@ def create_report_performance(input_dir, output_dir, all_files, times : list[lis pdf.ln(10) #-------------------------------------------- FIGURES START ------------------------------------------------ - 
plot_vars_comparison(input_dir, output_dir, all_files, pdf_name) + plot_vars_comparison(output_dir, all_files, pdf_name) if info[3] == '0' or info[3] == '01': pdf.image(f'{output_dir}/Fig0_{pdf_name}_results_socket0.png', w=180) pdf.write(5, 'Figure. Socket0 results of the tests ran using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), Instructions Per Cycle, Instructions Retired Any (Million).') pdf.ln(10) - pdf.image(f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png', w=180) + pdf.image(f'{output_dir}/Fig2_{pdf_name}_results_cache_socket0.png', w=180) pdf.write(5, 'Figure. Socket0 results of the tests ran using the metrics L2 Cache Misses (Million), L2 Cache [Misses/Hits] (%), L3 Cache Misses (Million), and L3 Cache [Misses/Hits] (%).') pdf.ln(10) if info[3] == '1' or info[3] == '01': - pdf.image(f'{output_dir}/Fig2_{pdf_name}_results_socket1.png', w=180) + pdf.image(f'{output_dir}/Fig1_{pdf_name}_results_socket1.png', w=180) pdf.write(5, 'Figure. Socket1 results of the tests ran using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), Instructions Per Cycle, Instructions Retired Any (Million).') pdf.ln(10) pdf.image(f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png', w=180) @@ -239,6 +264,94 @@ def create_report_performance(input_dir, output_dir, all_files, times : list[lis print(f'The report was create and saved to {output_dir}/{pdf_name}.pdf') +def cpupining_info(file, var): + with open(file, 'r') as f: + data_cpupins = json.load(f) + info_daq_application = json.dumps(data_cpupins['daq_application'][f'--name {var}'], skipkeys = True, allow_nan = True) + data_list = json.loads(info_daq_application) + + return data_list + + +def core_utilization(input_dir, file): + CPU_plot, User_plot = [], [] + + data_frame = pd.read_csv(f'{input_dir}{file}') + + print(data_frame) + + maxV = data_frame['CPU'].max() + minV = data_frame['CPU'].min() + + for j in range(minV, maxV + 1): + CPU_plot.append(j) + df = data_frame.loc[data_frame['CPU'] 
== j] + User_max = df['user (%)'].max() + User_plot.append(User_max) + + return CPU_plot, User_plot + + +def parse_cpu_cores(cpu_cores_i): + ranges = re.split(r',|-', cpu_cores_i) + cpu_cores = [] + for item in ranges: + if '-' in item: + start, end = map(int, item.split('-')) + cpu_cores.extend(range(start, end + 1)) + else: + cpu_cores.append(int(item)) + return cpu_cores + + +def extract_table_data(input_dir, file_core, data_list, emu_mode=False): + pinning_table, cpu_core_table, cpu_core_table_format, cpu_utilization_table, cpu_utilization_maximum_table, max_tmp = [], [], [], [], [], [] + cpu_core, cpu_utilization = core_utilization(input_dir, file_core) + denominator, sum_utilization = 0, 0 + + # Process data_list, and extract 'threads' sub-dictionary, and other data entries + for data_i, value_i in data_list.items(): + if data_i == 'threads': + for threads_i, cpu_cores_i in value_i.items(): + if emu_mode: + if threads_i in ['fakeprod-1..', 'fakeprod-2..', 'consumer-1..', 'consumer-2..', 'recording-1..', 'recording-2..', 'consumer-0', 'tpset-0', 'cleanup-0', 'recording-0', 'postproc-0-1..', 'postproc-0-2..']: + pinning_table.append(threads_i) + cpu_core_table.append(cpu_cores_i) + else: + pass + + else: + if threads_i in ['fakeprod-1..', 'fakeprod-2..']: + pass + else: + pinning_table.append(threads_i) + cpu_core_table.append(cpu_cores_i) + + else: + pinning_table.append(data_i) + cpu_core_table.append(value_i) + + # Calculate averages for each CPU core configuration + for cpu_cores_i in cpu_core_table: + try: + cpu_cores = parse_cpu_cores(cpu_cores_i) + cpu_core_table_format.append(cpu_cores) + except ValueError: + print(f'Check the format of the cpu pinning file. 
The [#,#] will not work.') + + for core_i in cpu_cores: + denominator += 1 + sum_utilization += cpu_utilization[core_i] + max_tmp.append(cpu_utilization[core_i]) + + utilization_average = round((sum_utilization / denominator), 2) + cpu_utilization_table.append(utilization_average) + cpu_utilization_maximum_table.append(max(max_tmp)) + denominator, sum_utilization = 0, 0 # Reset variables for the next iteration + + return pinning_table, cpu_core_table, cpu_utilization_maximum_table + + def daqconf_info(file_daqconf, file_core, input_dir, var, pdf, if_pdf=False, repin_threads_file=False): applist = load_json(file_daqconf) @@ -275,7 +388,7 @@ def daqconf_info(file_daqconf, file_core, input_dir, var, pdf, if_pdf=False, rep pdf.cell(0, 10, f'Table of CPU core pins information of {var_i}.') pdf.ln(10) else: - warn("Cannot cpu pinning parse file, path must be absolute") + warn("Cannot parse cpu pinning file, path must be absolute") def table_cpupins(columns_data, pdf, if_pdf=False):