From 8d7b7e76f65c7ad60d7a950e2f1b8cb992eedd3e Mon Sep 17 00:00:00 2001 From: simonge Date: Wed, 8 Jan 2025 16:01:14 +0000 Subject: [PATCH 1/7] Added pytorch training scripts --- .../RegressionModel.py | 104 ++++++++++++++++++ .../TaggerRegression.py | 50 +++++++++ 2 files changed, 154 insertions(+) create mode 100644 benchmarks/LOWQ2/reconstruction_training/RegressionModel.py create mode 100644 benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py diff --git a/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py b/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py new file mode 100644 index 00000000..589779ef --- /dev/null +++ b/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py @@ -0,0 +1,104 @@ +import torch +import torch.nn as nn +import torch.optim as optim +import numpy as np + +class RegressionModel(nn.Module): + def __init__(self): + super(RegressionModel, self).__init__() + self.fc1 = nn.Linear(4, 512) + self.fc2 = nn.Linear(512, 64) + self.fc4 = nn.Linear(64, 3) + self.input_mean = torch.tensor([0.0, 0.0, 0.0, 0.0]) + self.input_std = torch.tensor([1.0, 1.0, 1.0, 1.0]) + self.input_covariance = torch.tensor([[1.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 1.0]]) + self.output_mean = torch.tensor([0.0, 0.0, 0.0]) + self.output_std = torch.tensor([1.0, 1.0, 1.0]) + self.output_correlation = torch.tensor([[1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + [0.0, 0.0, 1.0]]) + + def forward(self, x): + x = (x-self.input_mean)/self.input_std + x = torch.tanh(self.fc1(x)) + x = torch.tanh(self.fc2(x)) + x = self.fc4(x) + x = x*self.output_std + self.output_mean + return x + + def adapt(self, input_data, output_data): + in_mean = input_data.mean(axis=0) + in_std = input_data.std (axis=0) + self.input_mean = torch.tensor(in_mean) + self.input_std = torch.tensor(in_std) + + # Calculate the correlation matrix of the input data + input_normalized = (input_data-in_mean)/in_std + input_correlation = np.corrcoef(input_normalized, rowvar=False) + # Invert the correlation matrix and convert into float tensor + self.input_covariance = torch.tensor(np.linalg.inv(input_correlation).astype(np.float32)) + + self.output_mean = torch.tensor(output_data.mean(axis=0)) + self.output_std = torch.tensor(output_data.std (axis=0)) + +def makeModel(): + # Create the model + model = RegressionModel() + # Define the optimizer + optimizer = optim.Adam(model.parameters(), lr=0.0001) + + # Define the loss function + criterion = nn.MSELoss() + + return model, optimizer, criterion + +def trainModel(epochs, train_loader, val_loader): + + # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + # print(f"Using device: {device}") + + model, optimizer, criterion = makeModel() + # model.to(device) + + # Verify that the model parameters are on the GPU + # for name, param in model.named_parameters(): + # print(f"{name} is on {param.device}") + + # Adapt the model using the training data from the training loader + model.adapt(train_loader.dataset.tensors[0].detach().numpy(), train_loader.dataset.tensors[1].detach().numpy()) + + for epoch in range(epochs): + model.train() + running_loss = 0.0 + for inputs, targets in train_loader: + # inputs, targets = inputs.to(device), targets.to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = criterion(outputs, targets) + loss.backward() + optimizer.step() + running_loss += loss.item() * inputs.size(0) + + epoch_loss = running_loss / len(train_loader.dataset) + # print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}") + + + # Validation step + model.eval() + val_loss = 0.0 + with torch.no_grad(): + for val_inputs, val_targets in val_loader: + # val_inputs, val_targets = val_inputs.to(device), val_targets.to(device) + val_outputs = model(val_inputs) + val_loss += criterion(val_outputs, val_targets).item() * val_inputs.size(0) + # val_outputs = model(val_input) + # val_loss = criterion(val_outputs, val_target) + + val_loss /= len(val_loader.dataset) + + print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss}, Val Loss: {val_loss}") + + return model \ No newline at end of file diff --git a/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py b/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py new file mode 100644 index 00000000..6e7eb8a7 --- /dev/null +++ b/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py @@ -0,0 +1,50 @@ +# import edm4hep +import torch +import argparse +from ProcessData import create_arrays +from torch.utils.data import DataLoader, TensorDataset +from RegressionModel import makeModel, trainModel + +# Parse arguments +parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.') +parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files') +parser.add_argument('--outModelFile', type=str, default="regression_model.onnx", help='Output file for the trained model') +args = parser.parse_args() +dataFiles = args.dataFiles +outModelFile = args.outModelFile + +input_data, target_data = create_arrays(dataFiles) + +torch_input_data = torch.tensor(input_data) +torch_target_data = torch.tensor(target_data) + +# Split data into training and validation sets +validation_fraction = 0.25 +split_index = int(len(torch_input_data) * (1 - validation_fraction)) +val_input_data = torch_input_data[split_index:] +val_target_data = torch_target_data[split_index:] +train_input_data = torch_input_data[:split_index] +train_target_data = torch_target_data[:split_index] + +# Create TensorDatasets +train_dataset = TensorDataset(train_input_data, train_target_data) +val_dataset = TensorDataset(val_input_data, val_target_data) +batch_size = 4096 + +# Create DataLoaders +train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True ) +val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) + +print(f"Training data: {len(train_input_data)} samples") + +epochs = 2000 +model = trainModel(epochs, train_loader, val_loader) + +# Save the trained model to ONNX format +dummy_input = torch_input_data[0].unsqueeze(0) # Create a dummy input for the model + +torch.onnx.export(model, dummy_input, outModelFile, + input_names=['input'], output_names=['output'], + dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}) + +print(f"Model has been saved to {outModelFile}") \ No newline at end of file From 25a3706a1b86afa8c7b52c06a15d6ecb634e23da Mon Sep 17 00:00:00 2001 From: simonge Date: Wed, 8 Jan 2025 16:08:11 +0000 Subject: [PATCH 2/7] Update formatting --- .../RegressionModel.py | 21 +++++++++---------- .../TaggerRegression.py | 13 ++++++------ 2 files changed, 17 insertions(+), 17 deletions(-) rename benchmarks/{LOWQ2 => lowq2}/reconstruction_training/RegressionModel.py (82%) rename benchmarks/{LOWQ2 => lowq2}/reconstruction_training/TaggerRegression.py (84%) diff --git a/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py b/benchmarks/lowq2/reconstruction_training/RegressionModel.py similarity index 82% rename from benchmarks/LOWQ2/reconstruction_training/RegressionModel.py rename to benchmarks/lowq2/reconstruction_training/RegressionModel.py index 589779ef..17e1031f 100644 --- a/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py +++ b/benchmarks/lowq2/reconstruction_training/RegressionModel.py @@ -11,15 +11,15 @@ def __init__(self): self.fc4 = nn.Linear(64, 3) self.input_mean = torch.tensor([0.0, 0.0, 0.0, 0.0]) self.input_std = torch.tensor([1.0, 1.0, 1.0, 1.0]) - self.input_covariance = torch.tensor([[1.0, 0.0, 0.0, 0.0], - [0.0, 1.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 1.0]]) + # self.input_covariance = torch.tensor([[1.0, 0.0, 0.0, 0.0], + # [0.0, 1.0, 0.0, 0.0], + # [0.0, 0.0, 1.0, 0.0], + # [0.0, 0.0, 0.0, 1.0]]) self.output_mean = torch.tensor([0.0, 0.0, 0.0]) self.output_std = torch.tensor([1.0, 1.0, 1.0]) - self.output_correlation = torch.tensor([[1.0, 0.0, 0.0], - [0.0, 1.0, 0.0], - [0.0, 0.0, 1.0]]) + # self.output_correlation = torch.tensor([[1.0, 0.0, 0.0], + # [0.0, 1.0, 0.0], + # [0.0, 0.0, 1.0]]) def forward(self, x): x = (x-self.input_mean)/self.input_std @@ -36,10 +36,10 @@ def adapt(self, input_data, output_data): self.input_std = torch.tensor(in_std) # Calculate the correlation matrix of the input data - input_normalized = (input_data-in_mean)/in_std - input_correlation = np.corrcoef(input_normalized, rowvar=False) + # input_normalized = (input_data-in_mean)/in_std + # input_correlation = np.corrcoef(input_normalized, rowvar=False) # Invert the correlation matrix and convert into float tensor - self.input_covariance = torch.tensor(np.linalg.inv(input_correlation).astype(np.float32)) + # self.input_covariance = torch.tensor(np.linalg.inv(input_correlation).astype(np.float32)) self.output_mean = torch.tensor(output_data.mean(axis=0)) self.output_std = torch.tensor(output_data.std (axis=0)) @@ -49,7 +49,6 @@ def makeModel(): model = RegressionModel() # Define the optimizer optimizer = optim.Adam(model.parameters(), lr=0.0001) - # Define the loss function criterion = nn.MSELoss() diff --git a/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py similarity index 84% rename from benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py rename to benchmarks/lowq2/reconstruction_training/TaggerRegression.py index 6e7eb8a7..e0575126 100644 --- a/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py +++ b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py @@ -20,10 +20,11 @@ # Split data into training and validation sets validation_fraction = 0.25 -split_index = int(len(torch_input_data) * (1 - validation_fraction)) -val_input_data = torch_input_data[split_index:] -val_target_data = torch_target_data[split_index:] -train_input_data = torch_input_data[:split_index] +split_index = int(len(torch_input_data) * (1 - validation_fraction)) + +val_input_data = torch_input_data[split_index:] +val_target_data = torch_target_data[split_index:] +train_input_data = torch_input_data[:split_index] train_target_data = torch_target_data[:split_index] # Create TensorDatasets @@ -37,8 +38,8 @@ print(f"Training data: {len(train_input_data)} samples") -epochs = 2000 -model = trainModel(epochs, train_loader, val_loader) +epochs = 100 +model = trainModel(epochs, train_loader, val_loader) # Save the trained model to ONNX format dummy_input = torch_input_data[0].unsqueeze(0) # Create a dummy input for the model From 08117088c44a88e37c06f88320d4811e0228da43 Mon Sep 17 00:00:00 2001 From: simonge Date: Wed, 8 Jan 2025 16:33:28 +0000 Subject: [PATCH 3/7] Added (copied) Snakefile --- Snakefile | 1 + .../lowq2/reconstruction_training/Snakefile | 48 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 benchmarks/lowq2/reconstruction_training/Snakefile diff --git a/Snakefile b/Snakefile index 14e28f0e..0cae3fb7 100644 --- a/Snakefile +++ b/Snakefile @@ -51,6 +51,7 @@ include: "benchmarks/insert_tau/Snakefile" include: "benchmarks/femc_electron/Snakefile" include: "benchmarks/femc_photon/Snakefile" include: "benchmarks/femc_pi0/Snakefile" +include: "benchmarks/lowq2/reconstruction_training/Snakefile" use_s3 = config["remote_provider"].lower() == "s3" use_xrootd = config["remote_provider"].lower() == "xrootd" diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile new file mode 100644 index 00000000..1e028329 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/Snakefile @@ -0,0 +1,48 @@ +rule calo_pid_recon: + input: + "root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root", + output: + "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root", + log: + "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log", + wildcard_constraints: + INDEX="\d{4}", + shell: """ +set -m # monitor mode to prevent lingering processes +exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \ + eicrecon {input} -Ppodio:output_file={output} \ + -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget +""" + + +rule calo_pid: + input: + electrons=expand( + "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root", + PARTICLE=["e-"], + ENERGY=["100MeVto20GeV"], + PHASE_SPACE=["130to177deg"], + INDEX=range(100), + ), + pions=expand( + "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root", + PARTICLE=["pi-"], + ENERGY=["100MeVto20GeV"], + PHASE_SPACE=["130to177deg"], + INDEX=range(100), + ), + matplotlibrc=".matplotlibrc", + script="benchmarks/lowq2/reconstruction_training/TaggerRegression.py", + output: + directory("results/{DETECTOR_CONFIG}/calo_pid") + shell: + """ +env \ +MATPLOTLIBRC={input.matplotlibrc} \ +DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \ +PLOT_TITLE={wildcards.DETECTOR_CONFIG} \ +INPUT_ELECTRONS="{input.electrons}" \ +INPUT_PIONS="{input.pions}" \ +OUTPUT_DIR={output} \ +python {input.script} +""" From 15598be1c91017eb24aba9471de910a84a54dae7 Mon Sep 17 00:00:00 2001 From: simonge Date: Wed, 8 Jan 2025 17:27:30 +0000 Subject: [PATCH 4/7] Update to snakefiles --- Snakefile | 42 +++++------ .../lowq2/reconstruction_training/Snakefile | 69 ++++++++----------- 2 files changed, 50 insertions(+), 61 deletions(-) diff --git a/Snakefile b/Snakefile index 0cae3fb7..d6f21d01 100644 --- a/Snakefile +++ b/Snakefile @@ -30,27 +30,27 @@ def find_epic_libraries(): return libs -include: "benchmarks/backgrounds/Snakefile" -include: "benchmarks/backwards_ecal/Snakefile" -include: "benchmarks/barrel_ecal/Snakefile" -include: "benchmarks/calo_pid/Snakefile" -include: "benchmarks/ecal_gaps/Snakefile" -include: "benchmarks/material_scan/Snakefile" -include: "benchmarks/tracking_performances/Snakefile" -include: "benchmarks/tracking_performances_dis/Snakefile" -include: "benchmarks/lfhcal/Snakefile" -include: "benchmarks/zdc_lyso/Snakefile" -include: "benchmarks/zdc_neutron/Snakefile" -include: "benchmarks/insert_muon/Snakefile" -include: "benchmarks/zdc_lambda/Snakefile" -include: "benchmarks/zdc_photon/Snakefile" -include: "benchmarks/zdc_pi0/Snakefile" -include: "benchmarks/zdc_sigma/Snakefile" -include: "benchmarks/insert_neutron/Snakefile" -include: "benchmarks/insert_tau/Snakefile" -include: "benchmarks/femc_electron/Snakefile" -include: "benchmarks/femc_photon/Snakefile" -include: "benchmarks/femc_pi0/Snakefile" +# include: "benchmarks/backgrounds/Snakefile" +# include: "benchmarks/backwards_ecal/Snakefile" +# include: "benchmarks/barrel_ecal/Snakefile" +# include: "benchmarks/calo_pid/Snakefile" +# include: "benchmarks/ecal_gaps/Snakefile" +# include: "benchmarks/material_scan/Snakefile" +# include: "benchmarks/tracking_performances/Snakefile" +# include: "benchmarks/tracking_performances_dis/Snakefile" +# include: "benchmarks/lfhcal/Snakefile" +# include: "benchmarks/zdc_lyso/Snakefile" +# include: "benchmarks/zdc_neutron/Snakefile" +# include: "benchmarks/insert_muon/Snakefile" +# include: "benchmarks/zdc_lambda/Snakefile" +# include: "benchmarks/zdc_photon/Snakefile" +# include: "benchmarks/zdc_pi0/Snakefile" +# include: "benchmarks/zdc_sigma/Snakefile" +# include: "benchmarks/insert_neutron/Snakefile" +# include: "benchmarks/insert_tau/Snakefile" +# include: "benchmarks/femc_electron/Snakefile" +# include: "benchmarks/femc_photon/Snakefile" +# include: "benchmarks/femc_pi0/Snakefile" include: "benchmarks/lowq2/reconstruction_training/Snakefile" use_s3 = config["remote_provider"].lower() == "s3" diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile index 1e028329..62e00465 100644 --- a/benchmarks/lowq2/reconstruction_training/Snakefile +++ b/benchmarks/lowq2/reconstruction_training/Snakefile @@ -1,48 +1,37 @@ -rule calo_pid_recon: - input: - "root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root", +rule lowq2_tensor_recon: output: - "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root", + "tensors.eicrecon.tree.edm4eic.root", log: - "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log", - wildcard_constraints: - INDEX="\d{4}", - shell: """ -set -m # monitor mode to prevent lingering processes -exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \ - eicrecon {input} -Ppodio:output_file={output} \ - -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget -""" + "tensors.eicrecon.tree.edm4eic.root.log", + shell: + """ + eicrecon root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root -Ppodio:output_file={output} \ + -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor \ + -Pjana:nevents=1000 + """ -rule calo_pid: +rule lowq2_reconstruction_training: + input: + data="tensors.eicrecon.tree.edm4eic.root", + script="TaggerRegression.py", + output: + "TaggerRegressionTest.onnx", + shell: + """ + python {input.script} --input {input.data} --outModelFile {output} + """ + +rule lowq2_reconstruction_inference: input: - electrons=expand( - "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root", - PARTICLE=["e-"], - ENERGY=["100MeVto20GeV"], - PHASE_SPACE=["130to177deg"], - INDEX=range(100), - ), - pions=expand( - "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root", - PARTICLE=["pi-"], - ENERGY=["100MeVto20GeV"], - PHASE_SPACE=["130to177deg"], - INDEX=range(100), - ), - matplotlibrc=".matplotlibrc", - script="benchmarks/lowq2/reconstruction_training/TaggerRegression.py", + data="tensors.eicrecon.tree.edm4eic.root", + model="TaggerRegressionTest.onnx", output: - directory("results/{DETECTOR_CONFIG}/calo_pid") + "TaggerInference.eicrecon.tree.edm4eic.root", shell: """ -env \ -MATPLOTLIBRC={input.matplotlibrc} \ -DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \ -PLOT_TITLE={wildcards.DETECTOR_CONFIG} \ -INPUT_ELECTRONS="{input.electrons}" \ -INPUT_PIONS="{input.pions}" \ -OUTPUT_DIR={output} \ -python {input.script} -""" + eicrecon {input.data} -Ppodio:output_file={output} + -Ptagger:inference_model={input.model} + -Pjana:nevents=1000 + -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} + """ \ No newline at end of file From c5f076c88c1f622eaa774ecd94e781e8a7c49599 Mon Sep 17 00:00:00 2001 From: simonge Date: Thu, 9 Jan 2025 15:32:48 +0000 Subject: [PATCH 5/7] Updated to work with snakemake locally --- .../reconstruction_training/ProcessData.py | 20 ++++++++ .../lowq2/reconstruction_training/Snakefile | 46 +++++++++++++++---- 2 files changed, 56 insertions(+), 10 deletions(-) create mode 100644 benchmarks/lowq2/reconstruction_training/ProcessData.py diff --git a/benchmarks/lowq2/reconstruction_training/ProcessData.py b/benchmarks/lowq2/reconstruction_training/ProcessData.py new file mode 100644 index 00000000..faf2dc10 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/ProcessData.py @@ -0,0 +1,20 @@ +import uproot +import awkward as ak + +def create_arrays(dataFiles): + + # List of branches to load + branches = ["_TaggerTrackerFeatureTensor_shape","_TaggerTrackerFeatureTensor_floatData","_TaggerTrackerTargetTensor_floatData"] + + # Load data from concatenated list of files + data = uproot.concatenate([f"{file}:events" for file in dataFiles], branches, library="ak") + + # Filter events with at least one track + num_tracks = data["_TaggerTrackerFeatureTensor_shape"][:,0] + filtered_data = data[num_tracks == 1] + + input_data = filtered_data["_TaggerTrackerFeatureTensor_floatData"] + target_data = filtered_data["_TaggerTrackerTargetTensor_floatData"] + + return input_data, target_data + diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile index 62e00465..1792b772 100644 --- a/benchmarks/lowq2/reconstruction_training/Snakefile +++ b/benchmarks/lowq2/reconstruction_training/Snakefile @@ -1,37 +1,63 @@ +# Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file. rule lowq2_tensor_recon: output: "tensors.eicrecon.tree.edm4eic.root", log: "tensors.eicrecon.tree.edm4eic.root.log", + params: + input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,3), number=range(0, 20)), + compact_xml="epic_lowq2.xml", shell: """ - eicrecon root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root -Ppodio:output_file={output} \ + eicrecon {params.input} -Ppodio:output_file={output} \ -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor \ - -Pjana:nevents=1000 + -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \ + -Pdd4hep:xml_files={params.compact_xml} \ """ - +# Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor. rule lowq2_reconstruction_training: input: data="tensors.eicrecon.tree.edm4eic.root", script="TaggerRegression.py", output: - "TaggerRegressionTest.onnx", + "TestTaggerRegression.onnx", shell: """ - python {input.script} --input {input.data} --outModelFile {output} + python {input.script} --dataFiles {input.data} --outModelFile {output} """ +# Runs the inference with the default model. rule lowq2_reconstruction_inference: input: data="tensors.eicrecon.tree.edm4eic.root", - model="TaggerRegressionTest.onnx", output: "TaggerInference.eicrecon.tree.edm4eic.root", + params: + compact_xml="epic_lowq2.xml", + shell: + """ + eicrecon {input.data} -Ppodio:output_file={output} \ + -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \ + -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \ + -Pdd4hep:xml_files={params.compact_xml} \ + """ + +# Runs the inference with the new model. +rule lowq2_reconstruction_new_inference: + input: + data="tensors.eicrecon.tree.edm4eic.root", + model="TestTaggerRegression.onnx", + output: + "TaggerInferenceNew.eicrecon.tree.edm4eic.root", + params: + compact_xml="epic_lowq2.xml", shell: """ - eicrecon {input.data} -Ppodio:output_file={output} - -Ptagger:inference_model={input.model} - -Pjana:nevents=1000 - -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} + eicrecon {input.data} -Ppodio:output_file={output} \ + -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \ + -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \ + -Pdd4hep:xml_files={params.compact_xml} \ + -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \ + -Peicrecon:LogLevel=error \ """ \ No newline at end of file From 4646a89c276f3fa1081df7a54e576ac39c0706d6 Mon Sep 17 00:00:00 2001 From: simonge Date: Thu, 9 Jan 2025 17:51:44 +0000 Subject: [PATCH 6/7] First go at full workflow (pending other PRs) --- .gitlab-ci.yml | 106 ++++----- .../lowq2/reconstruction_training/Snakefile | 56 ++++- .../TaggerRegression.py | 2 +- .../reconstruction_training/TestModel.py | 211 ++++++++++++++++++ 4 files changed, 310 insertions(+), 65 deletions(-) create mode 100644 benchmarks/lowq2/reconstruction_training/TestModel.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f5f7b7c5..1a6202d9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -125,62 +125,64 @@ get_data: - runner_system_failure include: - - local: 'benchmarks/backgrounds/config.yml' - - local: 'benchmarks/backwards_ecal/config.yml' - - local: 'benchmarks/calo_pid/config.yml' - - local: 'benchmarks/ecal_gaps/config.yml' - - local: 'benchmarks/tracking_detectors/config.yml' - - local: 'benchmarks/tracking_performances/config.yml' - - local: 'benchmarks/tracking_performances_dis/config.yml' - - local: 'benchmarks/barrel_ecal/config.yml' - - local: 'benchmarks/barrel_hcal/config.yml' - - local: 'benchmarks/lfhcal/config.yml' - - local: 'benchmarks/zdc/config.yml' - - local: 'benchmarks/zdc_lyso/config.yml' - - local: 'benchmarks/zdc_neutron/config.yml' - - local: 'benchmarks/zdc_photon/config.yml' - - local: 'benchmarks/zdc_pi0/config.yml' - - local: 'benchmarks/material_scan/config.yml' - - local: 'benchmarks/pid/config.yml' - - local: 'benchmarks/timing/config.yml' - - local: 'benchmarks/b0_tracker/config.yml' - - local: 'benchmarks/insert_muon/config.yml' - - local: 'benchmarks/insert_tau/config.yml' - - local: 'benchmarks/zdc_sigma/config.yml' - - local: 'benchmarks/zdc_lambda/config.yml' - - local: 'benchmarks/insert_neutron/config.yml' - - local: 'benchmarks/femc_electron/config.yml' - - local: 'benchmarks/femc_photon/config.yml' - - local: 'benchmarks/femc_pi0/config.yml' + # - local: 'benchmarks/backgrounds/config.yml' + # - local: 'benchmarks/backwards_ecal/config.yml' + # - local: 'benchmarks/calo_pid/config.yml' + # - local: 'benchmarks/ecal_gaps/config.yml' + # - local: 'benchmarks/tracking_detectors/config.yml' + # - local: 'benchmarks/tracking_performances/config.yml' + # - local: 'benchmarks/tracking_performances_dis/config.yml' + # - local: 'benchmarks/barrel_ecal/config.yml' + # - local: 'benchmarks/barrel_hcal/config.yml' + # - local: 'benchmarks/lfhcal/config.yml' + # - local: 'benchmarks/zdc/config.yml' + # - local: 'benchmarks/zdc_lyso/config.yml' + # - local: 'benchmarks/zdc_neutron/config.yml' + # - local: 'benchmarks/zdc_photon/config.yml' + # - local: 'benchmarks/zdc_pi0/config.yml' + # - local: 'benchmarks/material_scan/config.yml' + # - local: 'benchmarks/pid/config.yml' + # - local: 'benchmarks/timing/config.yml' + # - local: 'benchmarks/b0_tracker/config.yml' + # - local: 'benchmarks/insert_muon/config.yml' + # - local: 'benchmarks/insert_tau/config.yml' + # - local: 'benchmarks/zdc_sigma/config.yml' + # - local: 'benchmarks/zdc_lambda/config.yml' + # - local: 'benchmarks/insert_neutron/config.yml' + # - local: 'benchmarks/femc_electron/config.yml' + # - local: 'benchmarks/femc_photon/config.yml' + # - local: 'benchmarks/femc_pi0/config.yml' + - local: 'benchmarks/lowq2/reconstruction_training/config.yml' deploy_results: allow_failure: true stage: deploy needs: - - "collect_results:backgrounds" - - "collect_results:backwards_ecal" - - "collect_results:barrel_ecal" - - "collect_results:barrel_hcal" - - "collect_results:calo_pid" - - "collect_results:ecal_gaps" - - "collect_results:lfhcal" - - "collect_results:material_scan" - - "collect_results:pid" - - "collect_results:tracking_performance" - - "collect_results:tracking_performance_campaigns" - - "collect_results:zdc_sigma" - - "collect_results:zdc_lambda" - - "collect_results:insert_neutron" - - "collect_results:tracking_performances_dis" - - "collect_results:zdc" - - "collect_results:zdc_lyso" - - "collect_results:zdc_neutron" - - "collect_results:insert_muon" - - "collect_results:insert_tau" - - "collect_results:zdc_photon" - - "collect_results:zdc_pi0" - - "collect_results:femc_electron" - - "collect_results:femc_photon" - - "collect_results:femc_pi0" + # - "collect_results:backgrounds" + # - "collect_results:backwards_ecal" + # - "collect_results:barrel_ecal" + # - "collect_results:barrel_hcal" + # - "collect_results:calo_pid" + # - "collect_results:ecal_gaps" + # - "collect_results:lfhcal" + # - "collect_results:material_scan" + # - "collect_results:pid" + # - "collect_results:tracking_performance" + # - "collect_results:tracking_performance_campaigns" + # - "collect_results:zdc_sigma" + # - "collect_results:zdc_lambda" + # - "collect_results:insert_neutron" + # - "collect_results:tracking_performances_dis" + # - "collect_results:zdc" + # - "collect_results:zdc_lyso" + # - "collect_results:zdc_neutron" + # - "collect_results:insert_muon" + # - "collect_results:insert_tau" + # - "collect_results:zdc_photon" + # - "collect_results:zdc_pi0" + # - "collect_results:femc_electron" + # - "collect_results:femc_photon" + # - "collect_results:femc_pi0" + - "collect_results:lowq2_reconstruction_training" script: - snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json - find results -print | sort | tee summary.txt diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile index 1792b772..fcabdf44 100644 --- a/benchmarks/lowq2/reconstruction_training/Snakefile +++ b/benchmarks/lowq2/reconstruction_training/Snakefile @@ -1,11 +1,17 @@ +import os + +# Get the LOWQ2_DATADIR environment variable, or use a default value if it is not set +outputdir = os.getenv("LOWQ2_DATADIR", "") +resultsdir = os.getenv("LOWQ2_RESULTSDIR", "") + # Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file. rule lowq2_tensor_recon: output: - "tensors.eicrecon.tree.edm4eic.root", + f"{outputdir}tensors.eicrecon.tree.edm4eic.root", log: - "tensors.eicrecon.tree.edm4eic.root.log", - params: - input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,3), number=range(0, 20)), + f"{outputdir}tensors.eicrecon.tree.edm4eic.root.log", + params: + input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,2), number=range(0, 1)), compact_xml="epic_lowq2.xml", shell: """ @@ -18,10 +24,10 @@ rule lowq2_tensor_recon: # Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor. rule lowq2_reconstruction_training: input: - data="tensors.eicrecon.tree.edm4eic.root", + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", script="TaggerRegression.py", output: - "TestTaggerRegression.onnx", + f"{outputdir}TestTaggerTrackerTransportation.onnx", shell: """ python {input.script} --dataFiles {input.data} --outModelFile {output} @@ -30,9 +36,9 @@ rule lowq2_reconstruction_training: # Runs the inference with the default model. rule lowq2_reconstruction_inference: input: - data="tensors.eicrecon.tree.edm4eic.root", + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", output: - "TaggerInference.eicrecon.tree.edm4eic.root", + f"{outputdir}TaggerInference.eicrecon.tree.edm4eic.root", params: compact_xml="epic_lowq2.xml", shell: @@ -43,13 +49,13 @@ rule lowq2_reconstruction_inference: -Pdd4hep:xml_files={params.compact_xml} \ """ -# Runs the inference with the new model. +# Check inference runs with the new model. rule lowq2_reconstruction_new_inference: input: - data="tensors.eicrecon.tree.edm4eic.root", - model="TestTaggerRegression.onnx", + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=f"{outputdir}TestTaggerTrackerTransportation.onnx", output: - "TaggerInferenceNew.eicrecon.tree.edm4eic.root", + f"{outputdir}TaggerInferenceNew.eicrecon.tree.edm4eic.root", params: compact_xml="epic_lowq2.xml", shell: @@ -60,4 +66,30 @@ rule lowq2_reconstruction_new_inference: -Pdd4hep:xml_files={params.compact_xml} \ -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \ -Peicrecon:LogLevel=error \ + """ + +# Create plots showing the performance of a model. +rule lowq2_reconstruction_new_plot: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=lambda wildcards: f"{outputdir}{wildcards.model}.onnx", + output: + directory(expand(f"{resultsdir}{{model}}", model="{model}")), + shell: + """ + mkdir -p {output} + python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output} + """ + +# Create plots showing the performance of a model. +rule lowq2_reconstruction_old_plot: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=lambda wildcards: f"calibrations/onnx/{wildcards.model}.onnx", + output: + directory(expand(f"{resultsdir}{{model}}", model="{model}")), + shell: + """ + mkdir -p {output} + python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output} """ \ No newline at end of file diff --git a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py index e0575126..e3d36ee1 100644 --- a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py +++ b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py @@ -38,7 +38,7 @@ print(f"Training data: {len(train_input_data)} samples") -epochs = 100 +epochs = 1000 model = trainModel(epochs, train_loader, val_loader) # Save the trained model to ONNX format diff --git a/benchmarks/lowq2/reconstruction_training/TestModel.py b/benchmarks/lowq2/reconstruction_training/TestModel.py new file mode 100644 index 00000000..dfd95931 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/TestModel.py @@ -0,0 +1,211 @@ +import onnxruntime as ort +import argparse +import numpy as np +from ProcessData import create_arrays +import matplotlib.pyplot as plt +from matplotlib.colors import LogNorm +from scipy.stats import norm +from scipy.optimize import curve_fit + +# Parse arguments +parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.') +parser.add_argument('--modelFile', type=str, default="regression_model.onnx", help='Path to the ONNX model file') +parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files') +parser.add_argument('--outDir', type=str, default=".", help='Output directory') +args = parser.parse_args() +modelFile = args.modelFile +dataFiles = args.dataFiles +outDir = args.outDir +outGraphFile = outDir + "/output_vs_target.png" +outGraphFile2 = outDir + "/output_vs_target2.png" +outGraphFile3 = outDir + "/output_vs_target3.png" +outGraphFile4 = outDir + "/output_vs_target4.png" + +input_data, target_data = create_arrays(dataFiles) + +target_data = np.array(target_data) + +# Load the ONNX model +session = ort.InferenceSession(modelFile) + +# Run the model on the input data +input_name = session.get_inputs()[0].name +output_name = session.get_outputs()[0].name +input_data = np.array(input_data,dtype=np.float32) +output = session.run([output_name], {input_name: input_data}) +output = np.array(output[0]) + +out_theta = np.arctan2(np.sqrt(output[:,0]**2 + output[:,1]**2),output[:,2]) +out_phi = np.arctan2(output[:,1],output[:,0]) +out_mag = np.sqrt(output[:,0]**2 + output[:,1]**2 + output[:,2]**2) +in_theta = np.arctan2(np.sqrt(target_data[:,0]**2 + target_data[:,1]**2),target_data[:,2]) +in_phi = np.arctan2(target_data[:,1],target_data[:,0]) +in_mag = np.sqrt(target_data[:,0]**2 + target_data[:,1]**2 + target_data[:,2]**2) + + +thetadiff = out_theta - in_theta +phidiff = out_phi - in_phi +# Move phidiff to within -pi/2 and pi/2 +phidiff = (phidiff + np.pi) % (2 * np.pi) - np.pi +magdiff = out_mag - in_mag + +diff = (target_data - output)/target_data +diffrange = [[-5,5],[-5,5],[-0.5,0.5]] +datarange = [[-0.02,0.02],[-0.02,0.02],[-1,0]] + +# Use the 'seismic' colormap +cmap = plt.get_cmap('seismic') + +# Creates histograms to compare the target and output data +fig, axs = plt.subplots(3, 3, figsize=(12, 12)) +for i in range(3): + # 2D histograms showing trends in the data + axs[0,i].hist2d(target_data[:,i], output[:,i], bins=100, range=[datarange[i],datarange[i]], cmap="seismic", norm=LogNorm(), label="Output vs Target") + axs[0,i].set_xlabel(f"Variable {i} Target") + axs[0,i].set_ylabel(f"Variable {i} Output") + + axs[1,i].hist(diff[:,i], bins=100, alpha=0.5, range=diffrange[i], label="Difference") + axs[1,i].set_xlabel(f"Variable {i} Difference") + axs[1,i].set_ylabel("Counts") + + axs[2,i].hist2d(target_data[:,i], diff[:,i], bins=100, range=[datarange[i],diffrange[i]], cmap="seismic", norm=LogNorm(), label="Difference vs Target") + axs[2,i].set_xlabel(f"Variable {i} Target") + axs[2,i].set_ylabel(f"Variable {i} Difference") + +plt.show() +plt.savefig(outGraphFile) + +# Creates histograms to compare theta, phi and mag target and output data +fig2, axs2 = plt.subplots(3, 3, figsize=(12, 12)) + +thetarange = [np.pi-0.01,np.pi] +phirange = [-np.pi,np.pi] +magrange = [0,1] + +thetadiffrange = [-0.01,0.01] +phidiffrange = [-np.pi,np.pi] +magdiffrange = [-0.1,0.1] + +# 2D histograms showing trends in the data +axs2[0,0].hist2d(out_theta, in_theta, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,0].set_xlabel("Theta Target") +axs2[0,0].set_ylabel("Theta Output") + +axs2[0,1].hist2d(out_phi, in_phi, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs2[0,1].set_xlabel("Phi Target") +axs2[0,1].set_ylabel("Phi Output") + +axs2[0,2].hist2d(out_mag, in_mag, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,2].set_xlabel("Mag Target") +axs2[0,2].set_ylabel("Mag Output") + +axs2[1,0].hist(thetadiff, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs2[1,0].set_xlabel("Theta Difference") +axs2[1,0].set_ylabel("Counts") + +axs2[1,1].hist(phidiff, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs2[1,1].set_xlabel("Phi Difference") +axs2[1,1].set_ylabel("Counts") + +axs2[1,2].hist(magdiff, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs2[1,2].set_xlabel("Mag Difference") +axs2[1,2].set_ylabel("Counts") + +axs2[2,0].hist2d(in_theta, thetadiff, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,0].set_xlabel("Theta Target") +axs2[2,0].set_ylabel("Theta Difference") + +axs2[2,1].hist2d(in_phi, phidiff, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs2[2,1].set_xlabel("Phi Target") +axs2[2,1].set_ylabel("Phi Difference") + +axs2[2,2].hist2d(in_mag, magdiff, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,2].set_xlabel("Mag Target") +axs2[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile2) + +# Create histograms where the theta value has been cut at less than 3.14 +fig3, axs3 = plt.subplots(3, 3, figsize=(12, 12)) + +out_theta_cut = out_theta[out_theta < 3.14] +in_theta_cut = in_theta[out_theta < 3.14] +thetadiff_cut = thetadiff[out_theta < 3.14] + +out_phi_cut = out_phi[out_theta < 3.14] +in_phi_cut = in_phi[out_theta < 3.14] +phidiff_cut = phidiff[out_theta < 3.14] + +out_mag_cut = out_mag[out_theta < 3.14] +in_mag_cut = in_mag[out_theta < 3.14] +magdiff_cut = magdiff[out_theta < 3.14] + +axs3[0,0].hist2d(out_theta_cut, in_theta_cut, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,0].set_xlabel("Theta Target") +axs3[0,0].set_ylabel("Theta Output") + +axs3[0,1].hist2d(out_phi_cut, in_phi_cut, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs3[0,1].set_xlabel("Phi Target") +axs3[0,1].set_ylabel("Phi Output") + +axs3[0,2].hist2d(out_mag_cut, in_mag_cut, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,2].set_xlabel("Mag Target") +axs3[0,2].set_ylabel("Mag Output") + +axs3[1,0].hist(thetadiff_cut, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs3[1,0].set_xlabel("Theta Difference") +axs3[1,0].set_ylabel("Counts") + +axs3[1,1].hist(phidiff_cut, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs3[1,1].set_xlabel("Phi Difference") +axs3[1,1].set_ylabel("Counts") + +axs3[1,2].hist(magdiff_cut, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs3[1,2].set_xlabel("Mag Difference") +axs3[1,2].set_ylabel("Counts") + +axs3[2,0].hist2d(in_theta_cut, thetadiff_cut, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,0].set_xlabel("Theta Target") +axs3[2,0].set_ylabel("Theta Difference") + +axs3[2,1].hist2d(in_phi_cut, phidiff_cut, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs3[2,1].set_xlabel("Phi Target") +axs3[2,1].set_ylabel("Phi Difference") + +axs3[2,2].hist2d(in_mag_cut, magdiff_cut, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,2].set_xlabel("Mag Target") +axs3[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile3) + +# Create plots where a Gaussian has been fitted to the data +# Function to fit a Gaussian and plot the results +def plot_gaussian_fit(ax, data, range, xlabel, ylabel): + def gaussian(x, mu, sigma, amplitude): + return amplitude * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + hist, bin_edges = np.histogram(data, bins=100, range=range, density=True) + bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 + + popt, _ = curve_fit(gaussian, bin_centers, hist, p0=[0, 0.01, 1]) + mu, sigma, amplitude = popt + print(f"mu={mu}, sigma={sigma}, amplitude={amplitude}") + + x = np.linspace(range[0], range[1], 100) + ax.plot(x, gaussian(x, *popt), 'k', linewidth=2) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.hist(data, bins=100, alpha=0.5, range=range, edgecolor='black', density=True) + ax.legend([f'Fit: $\mu$={mu:.5f}, $\sigma$={sigma:.5f}']) + +# Create histograms with Gaussian fits +fig4, axs4 = plt.subplots(3, 1, figsize=(8, 12)) + +plot_gaussian_fit(axs4[0], thetadiff, thetadiffrange, "Theta Difference", "Density") +plot_gaussian_fit(axs4[1], phidiff_cut, phidiffrange, "Phi Difference", "Density") +plot_gaussian_fit(axs4[2], magdiff, magdiffrange, "Mag Difference", "Density") + +plt.show() +plt.savefig(outGraphFile4) \ No newline at end of file From 2473f796db611544bd9148eed24af768ba356547 Mon Sep 17 00:00:00 2001 From: simonge Date: Thu, 9 Jan 2025 18:33:15 +0000 Subject: [PATCH 7/7] Add missing config.yml --- .../lowq2/reconstruction_training/config.yml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 benchmarks/lowq2/reconstruction_training/config.yml diff --git a/benchmarks/lowq2/reconstruction_training/config.yml b/benchmarks/lowq2/reconstruction_training/config.yml new file mode 100644 index 00000000..aa58950b --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/config.yml @@ -0,0 +1,22 @@ +bench:lowq2_reconstruction_training: + allow_failure: true # until inference merged into EICrecon + extends: .det_benchmark + stage: benchmarks + script: + - export LOWQ2_DATADIR=data/lowq2/reconstruction_training/ + - export LOWQ2_RESULTSDIR=results/lowq2/reconstruction_training/ + - mkdir -p $LOWQ2_DATADIR + - mkdir -p $LOWQ2_RESULTSDIR + - snakemake $SNAKEMAKE_FLAGS --cores 5 ${LOWQ2_RESULTSDIR}TestTaggerTrackerTransportation ${LOWQ2_RESULTSDIR}TaggerTrackerTransportation + +collect_results:lowq2_reconstruction_training: + allow_failure: true # until inference merged into EICrecon + extends: .det_benchmark + stage: collect + needs: + - "bench:lowq2_reconstruction_training" + script: + - ls -lrht + - mv results{,_save}/ # move results directory out of the way to preserve it + - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output ${LOWQ2_RESULTSDIR}TestTaggerTrackerTransportation ${LOWQ2_RESULTSDIR}TaggerTrackerTransportation + - mv results{_save,}/