diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f5f7b7c5..1a6202d9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -125,62 +125,64 @@ get_data: - runner_system_failure include: - - local: 'benchmarks/backgrounds/config.yml' - - local: 'benchmarks/backwards_ecal/config.yml' - - local: 'benchmarks/calo_pid/config.yml' - - local: 'benchmarks/ecal_gaps/config.yml' - - local: 'benchmarks/tracking_detectors/config.yml' - - local: 'benchmarks/tracking_performances/config.yml' - - local: 'benchmarks/tracking_performances_dis/config.yml' - - local: 'benchmarks/barrel_ecal/config.yml' - - local: 'benchmarks/barrel_hcal/config.yml' - - local: 'benchmarks/lfhcal/config.yml' - - local: 'benchmarks/zdc/config.yml' - - local: 'benchmarks/zdc_lyso/config.yml' - - local: 'benchmarks/zdc_neutron/config.yml' - - local: 'benchmarks/zdc_photon/config.yml' - - local: 'benchmarks/zdc_pi0/config.yml' - - local: 'benchmarks/material_scan/config.yml' - - local: 'benchmarks/pid/config.yml' - - local: 'benchmarks/timing/config.yml' - - local: 'benchmarks/b0_tracker/config.yml' - - local: 'benchmarks/insert_muon/config.yml' - - local: 'benchmarks/insert_tau/config.yml' - - local: 'benchmarks/zdc_sigma/config.yml' - - local: 'benchmarks/zdc_lambda/config.yml' - - local: 'benchmarks/insert_neutron/config.yml' - - local: 'benchmarks/femc_electron/config.yml' - - local: 'benchmarks/femc_photon/config.yml' - - local: 'benchmarks/femc_pi0/config.yml' + # - local: 'benchmarks/backgrounds/config.yml' + # - local: 'benchmarks/backwards_ecal/config.yml' + # - local: 'benchmarks/calo_pid/config.yml' + # - local: 'benchmarks/ecal_gaps/config.yml' + # - local: 'benchmarks/tracking_detectors/config.yml' + # - local: 'benchmarks/tracking_performances/config.yml' + # - local: 'benchmarks/tracking_performances_dis/config.yml' + # - local: 'benchmarks/barrel_ecal/config.yml' + # - local: 'benchmarks/barrel_hcal/config.yml' + # - local: 'benchmarks/lfhcal/config.yml' + # - local: 'benchmarks/zdc/config.yml' + # - local: 'benchmarks/zdc_lyso/config.yml' + # - local: 'benchmarks/zdc_neutron/config.yml' + # - local: 'benchmarks/zdc_photon/config.yml' + # - local: 'benchmarks/zdc_pi0/config.yml' + # - local: 'benchmarks/material_scan/config.yml' + # - local: 'benchmarks/pid/config.yml' + # - local: 'benchmarks/timing/config.yml' + # - local: 'benchmarks/b0_tracker/config.yml' + # - local: 'benchmarks/insert_muon/config.yml' + # - local: 'benchmarks/insert_tau/config.yml' + # - local: 'benchmarks/zdc_sigma/config.yml' + # - local: 'benchmarks/zdc_lambda/config.yml' + # - local: 'benchmarks/insert_neutron/config.yml' + # - local: 'benchmarks/femc_electron/config.yml' + # - local: 'benchmarks/femc_photon/config.yml' + # - local: 'benchmarks/femc_pi0/config.yml' + - local: 'benchmarks/lowq2/reconstruction_training/config.yml' deploy_results: allow_failure: true stage: deploy needs: - - "collect_results:backgrounds" - - "collect_results:backwards_ecal" - - "collect_results:barrel_ecal" - - "collect_results:barrel_hcal" - - "collect_results:calo_pid" - - "collect_results:ecal_gaps" - - "collect_results:lfhcal" - - "collect_results:material_scan" - - "collect_results:pid" - - "collect_results:tracking_performance" - - "collect_results:tracking_performance_campaigns" - - "collect_results:zdc_sigma" - - "collect_results:zdc_lambda" - - "collect_results:insert_neutron" - - "collect_results:tracking_performances_dis" - - "collect_results:zdc" - - "collect_results:zdc_lyso" - - "collect_results:zdc_neutron" - - "collect_results:insert_muon" - - "collect_results:insert_tau" - - "collect_results:zdc_photon" - - "collect_results:zdc_pi0" - - "collect_results:femc_electron" - - "collect_results:femc_photon" - - "collect_results:femc_pi0" + # - "collect_results:backgrounds" + # - "collect_results:backwards_ecal" + # - "collect_results:barrel_ecal" + # - "collect_results:barrel_hcal" + # - "collect_results:calo_pid" + # - "collect_results:ecal_gaps" + # - "collect_results:lfhcal" + # - "collect_results:material_scan" + # - "collect_results:pid" + # - "collect_results:tracking_performance" + # - "collect_results:tracking_performance_campaigns" + # - "collect_results:zdc_sigma" + # - "collect_results:zdc_lambda" + # - "collect_results:insert_neutron" + # - "collect_results:tracking_performances_dis" + # - "collect_results:zdc" + # - "collect_results:zdc_lyso" + # - "collect_results:zdc_neutron" + # - "collect_results:insert_muon" + # - "collect_results:insert_tau" + # - "collect_results:zdc_photon" + # - "collect_results:zdc_pi0" + # - "collect_results:femc_electron" + # - "collect_results:femc_photon" + # - "collect_results:femc_pi0" + - "collect_results:lowq2_reconstruction_training" script: - snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json - find results -print | sort | tee summary.txt diff --git a/Snakefile b/Snakefile index 14e28f0e..d6f21d01 100644 --- a/Snakefile +++ b/Snakefile @@ -30,27 +30,28 @@ def find_epic_libraries(): return libs -include: "benchmarks/backgrounds/Snakefile" -include: "benchmarks/backwards_ecal/Snakefile" -include: "benchmarks/barrel_ecal/Snakefile" -include: "benchmarks/calo_pid/Snakefile" -include: "benchmarks/ecal_gaps/Snakefile" -include: "benchmarks/material_scan/Snakefile" -include: "benchmarks/tracking_performances/Snakefile" -include: "benchmarks/tracking_performances_dis/Snakefile" -include: "benchmarks/lfhcal/Snakefile" -include: "benchmarks/zdc_lyso/Snakefile" -include: "benchmarks/zdc_neutron/Snakefile" -include: "benchmarks/insert_muon/Snakefile" -include: "benchmarks/zdc_lambda/Snakefile" -include: "benchmarks/zdc_photon/Snakefile" -include: "benchmarks/zdc_pi0/Snakefile" -include: "benchmarks/zdc_sigma/Snakefile" -include: "benchmarks/insert_neutron/Snakefile" -include: "benchmarks/insert_tau/Snakefile" -include: "benchmarks/femc_electron/Snakefile" -include: "benchmarks/femc_photon/Snakefile" -include: "benchmarks/femc_pi0/Snakefile" +# include: "benchmarks/backgrounds/Snakefile" +# include: "benchmarks/backwards_ecal/Snakefile" +# include: "benchmarks/barrel_ecal/Snakefile" +# include: "benchmarks/calo_pid/Snakefile" +# include: "benchmarks/ecal_gaps/Snakefile" +# include: "benchmarks/material_scan/Snakefile" +# include: "benchmarks/tracking_performances/Snakefile" +# include: "benchmarks/tracking_performances_dis/Snakefile" +# include: "benchmarks/lfhcal/Snakefile" +# include: "benchmarks/zdc_lyso/Snakefile" +# include: "benchmarks/zdc_neutron/Snakefile" +# include: "benchmarks/insert_muon/Snakefile" +# include: "benchmarks/zdc_lambda/Snakefile" +# include: "benchmarks/zdc_photon/Snakefile" +# include: "benchmarks/zdc_pi0/Snakefile" +# include: "benchmarks/zdc_sigma/Snakefile" +# include: "benchmarks/insert_neutron/Snakefile" +# include: "benchmarks/insert_tau/Snakefile" +# include: "benchmarks/femc_electron/Snakefile" +# include: "benchmarks/femc_photon/Snakefile" +# include: "benchmarks/femc_pi0/Snakefile" +include: "benchmarks/lowq2/reconstruction_training/Snakefile" use_s3 = config["remote_provider"].lower() == "s3" use_xrootd = config["remote_provider"].lower() == "xrootd" diff --git a/benchmarks/lowq2/reconstruction_training/ProcessData.py b/benchmarks/lowq2/reconstruction_training/ProcessData.py new file mode 100644 index 00000000..faf2dc10 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/ProcessData.py @@ -0,0 +1,20 @@ +import uproot +import awkward as ak + +def create_arrays(dataFiles): + + # List of branches to load + branches = ["_TaggerTrackerFeatureTensor_shape","_TaggerTrackerFeatureTensor_floatData","_TaggerTrackerTargetTensor_floatData"] + + # Load data from concatenated list of files + data = uproot.concatenate([f"{file}:events" for file in dataFiles], branches, library="ak") + + # Filter events with at least one track + num_tracks = data["_TaggerTrackerFeatureTensor_shape"][:,0] + filtered_data = data[num_tracks == 1] + + input_data = filtered_data["_TaggerTrackerFeatureTensor_floatData"] + target_data = filtered_data["_TaggerTrackerTargetTensor_floatData"] + + return input_data, target_data + diff --git a/benchmarks/lowq2/reconstruction_training/RegressionModel.py b/benchmarks/lowq2/reconstruction_training/RegressionModel.py new file mode 100644 index 00000000..17e1031f --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/RegressionModel.py @@ -0,0 +1,103 @@ +import torch +import torch.nn as nn +import torch.optim as optim +import numpy as np + +class RegressionModel(nn.Module): + def __init__(self): + super(RegressionModel, self).__init__() + self.fc1 = nn.Linear(4, 512) + self.fc2 = nn.Linear(512, 64) + self.fc4 = nn.Linear(64, 3) + self.input_mean = torch.tensor([0.0, 0.0, 0.0, 0.0]) + self.input_std = torch.tensor([1.0, 1.0, 1.0, 1.0]) + # self.input_covariance = torch.tensor([[1.0, 0.0, 0.0, 0.0], + # [0.0, 1.0, 0.0, 0.0], + # [0.0, 0.0, 1.0, 0.0], + # [0.0, 0.0, 0.0, 1.0]]) + self.output_mean = torch.tensor([0.0, 0.0, 0.0]) + self.output_std = torch.tensor([1.0, 1.0, 1.0]) + # self.output_correlation = torch.tensor([[1.0, 0.0, 0.0], + # [0.0, 1.0, 0.0], + # [0.0, 0.0, 1.0]]) + + def forward(self, x): + x = (x-self.input_mean)/self.input_std + x = torch.tanh(self.fc1(x)) + x = torch.tanh(self.fc2(x)) + x = self.fc4(x) + x = x*self.output_std + self.output_mean + return x + + def adapt(self, input_data, output_data): + in_mean = input_data.mean(axis=0) + in_std = input_data.std (axis=0) + self.input_mean = torch.tensor(in_mean) + self.input_std = torch.tensor(in_std) + + # Calculate the correlation matrix of the input data + # input_normalized = (input_data-in_mean)/in_std + # input_correlation = np.corrcoef(input_normalized, rowvar=False) + # Invert the correlation matrix and convert into float tensor + # self.input_covariance = torch.tensor(np.linalg.inv(input_correlation).astype(np.float32)) + + self.output_mean = torch.tensor(output_data.mean(axis=0)) + self.output_std = torch.tensor(output_data.std (axis=0)) + +def makeModel(): + # Create the model + model = RegressionModel() + # Define the optimizer + optimizer = optim.Adam(model.parameters(), lr=0.0001) + # Define the loss function + criterion = nn.MSELoss() + + return model, optimizer, criterion + +def trainModel(epochs, train_loader, val_loader): + + # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + # print(f"Using device: {device}") + + model, optimizer, criterion = makeModel() + # model.to(device) + + # Verify that the model parameters are on the GPU + # for name, param in model.named_parameters(): + # print(f"{name} is on {param.device}") + + # Adapt the model using the training data from the training loader + model.adapt(train_loader.dataset.tensors[0].detach().numpy(), train_loader.dataset.tensors[1].detach().numpy()) + + for epoch in range(epochs): + model.train() + running_loss = 0.0 + for inputs, targets in train_loader: + # inputs, targets = inputs.to(device), targets.to(device) + optimizer.zero_grad() + outputs = model(inputs) + loss = criterion(outputs, targets) + loss.backward() + optimizer.step() + running_loss += loss.item() * inputs.size(0) + + epoch_loss = running_loss / len(train_loader.dataset) + # print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}") + + + # Validation step + model.eval() + val_loss = 0.0 + with torch.no_grad(): + for val_inputs, val_targets in val_loader: + # val_inputs, val_targets = val_inputs.to(device), val_targets.to(device) + val_outputs = model(val_inputs) + val_loss += criterion(val_outputs, val_targets).item() * val_inputs.size(0) + # val_outputs = model(val_input) + # val_loss = criterion(val_outputs, val_target) + + val_loss /= len(val_loader.dataset) + + print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss}, Val Loss: {val_loss}") + + return model \ No newline at end of file diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile new file mode 100644 index 00000000..fcabdf44 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/Snakefile @@ -0,0 +1,95 @@ +import os + +# Get the LOWQ2_DATADIR environment variable, or use a default value if it is not set +outputdir = os.getenv("LOWQ2_DATADIR", "") +resultsdir = os.getenv("LOWQ2_RESULTSDIR", "") + +# Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file. +rule lowq2_tensor_recon: + output: + f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + log: + f"{outputdir}tensors.eicrecon.tree.edm4eic.root.log", + params: + input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,2), number=range(0, 1)), + compact_xml="epic_lowq2.xml", + shell: + """ + eicrecon {params.input} -Ppodio:output_file={output} \ + -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor \ + -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \ + -Pdd4hep:xml_files={params.compact_xml} \ + """ + +# Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor. +rule lowq2_reconstruction_training: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + script="TaggerRegression.py", + output: + f"{outputdir}TestTaggerTrackerTransportation.onnx", + shell: + """ + python {input.script} --dataFiles {input.data} --outModelFile {output} + """ + +# Runs the inference with the default model. +rule lowq2_reconstruction_inference: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + output: + f"{outputdir}TaggerInference.eicrecon.tree.edm4eic.root", + params: + compact_xml="epic_lowq2.xml", + shell: + """ + eicrecon {input.data} -Ppodio:output_file={output} \ + -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \ + -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \ + -Pdd4hep:xml_files={params.compact_xml} \ + """ + +# Check inference runs with the new model. +rule lowq2_reconstruction_new_inference: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=f"{outputdir}TestTaggerTrackerTransportation.onnx", + output: + f"{outputdir}TaggerInferenceNew.eicrecon.tree.edm4eic.root", + params: + compact_xml="epic_lowq2.xml", + shell: + """ + eicrecon {input.data} -Ppodio:output_file={output} \ + -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \ + -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \ + -Pdd4hep:xml_files={params.compact_xml} \ + -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \ + -Peicrecon:LogLevel=error \ + """ + +# Create plots showing the performance of a model. +rule lowq2_reconstruction_new_plot: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=lambda wildcards: f"{outputdir}{wildcards.model}.onnx", + output: + directory(expand(f"{resultsdir}{{model}}", model="{model}")), + shell: + """ + mkdir -p {output} + python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output} + """ + +# Create plots showing the performance of a model. +rule lowq2_reconstruction_old_plot: + input: + data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root", + model=lambda wildcards: f"calibrations/onnx/{wildcards.model}.onnx", + output: + directory(expand(f"{resultsdir}{{model}}", model="{model}")), + shell: + """ + mkdir -p {output} + python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output} + """ \ No newline at end of file diff --git a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py new file mode 100644 index 00000000..e3d36ee1 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py @@ -0,0 +1,51 @@ +# import edm4hep +import torch +import argparse +from ProcessData import create_arrays +from torch.utils.data import DataLoader, TensorDataset +from RegressionModel import makeModel, trainModel + +# Parse arguments +parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.') +parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files') +parser.add_argument('--outModelFile', type=str, default="regression_model.onnx", help='Output file for the trained model') +args = parser.parse_args() +dataFiles = args.dataFiles +outModelFile = args.outModelFile + +input_data, target_data = create_arrays(dataFiles) + +torch_input_data = torch.tensor(input_data) +torch_target_data = torch.tensor(target_data) + +# Split data into training and validation sets +validation_fraction = 0.25 +split_index = int(len(torch_input_data) * (1 - validation_fraction)) + +val_input_data = torch_input_data[split_index:] +val_target_data = torch_target_data[split_index:] +train_input_data = torch_input_data[:split_index] +train_target_data = torch_target_data[:split_index] + +# Create TensorDatasets +train_dataset = TensorDataset(train_input_data, train_target_data) +val_dataset = TensorDataset(val_input_data, val_target_data) +batch_size = 4096 + +# Create DataLoaders +train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True ) +val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) + +print(f"Training data: {len(train_input_data)} samples") + +epochs = 1000 +model = trainModel(epochs, train_loader, val_loader) + +# Save the trained model to ONNX format +dummy_input = torch_input_data[0].unsqueeze(0) # Create a dummy input for the model + +torch.onnx.export(model, dummy_input, outModelFile, + input_names=['input'], output_names=['output'], + dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}) + +print(f"Model has been saved to {outModelFile}") \ No newline at end of file diff --git a/benchmarks/lowq2/reconstruction_training/TestModel.py b/benchmarks/lowq2/reconstruction_training/TestModel.py new file mode 100644 index 00000000..dfd95931 --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/TestModel.py @@ -0,0 +1,211 @@ +import onnxruntime as ort +import argparse +import numpy as np +from ProcessData import create_arrays +import matplotlib.pyplot as plt +from matplotlib.colors import LogNorm +from scipy.stats import norm +from scipy.optimize import curve_fit + +# Parse arguments +parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.') +parser.add_argument('--modelFile', type=str, default="regression_model.onnx", help='Path to the ONNX model file') +parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files') +parser.add_argument('--outDir', type=str, default=".", help='Output directory') +args = parser.parse_args() +modelFile = args.modelFile +dataFiles = args.dataFiles +outDir = args.outDir +outGraphFile = outDir + "/output_vs_target.png" +outGraphFile2 = outDir + "/output_vs_target2.png" +outGraphFile3 = outDir + "/output_vs_target3.png" +outGraphFile4 = outDir + "/output_vs_target4.png" + +input_data, target_data = create_arrays(dataFiles) + +target_data = np.array(target_data) + +# Load the ONNX model +session = ort.InferenceSession(modelFile) + +# Run the model on the input data +input_name = session.get_inputs()[0].name +output_name = session.get_outputs()[0].name +input_data = np.array(input_data,dtype=np.float32) +output = session.run([output_name], {input_name: input_data}) +output = np.array(output[0]) + +out_theta = np.arctan2(np.sqrt(output[:,0]**2 + output[:,1]**2),output[:,2]) +out_phi = np.arctan2(output[:,1],output[:,0]) +out_mag = np.sqrt(output[:,0]**2 + output[:,1]**2 + output[:,2]**2) +in_theta = np.arctan2(np.sqrt(target_data[:,0]**2 + target_data[:,1]**2),target_data[:,2]) +in_phi = np.arctan2(target_data[:,1],target_data[:,0]) +in_mag = np.sqrt(target_data[:,0]**2 + target_data[:,1]**2 + target_data[:,2]**2) + + +thetadiff = out_theta - in_theta +phidiff = out_phi - in_phi +# Move phidiff to within -pi/2 and pi/2 +phidiff = (phidiff + np.pi) % (2 * np.pi) - np.pi +magdiff = out_mag - in_mag + +diff = (target_data - output)/target_data +diffrange = [[-5,5],[-5,5],[-0.5,0.5]] +datarange = [[-0.02,0.02],[-0.02,0.02],[-1,0]] + +# Use the 'seismic' colormap +cmap = plt.get_cmap('seismic') + +# Creates histograms to compare the target and output data +fig, axs = plt.subplots(3, 3, figsize=(12, 12)) +for i in range(3): + # 2D histograms showing trends in the data + axs[0,i].hist2d(target_data[:,i], output[:,i], bins=100, range=[datarange[i],datarange[i]], cmap="seismic", norm=LogNorm(), label="Output vs Target") + axs[0,i].set_xlabel(f"Variable {i} Target") + axs[0,i].set_ylabel(f"Variable {i} Output") + + axs[1,i].hist(diff[:,i], bins=100, alpha=0.5, range=diffrange[i], label="Difference") + axs[1,i].set_xlabel(f"Variable {i} Difference") + axs[1,i].set_ylabel("Counts") + + axs[2,i].hist2d(target_data[:,i], diff[:,i], bins=100, range=[datarange[i],diffrange[i]], cmap="seismic", norm=LogNorm(), label="Difference vs Target") + axs[2,i].set_xlabel(f"Variable {i} Target") + axs[2,i].set_ylabel(f"Variable {i} Difference") + +plt.show() +plt.savefig(outGraphFile) + +# Creates histograms to compare theta, phi and mag target and output data +fig2, axs2 = plt.subplots(3, 3, figsize=(12, 12)) + +thetarange = [np.pi-0.01,np.pi] +phirange = [-np.pi,np.pi] +magrange = [0,1] + +thetadiffrange = [-0.01,0.01] +phidiffrange = [-np.pi,np.pi] +magdiffrange = [-0.1,0.1] + +# 2D histograms showing trends in the data +axs2[0,0].hist2d(out_theta, in_theta, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,0].set_xlabel("Theta Target") +axs2[0,0].set_ylabel("Theta Output") + +axs2[0,1].hist2d(out_phi, in_phi, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs2[0,1].set_xlabel("Phi Target") +axs2[0,1].set_ylabel("Phi Output") + +axs2[0,2].hist2d(out_mag, in_mag, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs2[0,2].set_xlabel("Mag Target") +axs2[0,2].set_ylabel("Mag Output") + +axs2[1,0].hist(thetadiff, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs2[1,0].set_xlabel("Theta Difference") +axs2[1,0].set_ylabel("Counts") + +axs2[1,1].hist(phidiff, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs2[1,1].set_xlabel("Phi Difference") +axs2[1,1].set_ylabel("Counts") + +axs2[1,2].hist(magdiff, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs2[1,2].set_xlabel("Mag Difference") +axs2[1,2].set_ylabel("Counts") + +axs2[2,0].hist2d(in_theta, thetadiff, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,0].set_xlabel("Theta Target") +axs2[2,0].set_ylabel("Theta Difference") + +axs2[2,1].hist2d(in_phi, phidiff, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs2[2,1].set_xlabel("Phi Target") +axs2[2,1].set_ylabel("Phi Difference") + +axs2[2,2].hist2d(in_mag, magdiff, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs2[2,2].set_xlabel("Mag Target") +axs2[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile2) + +# Create histograms where the theta value has been cut at less than 3.14 +fig3, axs3 = plt.subplots(3, 3, figsize=(12, 12)) + +out_theta_cut = out_theta[out_theta < 3.14] +in_theta_cut = in_theta[out_theta < 3.14] +thetadiff_cut = thetadiff[out_theta < 3.14] + +out_phi_cut = out_phi[out_theta < 3.14] +in_phi_cut = in_phi[out_theta < 3.14] +phidiff_cut = phidiff[out_theta < 3.14] + +out_mag_cut = out_mag[out_theta < 3.14] +in_mag_cut = in_mag[out_theta < 3.14] +magdiff_cut = magdiff[out_theta < 3.14] + +axs3[0,0].hist2d(out_theta_cut, in_theta_cut, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,0].set_xlabel("Theta Target") +axs3[0,0].set_ylabel("Theta Output") + +axs3[0,1].hist2d(out_phi_cut, in_phi_cut, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target") +axs3[0,1].set_xlabel("Phi Target") +axs3[0,1].set_ylabel("Phi Output") + +axs3[0,2].hist2d(out_mag_cut, in_mag_cut, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target") +axs3[0,2].set_xlabel("Mag Target") +axs3[0,2].set_ylabel("Mag Output") + +axs3[1,0].hist(thetadiff_cut, bins=100, alpha=0.5, range=thetadiffrange, label="Difference") +axs3[1,0].set_xlabel("Theta Difference") +axs3[1,0].set_ylabel("Counts") + +axs3[1,1].hist(phidiff_cut, bins=100, alpha=0.5, range=phidiffrange, label="Difference") +axs3[1,1].set_xlabel("Phi Difference") +axs3[1,1].set_ylabel("Counts") + +axs3[1,2].hist(magdiff_cut, bins=100, alpha=0.5, range=magdiffrange, label="Difference") +axs3[1,2].set_xlabel("Mag Difference") +axs3[1,2].set_ylabel("Counts") + +axs3[2,0].hist2d(in_theta_cut, thetadiff_cut, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,0].set_xlabel("Theta Target") +axs3[2,0].set_ylabel("Theta Difference") + +axs3[2,1].hist2d(in_phi_cut, phidiff_cut, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target") +axs3[2,1].set_xlabel("Phi Target") +axs3[2,1].set_ylabel("Phi Difference") + +axs3[2,2].hist2d(in_mag_cut, magdiff_cut, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target") +axs3[2,2].set_xlabel("Mag Target") +axs3[2,2].set_ylabel("Mag Difference") + +plt.show() +plt.savefig(outGraphFile3) + +# Create plots where a Gaussian has been fitted to the data +# Function to fit a Gaussian and plot the results +def plot_gaussian_fit(ax, data, range, xlabel, ylabel): + def gaussian(x, mu, sigma, amplitude): + return amplitude * np.exp(-0.5 * ((x - mu) / sigma) ** 2) + + hist, bin_edges = np.histogram(data, bins=100, range=range, density=True) + bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 + + popt, _ = curve_fit(gaussian, bin_centers, hist, p0=[0, 0.01, 1]) + mu, sigma, amplitude = popt + print(f"mu={mu}, sigma={sigma}, amplitude={amplitude}") + + x = np.linspace(range[0], range[1], 100) + ax.plot(x, gaussian(x, *popt), 'k', linewidth=2) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.hist(data, bins=100, alpha=0.5, range=range, edgecolor='black', density=True) + ax.legend([f'Fit: $\mu$={mu:.5f}, $\sigma$={sigma:.5f}']) + +# Create histograms with Gaussian fits +fig4, axs4 = plt.subplots(3, 1, figsize=(8, 12)) + +plot_gaussian_fit(axs4[0], thetadiff, thetadiffrange, "Theta Difference", "Density") +plot_gaussian_fit(axs4[1], phidiff_cut, phidiffrange, "Phi Difference", "Density") +plot_gaussian_fit(axs4[2], magdiff, magdiffrange, "Mag Difference", "Density") + +plt.show() +plt.savefig(outGraphFile4) \ No newline at end of file diff --git a/benchmarks/lowq2/reconstruction_training/config.yml b/benchmarks/lowq2/reconstruction_training/config.yml new file mode 100644 index 00000000..aa58950b --- /dev/null +++ b/benchmarks/lowq2/reconstruction_training/config.yml @@ -0,0 +1,22 @@ +bench:lowq2_reconstruction_training: + allow_failure: true # until inference merged into EICrecon + extends: .det_benchmark + stage: benchmarks + script: + - export LOWQ2_DATADIR=data/lowq2/reconstruction_training/ + - export LOWQ2_RESULTSDIR=results/lowq2/reconstruction_training/ + - mkdir -p $LOWQ2_DATADIR + - mkdir -p $LOWQ2_RESULTSDIR + - snakemake $SNAKEMAKE_FLAGS --cores 5 ${LOWQ2_RESULTSDIR}TestTaggerTrackerTransportation ${LOWQ2_RESULTSDIR}TaggerTrackerTransportation + +collect_results:lowq2_reconstruction_training: + allow_failure: true # until inference merged into EICrecon + extends: .det_benchmark + stage: collect + needs: + - "bench:lowq2_reconstruction_training" + script: + - ls -lrht + - mv results{,_save}/ # move results directory out of the way to preserve it + - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output ${LOWQ2_RESULTSDIR}TestTaggerTrackerTransportation ${LOWQ2_RESULTSDIR}TaggerTrackerTransportation + - mv results{_save,}/