From 8d7b7e76f65c7ad60d7a950e2f1b8cb992eedd3e Mon Sep 17 00:00:00 2001
From: simonge <simon.gardner@glasgow.ac.uk>
Date: Wed, 8 Jan 2025 16:01:14 +0000
Subject: [PATCH 1/7] Added pytorch training scripts

---
 .../RegressionModel.py                        | 104 ++++++++++++++++++
 .../TaggerRegression.py                       |  50 +++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 benchmarks/LOWQ2/reconstruction_training/RegressionModel.py
 create mode 100644 benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py

diff --git a/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py b/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py
new file mode 100644
index 00000000..589779ef
--- /dev/null
+++ b/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py
@@ -0,0 +1,104 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+
+class RegressionModel(nn.Module):
+    def __init__(self):
+        super(RegressionModel, self).__init__()
+        self.fc1  = nn.Linear(4, 512)
+        self.fc2  = nn.Linear(512, 64)
+        self.fc4  = nn.Linear(64, 3)
+        self.input_mean       = torch.tensor([0.0, 0.0, 0.0, 0.0])
+        self.input_std        = torch.tensor([1.0, 1.0, 1.0, 1.0])
+        self.input_covariance = torch.tensor([[1.0, 0.0, 0.0, 0.0],
+                                              [0.0, 1.0, 0.0, 0.0],
+                                              [0.0, 0.0, 1.0, 0.0],
+                                              [0.0, 0.0, 0.0, 1.0]])
+        self.output_mean = torch.tensor([0.0, 0.0, 0.0])
+        self.output_std  = torch.tensor([1.0, 1.0, 1.0])
+        self.output_correlation = torch.tensor([[1.0, 0.0, 0.0],
+                                                [0.0, 1.0, 0.0],
+                                                [0.0, 0.0, 1.0]])
+
+    def forward(self, x):
+        x = (x-self.input_mean)/self.input_std
+        x = torch.tanh(self.fc1(x))
+        x = torch.tanh(self.fc2(x))
+        x = self.fc4(x)
+        x = x*self.output_std + self.output_mean
+        return x
+    
+    def adapt(self, input_data, output_data):
+        in_mean = input_data.mean(axis=0)
+        in_std  = input_data.std (axis=0)
+        self.input_mean  = torch.tensor(in_mean)
+        self.input_std   = torch.tensor(in_std)
+
+        # Calculate the correlation matrix of the input data
+        input_normalized  = (input_data-in_mean)/in_std   
+        input_correlation = np.corrcoef(input_normalized, rowvar=False)         
+        # Invert the correlation matrix and convert into float tensor
+        self.input_covariance = torch.tensor(np.linalg.inv(input_correlation).astype(np.float32))
+
+        self.output_mean = torch.tensor(output_data.mean(axis=0))
+        self.output_std  = torch.tensor(output_data.std (axis=0))
+
+def makeModel():
+    # Create the model
+    model = RegressionModel()
+    # Define the optimizer
+    optimizer = optim.Adam(model.parameters(), lr=0.0001)
+
+    # Define the loss function
+    criterion = nn.MSELoss()
+
+    return model, optimizer, criterion
+
+def trainModel(epochs, train_loader, val_loader):
+    
+    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # print(f"Using device: {device}")
+    
+    model, optimizer, criterion = makeModel()
+    # model.to(device)
+    
+    # Verify that the model parameters are on the GPU
+    # for name, param in model.named_parameters():
+    #     print(f"{name} is on {param.device}")
+
+    # Adapt the model using the training data from the training loader
+    model.adapt(train_loader.dataset.tensors[0].detach().numpy(), train_loader.dataset.tensors[1].detach().numpy())
+
+    for epoch in range(epochs):
+        model.train()
+        running_loss = 0.0
+        for inputs, targets in train_loader:
+            # inputs, targets = inputs.to(device), targets.to(device)
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = criterion(outputs, targets)
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item() * inputs.size(0)
+        
+        epoch_loss = running_loss / len(train_loader.dataset)
+        # print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")
+
+        
+        # Validation step
+        model.eval()
+        val_loss = 0.0
+        with torch.no_grad():
+            for val_inputs, val_targets in val_loader:
+                # val_inputs, val_targets = val_inputs.to(device), val_targets.to(device)
+                val_outputs = model(val_inputs)
+                val_loss += criterion(val_outputs, val_targets).item() * val_inputs.size(0)
+            # val_outputs = model(val_input)
+            # val_loss = criterion(val_outputs, val_target)
+
+        val_loss /= len(val_loader.dataset)
+
+        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss}, Val Loss: {val_loss}")
+
+    return model
\ No newline at end of file
diff --git a/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py b/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py
new file mode 100644
index 00000000..6e7eb8a7
--- /dev/null
+++ b/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py
@@ -0,0 +1,50 @@
+# import edm4hep
+import torch
+import argparse
+from ProcessData import create_arrays
+from torch.utils.data import DataLoader, TensorDataset
+from RegressionModel import makeModel, trainModel
+
+# Parse arguments
+parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.')
+parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files')
+parser.add_argument('--outModelFile', type=str, default="regression_model.onnx", help='Output file for the trained model')
+args         = parser.parse_args()
+dataFiles    = args.dataFiles
+outModelFile = args.outModelFile
+
+input_data, target_data = create_arrays(dataFiles)
+
+torch_input_data = torch.tensor(input_data)
+torch_target_data = torch.tensor(target_data)
+
+# Split data into training and validation sets
+validation_fraction = 0.25
+split_index = int(len(torch_input_data) * (1 - validation_fraction))
+val_input_data = torch_input_data[split_index:]
+val_target_data = torch_target_data[split_index:]
+train_input_data = torch_input_data[:split_index]
+train_target_data = torch_target_data[:split_index] 
+
+# Create TensorDatasets
+train_dataset = TensorDataset(train_input_data, train_target_data)
+val_dataset   = TensorDataset(val_input_data, val_target_data)
+batch_size    = 4096
+
+# Create DataLoaders
+train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True )
+val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False)
+
+print(f"Training data: {len(train_input_data)} samples")
+
+epochs = 2000
+model = trainModel(epochs, train_loader, val_loader)
+
+# Save the trained model to ONNX format
+dummy_input = torch_input_data[0].unsqueeze(0)  # Create a dummy input for the model
+
+torch.onnx.export(model, dummy_input, outModelFile, 
+                  input_names=['input'], output_names=['output'],
+                  dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})
+
+print(f"Model has been saved to {outModelFile}")
\ No newline at end of file

From 25a3706a1b86afa8c7b52c06a15d6ecb634e23da Mon Sep 17 00:00:00 2001
From: simonge <simon.gardner@glasgow.ac.uk>
Date: Wed, 8 Jan 2025 16:08:11 +0000
Subject: [PATCH 2/7] Update formatting

---
 .../RegressionModel.py                        | 21 +++++++++----------
 .../TaggerRegression.py                       | 13 ++++++------
 2 files changed, 17 insertions(+), 17 deletions(-)
 rename benchmarks/{LOWQ2 => lowq2}/reconstruction_training/RegressionModel.py (82%)
 rename benchmarks/{LOWQ2 => lowq2}/reconstruction_training/TaggerRegression.py (84%)

diff --git a/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py b/benchmarks/lowq2/reconstruction_training/RegressionModel.py
similarity index 82%
rename from benchmarks/LOWQ2/reconstruction_training/RegressionModel.py
rename to benchmarks/lowq2/reconstruction_training/RegressionModel.py
index 589779ef..17e1031f 100644
--- a/benchmarks/LOWQ2/reconstruction_training/RegressionModel.py
+++ b/benchmarks/lowq2/reconstruction_training/RegressionModel.py
@@ -11,15 +11,15 @@ def __init__(self):
         self.fc4  = nn.Linear(64, 3)
         self.input_mean       = torch.tensor([0.0, 0.0, 0.0, 0.0])
         self.input_std        = torch.tensor([1.0, 1.0, 1.0, 1.0])
-        self.input_covariance = torch.tensor([[1.0, 0.0, 0.0, 0.0],
-                                              [0.0, 1.0, 0.0, 0.0],
-                                              [0.0, 0.0, 1.0, 0.0],
-                                              [0.0, 0.0, 0.0, 1.0]])
+        # self.input_covariance = torch.tensor([[1.0, 0.0, 0.0, 0.0],
+        #                                       [0.0, 1.0, 0.0, 0.0],
+        #                                       [0.0, 0.0, 1.0, 0.0],
+        #                                       [0.0, 0.0, 0.0, 1.0]])
         self.output_mean = torch.tensor([0.0, 0.0, 0.0])
         self.output_std  = torch.tensor([1.0, 1.0, 1.0])
-        self.output_correlation = torch.tensor([[1.0, 0.0, 0.0],
-                                                [0.0, 1.0, 0.0],
-                                                [0.0, 0.0, 1.0]])
+        # self.output_correlation = torch.tensor([[1.0, 0.0, 0.0],
+        #                                         [0.0, 1.0, 0.0],
+        #                                         [0.0, 0.0, 1.0]])
 
     def forward(self, x):
         x = (x-self.input_mean)/self.input_std
@@ -36,10 +36,10 @@ def adapt(self, input_data, output_data):
         self.input_std   = torch.tensor(in_std)
 
         # Calculate the correlation matrix of the input data
-        input_normalized  = (input_data-in_mean)/in_std   
-        input_correlation = np.corrcoef(input_normalized, rowvar=False)         
+        # input_normalized  = (input_data-in_mean)/in_std   
+        # input_correlation = np.corrcoef(input_normalized, rowvar=False)         
         # Invert the correlation matrix and convert into float tensor
-        self.input_covariance = torch.tensor(np.linalg.inv(input_correlation).astype(np.float32))
+        # self.input_covariance = torch.tensor(np.linalg.inv(input_correlation).astype(np.float32))
 
         self.output_mean = torch.tensor(output_data.mean(axis=0))
         self.output_std  = torch.tensor(output_data.std (axis=0))
@@ -49,7 +49,6 @@ def makeModel():
     model = RegressionModel()
     # Define the optimizer
     optimizer = optim.Adam(model.parameters(), lr=0.0001)
-
     # Define the loss function
     criterion = nn.MSELoss()
 
diff --git a/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py
similarity index 84%
rename from benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py
rename to benchmarks/lowq2/reconstruction_training/TaggerRegression.py
index 6e7eb8a7..e0575126 100644
--- a/benchmarks/LOWQ2/reconstruction_training/TaggerRegression.py
+++ b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py
@@ -20,10 +20,11 @@
 
 # Split data into training and validation sets
 validation_fraction = 0.25
-split_index = int(len(torch_input_data) * (1 - validation_fraction))
-val_input_data = torch_input_data[split_index:]
-val_target_data = torch_target_data[split_index:]
-train_input_data = torch_input_data[:split_index]
+split_index         = int(len(torch_input_data) * (1 - validation_fraction))
+
+val_input_data    = torch_input_data[split_index:]
+val_target_data   = torch_target_data[split_index:]
+train_input_data  = torch_input_data[:split_index]
 train_target_data = torch_target_data[:split_index] 
 
 # Create TensorDatasets
@@ -37,8 +38,8 @@
 
 print(f"Training data: {len(train_input_data)} samples")
 
-epochs = 2000
-model = trainModel(epochs, train_loader, val_loader)
+epochs = 100
+model  = trainModel(epochs, train_loader, val_loader)
 
 # Save the trained model to ONNX format
 dummy_input = torch_input_data[0].unsqueeze(0)  # Create a dummy input for the model

From 08117088c44a88e37c06f88320d4811e0228da43 Mon Sep 17 00:00:00 2001
From: simonge <simon.gardner@glasgow.ac.uk>
Date: Wed, 8 Jan 2025 16:33:28 +0000
Subject: [PATCH 3/7] Added (copied) Snakefile

---
 Snakefile                                     |  1 +
 .../lowq2/reconstruction_training/Snakefile   | 48 +++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 benchmarks/lowq2/reconstruction_training/Snakefile

diff --git a/Snakefile b/Snakefile
index 14e28f0e..0cae3fb7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -51,6 +51,7 @@ include: "benchmarks/insert_tau/Snakefile"
 include: "benchmarks/femc_electron/Snakefile"
 include: "benchmarks/femc_photon/Snakefile"
 include: "benchmarks/femc_pi0/Snakefile"
+include: "benchmarks/lowq2/reconstruction_training/Snakefile"
 
 use_s3 = config["remote_provider"].lower() == "s3"
 use_xrootd = config["remote_provider"].lower() == "xrootd"
diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile
new file mode 100644
index 00000000..1e028329
--- /dev/null
+++ b/benchmarks/lowq2/reconstruction_training/Snakefile
@@ -0,0 +1,48 @@
+rule calo_pid_recon:
+    input:
+        "root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root",
+    output:
+        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root",
+    log:
+        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log",
+    wildcard_constraints:
+        INDEX="\d{4}",
+    shell: """
+set -m # monitor mode to prevent lingering processes
+exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
+  eicrecon {input} -Ppodio:output_file={output} \
+  -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget
+"""
+
+
+rule calo_pid:
+    input:
+        electrons=expand(
+            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
+            PARTICLE=["e-"],
+            ENERGY=["100MeVto20GeV"],
+            PHASE_SPACE=["130to177deg"],
+            INDEX=range(100),
+        ),
+        pions=expand(
+            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
+            PARTICLE=["pi-"],
+            ENERGY=["100MeVto20GeV"],
+            PHASE_SPACE=["130to177deg"],
+            INDEX=range(100),
+        ),
+        matplotlibrc=".matplotlibrc",
+        script="benchmarks/lowq2/reconstruction_training/TaggerRegression.py",
+    output:
+        directory("results/{DETECTOR_CONFIG}/calo_pid")
+    shell:
+        """
+env \
+MATPLOTLIBRC={input.matplotlibrc} \
+DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
+PLOT_TITLE={wildcards.DETECTOR_CONFIG} \
+INPUT_ELECTRONS="{input.electrons}" \
+INPUT_PIONS="{input.pions}" \
+OUTPUT_DIR={output} \
+python {input.script}
+"""

From 15598be1c91017eb24aba9471de910a84a54dae7 Mon Sep 17 00:00:00 2001
From: simonge <simon.gardner@glasgow.ac.uk>
Date: Wed, 8 Jan 2025 17:27:30 +0000
Subject: [PATCH 4/7] Update to snakefiles

---
 Snakefile                                     | 42 +++++------
 .../lowq2/reconstruction_training/Snakefile   | 69 ++++++++-----------
 2 files changed, 50 insertions(+), 61 deletions(-)

diff --git a/Snakefile b/Snakefile
index 0cae3fb7..d6f21d01 100644
--- a/Snakefile
+++ b/Snakefile
@@ -30,27 +30,27 @@ def find_epic_libraries():
     return libs
 
 
-include: "benchmarks/backgrounds/Snakefile"
-include: "benchmarks/backwards_ecal/Snakefile"
-include: "benchmarks/barrel_ecal/Snakefile"
-include: "benchmarks/calo_pid/Snakefile"
-include: "benchmarks/ecal_gaps/Snakefile"
-include: "benchmarks/material_scan/Snakefile"
-include: "benchmarks/tracking_performances/Snakefile"
-include: "benchmarks/tracking_performances_dis/Snakefile"
-include: "benchmarks/lfhcal/Snakefile"
-include: "benchmarks/zdc_lyso/Snakefile"
-include: "benchmarks/zdc_neutron/Snakefile"
-include: "benchmarks/insert_muon/Snakefile"
-include: "benchmarks/zdc_lambda/Snakefile"
-include: "benchmarks/zdc_photon/Snakefile"
-include: "benchmarks/zdc_pi0/Snakefile"
-include: "benchmarks/zdc_sigma/Snakefile"
-include: "benchmarks/insert_neutron/Snakefile"
-include: "benchmarks/insert_tau/Snakefile"
-include: "benchmarks/femc_electron/Snakefile"
-include: "benchmarks/femc_photon/Snakefile"
-include: "benchmarks/femc_pi0/Snakefile"
+# include: "benchmarks/backgrounds/Snakefile"
+# include: "benchmarks/backwards_ecal/Snakefile"
+# include: "benchmarks/barrel_ecal/Snakefile"
+# include: "benchmarks/calo_pid/Snakefile"
+# include: "benchmarks/ecal_gaps/Snakefile"
+# include: "benchmarks/material_scan/Snakefile"
+# include: "benchmarks/tracking_performances/Snakefile"
+# include: "benchmarks/tracking_performances_dis/Snakefile"
+# include: "benchmarks/lfhcal/Snakefile"
+# include: "benchmarks/zdc_lyso/Snakefile"
+# include: "benchmarks/zdc_neutron/Snakefile"
+# include: "benchmarks/insert_muon/Snakefile"
+# include: "benchmarks/zdc_lambda/Snakefile"
+# include: "benchmarks/zdc_photon/Snakefile"
+# include: "benchmarks/zdc_pi0/Snakefile"
+# include: "benchmarks/zdc_sigma/Snakefile"
+# include: "benchmarks/insert_neutron/Snakefile"
+# include: "benchmarks/insert_tau/Snakefile"
+# include: "benchmarks/femc_electron/Snakefile"
+# include: "benchmarks/femc_photon/Snakefile"
+# include: "benchmarks/femc_pi0/Snakefile"
 include: "benchmarks/lowq2/reconstruction_training/Snakefile"
 
 use_s3 = config["remote_provider"].lower() == "s3"
diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile
index 1e028329..62e00465 100644
--- a/benchmarks/lowq2/reconstruction_training/Snakefile
+++ b/benchmarks/lowq2/reconstruction_training/Snakefile
@@ -1,48 +1,37 @@
-rule calo_pid_recon:
-    input:
-        "root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root",
+rule lowq2_tensor_recon:
     output:
-        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root",
+        "tensors.eicrecon.tree.edm4eic.root",
     log:
-        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log",
-    wildcard_constraints:
-        INDEX="\d{4}",
-    shell: """
-set -m # monitor mode to prevent lingering processes
-exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
-  eicrecon {input} -Ppodio:output_file={output} \
-  -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget
-"""
+        "tensors.eicrecon.tree.edm4eic.root.log",
+    shell:  
+        """
+        eicrecon root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root -Ppodio:output_file={output} \
+        -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor \
+        -Pjana:nevents=1000
+        """
 
 
-rule calo_pid:
+rule lowq2_reconstruction_training:
+    input:
+        data="tensors.eicrecon.tree.edm4eic.root",
+        script="TaggerRegression.py",
+    output:
+        "TaggerRegressionTest.onnx",
+    shell:
+        """
+        python {input.script} --input {input.data} --outModelFile {output}
+        """
+
+rule lowq2_reconstruction_inference:
     input:
-        electrons=expand(
-            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
-            PARTICLE=["e-"],
-            ENERGY=["100MeVto20GeV"],
-            PHASE_SPACE=["130to177deg"],
-            INDEX=range(100),
-        ),
-        pions=expand(
-            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
-            PARTICLE=["pi-"],
-            ENERGY=["100MeVto20GeV"],
-            PHASE_SPACE=["130to177deg"],
-            INDEX=range(100),
-        ),
-        matplotlibrc=".matplotlibrc",
-        script="benchmarks/lowq2/reconstruction_training/TaggerRegression.py",
+        data="tensors.eicrecon.tree.edm4eic.root",
+        model="TaggerRegressionTest.onnx",
     output:
-        directory("results/{DETECTOR_CONFIG}/calo_pid")
+        "TaggerInference.eicrecon.tree.edm4eic.root",
     shell:
         """
-env \
-MATPLOTLIBRC={input.matplotlibrc} \
-DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
-PLOT_TITLE={wildcards.DETECTOR_CONFIG} \
-INPUT_ELECTRONS="{input.electrons}" \
-INPUT_PIONS="{input.pions}" \
-OUTPUT_DIR={output} \
-python {input.script}
-"""
+        eicrecon {input.data} -Ppodio:output_file={output}
+        -Ptagger:inference_model={input.model} 
+        -Pjana:nevents=1000
+        -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model}
+        """
\ No newline at end of file

From c5f076c88c1f622eaa774ecd94e781e8a7c49599 Mon Sep 17 00:00:00 2001
From: simonge <simon.gardner@glasgow.ac.uk>
Date: Thu, 9 Jan 2025 15:32:48 +0000
Subject: [PATCH 5/7] Updated to work with snakemake locally

---
 .../reconstruction_training/ProcessData.py    | 20 ++++++++
 .../lowq2/reconstruction_training/Snakefile   | 46 +++++++++++++++----
 2 files changed, 56 insertions(+), 10 deletions(-)
 create mode 100644 benchmarks/lowq2/reconstruction_training/ProcessData.py

diff --git a/benchmarks/lowq2/reconstruction_training/ProcessData.py b/benchmarks/lowq2/reconstruction_training/ProcessData.py
new file mode 100644
index 00000000..faf2dc10
--- /dev/null
+++ b/benchmarks/lowq2/reconstruction_training/ProcessData.py
@@ -0,0 +1,20 @@
+import uproot
+import awkward as ak
+
+def create_arrays(dataFiles):
+
+    # List of branches to load
+    branches = ["_TaggerTrackerFeatureTensor_shape","_TaggerTrackerFeatureTensor_floatData","_TaggerTrackerTargetTensor_floatData"]
+
+    # Load data from concatenated list of files
+    data = uproot.concatenate([f"{file}:events" for file in dataFiles], branches, library="ak")
+    
+    # Filter events with at least one track
+    num_tracks = data["_TaggerTrackerFeatureTensor_shape"][:,0]
+    filtered_data = data[num_tracks == 1]
+
+    input_data = filtered_data["_TaggerTrackerFeatureTensor_floatData"]
+    target_data = filtered_data["_TaggerTrackerTargetTensor_floatData"]
+
+    return input_data, target_data
+
diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile
index 62e00465..1792b772 100644
--- a/benchmarks/lowq2/reconstruction_training/Snakefile
+++ b/benchmarks/lowq2/reconstruction_training/Snakefile
@@ -1,37 +1,63 @@
+# Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file.
 rule lowq2_tensor_recon:
     output:
         "tensors.eicrecon.tree.edm4eic.root",
     log:
         "tensors.eicrecon.tree.edm4eic.root.log",
+    params:        
+        input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,3), number=range(0, 20)),
+        compact_xml="epic_lowq2.xml",
     shell:  
         """
-        eicrecon root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.08.1/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run6.ab.0001.eicrecon.tree.edm4eic.root -Ppodio:output_file={output} \
+        eicrecon {params.input} -Ppodio:output_file={output} \
         -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor \
-        -Pjana:nevents=1000
+        -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \
+        -Pdd4hep:xml_files={params.compact_xml} \
         """
 
-
+# Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor.
 rule lowq2_reconstruction_training:
     input:
         data="tensors.eicrecon.tree.edm4eic.root",
         script="TaggerRegression.py",
     output:
-        "TaggerRegressionTest.onnx",
+        "TestTaggerRegression.onnx",
     shell:
         """
-        python {input.script} --input {input.data} --outModelFile {output}
+        python {input.script} --dataFiles {input.data} --outModelFile {output}
         """
 
+# Runs the inference with the default model.
 rule lowq2_reconstruction_inference:
     input:
         data="tensors.eicrecon.tree.edm4eic.root",
-        model="TaggerRegressionTest.onnx",
     output:
         "TaggerInference.eicrecon.tree.edm4eic.root",
+    params:
+        compact_xml="epic_lowq2.xml",
+    shell:
+        """
+        eicrecon {input.data} -Ppodio:output_file={output} \
+        -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \
+        -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \
+        -Pdd4hep:xml_files={params.compact_xml} \
+        """
+
+# Runs the inference with the new model.
+rule lowq2_reconstruction_new_inference:
+    input:
+        data="tensors.eicrecon.tree.edm4eic.root",
+        model="TestTaggerRegression.onnx",
+    output:
+        "TaggerInferenceNew.eicrecon.tree.edm4eic.root",
+    params:
+        compact_xml="epic_lowq2.xml",
     shell:
         """
-        eicrecon {input.data} -Ppodio:output_file={output}
-        -Ptagger:inference_model={input.model} 
-        -Pjana:nevents=1000
-        -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model}
+        eicrecon {input.data} -Ppodio:output_file={output} \
+        -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \
+        -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \
+        -Pdd4hep:xml_files={params.compact_xml} \
+        -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \
+        -Peicrecon:LogLevel=error \
         """
\ No newline at end of file

From 4646a89c276f3fa1081df7a54e576ac39c0706d6 Mon Sep 17 00:00:00 2001
From: simonge <simon.gardner@glasgow.ac.uk>
Date: Thu, 9 Jan 2025 17:51:44 +0000
Subject: [PATCH 6/7] First go at full workflow (pending other PRs)

---
 .gitlab-ci.yml                                | 106 ++++-----
 .../lowq2/reconstruction_training/Snakefile   |  56 ++++-
 .../TaggerRegression.py                       |   2 +-
 .../reconstruction_training/TestModel.py      | 211 ++++++++++++++++++
 4 files changed, 310 insertions(+), 65 deletions(-)
 create mode 100644 benchmarks/lowq2/reconstruction_training/TestModel.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index f5f7b7c5..1a6202d9 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -125,62 +125,64 @@ get_data:
       - runner_system_failure
 
 include: 
-  - local: 'benchmarks/backgrounds/config.yml'
-  - local: 'benchmarks/backwards_ecal/config.yml'
-  - local: 'benchmarks/calo_pid/config.yml'
-  - local: 'benchmarks/ecal_gaps/config.yml'
-  - local: 'benchmarks/tracking_detectors/config.yml'
-  - local: 'benchmarks/tracking_performances/config.yml'
-  - local: 'benchmarks/tracking_performances_dis/config.yml'
-  - local: 'benchmarks/barrel_ecal/config.yml'
-  - local: 'benchmarks/barrel_hcal/config.yml'
-  - local: 'benchmarks/lfhcal/config.yml'
-  - local: 'benchmarks/zdc/config.yml'
-  - local: 'benchmarks/zdc_lyso/config.yml'
-  - local: 'benchmarks/zdc_neutron/config.yml'
-  - local: 'benchmarks/zdc_photon/config.yml'
-  - local: 'benchmarks/zdc_pi0/config.yml'
-  - local: 'benchmarks/material_scan/config.yml'
-  - local: 'benchmarks/pid/config.yml'
-  - local: 'benchmarks/timing/config.yml'
-  - local: 'benchmarks/b0_tracker/config.yml'
-  - local: 'benchmarks/insert_muon/config.yml'
-  - local: 'benchmarks/insert_tau/config.yml'
-  - local: 'benchmarks/zdc_sigma/config.yml'
-  - local: 'benchmarks/zdc_lambda/config.yml'
-  - local: 'benchmarks/insert_neutron/config.yml'
-  - local: 'benchmarks/femc_electron/config.yml'
-  - local: 'benchmarks/femc_photon/config.yml'
-  - local: 'benchmarks/femc_pi0/config.yml'
+  # - local: 'benchmarks/backgrounds/config.yml'
+  # - local: 'benchmarks/backwards_ecal/config.yml'
+  # - local: 'benchmarks/calo_pid/config.yml'
+  # - local: 'benchmarks/ecal_gaps/config.yml'
+  # - local: 'benchmarks/tracking_detectors/config.yml'
+  # - local: 'benchmarks/tracking_performances/config.yml'
+  # - local: 'benchmarks/tracking_performances_dis/config.yml'
+  # - local: 'benchmarks/barrel_ecal/config.yml'
+  # - local: 'benchmarks/barrel_hcal/config.yml'
+  # - local: 'benchmarks/lfhcal/config.yml'
+  # - local: 'benchmarks/zdc/config.yml'
+  # - local: 'benchmarks/zdc_lyso/config.yml'
+  # - local: 'benchmarks/zdc_neutron/config.yml'
+  # - local: 'benchmarks/zdc_photon/config.yml'
+  # - local: 'benchmarks/zdc_pi0/config.yml'
+  # - local: 'benchmarks/material_scan/config.yml'
+  # - local: 'benchmarks/pid/config.yml'
+  # - local: 'benchmarks/timing/config.yml'
+  # - local: 'benchmarks/b0_tracker/config.yml'
+  # - local: 'benchmarks/insert_muon/config.yml'
+  # - local: 'benchmarks/insert_tau/config.yml'
+  # - local: 'benchmarks/zdc_sigma/config.yml'
+  # - local: 'benchmarks/zdc_lambda/config.yml'
+  # - local: 'benchmarks/insert_neutron/config.yml'
+  # - local: 'benchmarks/femc_electron/config.yml'
+  # - local: 'benchmarks/femc_photon/config.yml'
+  # - local: 'benchmarks/femc_pi0/config.yml'
+  - local: 'benchmarks/lowq2/reconstruction_training/config.yml'
 deploy_results:
   allow_failure: true
   stage: deploy
   needs:
-    - "collect_results:backgrounds"
-    - "collect_results:backwards_ecal"
-    - "collect_results:barrel_ecal"
-    - "collect_results:barrel_hcal"
-    - "collect_results:calo_pid"
-    - "collect_results:ecal_gaps"
-    - "collect_results:lfhcal"
-    - "collect_results:material_scan"
-    - "collect_results:pid"
-    - "collect_results:tracking_performance"
-    - "collect_results:tracking_performance_campaigns"
-    - "collect_results:zdc_sigma"
-    - "collect_results:zdc_lambda"
-    - "collect_results:insert_neutron"
-    - "collect_results:tracking_performances_dis"
-    - "collect_results:zdc"
-    - "collect_results:zdc_lyso"
-    - "collect_results:zdc_neutron"
-    - "collect_results:insert_muon"
-    - "collect_results:insert_tau"
-    - "collect_results:zdc_photon"
-    - "collect_results:zdc_pi0"
-    - "collect_results:femc_electron"
-    - "collect_results:femc_photon"
-    - "collect_results:femc_pi0"
+    # - "collect_results:backgrounds"
+    # - "collect_results:backwards_ecal"
+    # - "collect_results:barrel_ecal"
+    # - "collect_results:barrel_hcal"
+    # - "collect_results:calo_pid"
+    # - "collect_results:ecal_gaps"
+    # - "collect_results:lfhcal"
+    # - "collect_results:material_scan"
+    # - "collect_results:pid"
+    # - "collect_results:tracking_performance"
+    # - "collect_results:tracking_performance_campaigns"
+    # - "collect_results:zdc_sigma"
+    # - "collect_results:zdc_lambda"
+    # - "collect_results:insert_neutron"
+    # - "collect_results:tracking_performances_dis"
+    # - "collect_results:zdc"
+    # - "collect_results:zdc_lyso"
+    # - "collect_results:zdc_neutron"
+    # - "collect_results:insert_muon"
+    # - "collect_results:insert_tau"
+    # - "collect_results:zdc_photon"
+    # - "collect_results:zdc_pi0"
+    # - "collect_results:femc_electron"
+    # - "collect_results:femc_photon"
+    # - "collect_results:femc_pi0"
+    - "collect_results:lowq2_reconstruction_training"
   script:
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json
     - find results -print | sort | tee summary.txt
diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile
index 1792b772..fcabdf44 100644
--- a/benchmarks/lowq2/reconstruction_training/Snakefile
+++ b/benchmarks/lowq2/reconstruction_training/Snakefile
@@ -1,11 +1,17 @@
+import os
+
+# Get the LOWQ2_DATADIR environment variable, or use a default value if it is not set
+outputdir  = os.getenv("LOWQ2_DATADIR", "")
+resultsdir = os.getenv("LOWQ2_RESULTSDIR", "")
+
 # Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file.
 rule lowq2_tensor_recon:
     output:
-        "tensors.eicrecon.tree.edm4eic.root",
+        f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
     log:
-        "tensors.eicrecon.tree.edm4eic.root.log",
-    params:        
-        input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,3), number=range(0, 20)),
+        f"{outputdir}tensors.eicrecon.tree.edm4eic.root.log",
+    params:
+        input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,2), number=range(0, 1)),
         compact_xml="epic_lowq2.xml",
     shell:  
         """
@@ -18,10 +24,10 @@ rule lowq2_tensor_recon:
 # Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor.
 rule lowq2_reconstruction_training:
     input:
-        data="tensors.eicrecon.tree.edm4eic.root",
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
         script="TaggerRegression.py",
     output:
-        "TestTaggerRegression.onnx",
+        f"{outputdir}TestTaggerTrackerTransportation.onnx",
     shell:
         """
         python {input.script} --dataFiles {input.data} --outModelFile {output}
@@ -30,9 +36,9 @@ rule lowq2_reconstruction_training:
 # Runs the inference with the default model.
 rule lowq2_reconstruction_inference:
     input:
-        data="tensors.eicrecon.tree.edm4eic.root",
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
     output:
-        "TaggerInference.eicrecon.tree.edm4eic.root",
+        f"{outputdir}TaggerInference.eicrecon.tree.edm4eic.root",
     params:
         compact_xml="epic_lowq2.xml",
     shell:
@@ -43,13 +49,13 @@ rule lowq2_reconstruction_inference:
         -Pdd4hep:xml_files={params.compact_xml} \
         """
 
-# Runs the inference with the new model.
+# Check inference runs with the new model.
 rule lowq2_reconstruction_new_inference:
     input:
-        data="tensors.eicrecon.tree.edm4eic.root",
-        model="TestTaggerRegression.onnx",
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        model=f"{outputdir}TestTaggerTrackerTransportation.onnx",
     output:
-        "TaggerInferenceNew.eicrecon.tree.edm4eic.root",
+        f"{outputdir}TaggerInferenceNew.eicrecon.tree.edm4eic.root",
     params:
         compact_xml="epic_lowq2.xml",
     shell:
@@ -60,4 +66,30 @@ rule lowq2_reconstruction_new_inference:
         -Pdd4hep:xml_files={params.compact_xml} \
         -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \
         -Peicrecon:LogLevel=error \
+        """
+
+# Create plots showing the performance of a model.
+rule lowq2_reconstruction_new_plot:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        model=lambda wildcards: f"{outputdir}{wildcards.model}.onnx",
+    output:
+        directory(expand(f"{resultsdir}{{model}}", model="{model}")),
+    shell:
+        """
+        mkdir -p {output}
+        python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
+        """
+
+# Create plots showing the performance of a model.
+rule lowq2_reconstruction_old_plot:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        model=lambda wildcards: f"calibrations/onnx/{wildcards.model}.onnx",
+    output:
+        directory(expand(f"{resultsdir}{{model}}", model="{model}")),
+    shell:
+        """
+        mkdir -p {output}
+        python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
         """
\ No newline at end of file
diff --git a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py
index e0575126..e3d36ee1 100644
--- a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py
+++ b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py
@@ -38,7 +38,7 @@
 
 print(f"Training data: {len(train_input_data)} samples")
 
-epochs = 100
+epochs = 1000
 model  = trainModel(epochs, train_loader, val_loader)
 
 # Save the trained model to ONNX format
diff --git a/benchmarks/lowq2/reconstruction_training/TestModel.py b/benchmarks/lowq2/reconstruction_training/TestModel.py
new file mode 100644
index 00000000..dfd95931
--- /dev/null
+++ b/benchmarks/lowq2/reconstruction_training/TestModel.py
@@ -0,0 +1,211 @@
+import onnxruntime as ort
+import argparse
+import numpy as np
+from ProcessData import create_arrays
+import matplotlib.pyplot as plt
+from matplotlib.colors import LogNorm
+from scipy.stats import norm
+from scipy.optimize import curve_fit
+
+# Parse arguments
+parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.')
+parser.add_argument('--modelFile', type=str, default="regression_model.onnx", help='Path to the ONNX model file')
+parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files')
+parser.add_argument('--outDir', type=str, default=".", help='Output directory')
+args = parser.parse_args()
+modelFile     = args.modelFile
+dataFiles     = args.dataFiles
+outDir        = args.outDir
+outGraphFile  = outDir + "/output_vs_target.png"
+outGraphFile2 = outDir + "/output_vs_target2.png"
+outGraphFile3 = outDir + "/output_vs_target3.png"
+outGraphFile4 = outDir + "/output_vs_target4.png"
+
+input_data, target_data = create_arrays(dataFiles)
+
+target_data = np.array(target_data)
+
+# Load the ONNX model
+session = ort.InferenceSession(modelFile)
+
+# Run the model on the input data
+input_name = session.get_inputs()[0].name
+output_name = session.get_outputs()[0].name
+input_data = np.array(input_data,dtype=np.float32)
+output = session.run([output_name], {input_name: input_data})
+output = np.array(output[0])
+
+out_theta = np.arctan2(np.sqrt(output[:,0]**2 + output[:,1]**2),output[:,2])
+out_phi = np.arctan2(output[:,1],output[:,0])
+out_mag = np.sqrt(output[:,0]**2 + output[:,1]**2 + output[:,2]**2)
+in_theta = np.arctan2(np.sqrt(target_data[:,0]**2 + target_data[:,1]**2),target_data[:,2])
+in_phi = np.arctan2(target_data[:,1],target_data[:,0])
+in_mag = np.sqrt(target_data[:,0]**2 + target_data[:,1]**2 + target_data[:,2]**2)
+
+
+thetadiff = out_theta - in_theta
+phidiff = out_phi - in_phi
+# Move phidiff to within -pi/2 and pi/2
+phidiff = (phidiff + np.pi) % (2 * np.pi) - np.pi
+magdiff = out_mag - in_mag
+
+diff = (target_data - output)/target_data
+diffrange = [[-5,5],[-5,5],[-0.5,0.5]]
+datarange = [[-0.02,0.02],[-0.02,0.02],[-1,0]]
+
+# Use the 'seismic' colormap
+cmap = plt.get_cmap('seismic')
+
+# Creates histograms to compare the target and output data
+fig, axs = plt.subplots(3, 3, figsize=(12, 12))
+for i in range(3):
+    # 2D histograms showing trends in the data
+    axs[0,i].hist2d(target_data[:,i], output[:,i], bins=100, range=[datarange[i],datarange[i]], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+    axs[0,i].set_xlabel(f"Variable {i} Target")
+    axs[0,i].set_ylabel(f"Variable {i} Output")
+
+    axs[1,i].hist(diff[:,i], bins=100, alpha=0.5, range=diffrange[i], label="Difference")
+    axs[1,i].set_xlabel(f"Variable {i} Difference")
+    axs[1,i].set_ylabel("Counts")
+
+    axs[2,i].hist2d(target_data[:,i], diff[:,i], bins=100, range=[datarange[i],diffrange[i]], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+    axs[2,i].set_xlabel(f"Variable {i} Target")
+    axs[2,i].set_ylabel(f"Variable {i} Difference")
+
+plt.show()
+plt.savefig(outGraphFile)
+
+# Creates histograms to compare theta, phi and mag target and output data
+fig2, axs2 = plt.subplots(3, 3, figsize=(12, 12))
+
+thetarange = [np.pi-0.01,np.pi]
+phirange = [-np.pi,np.pi]
+magrange = [0,1]
+
+thetadiffrange = [-0.01,0.01]
+phidiffrange = [-np.pi,np.pi]
+magdiffrange = [-0.1,0.1]
+
+# 2D histograms showing trends in the data
+axs2[0,0].hist2d(out_theta, in_theta, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs2[0,0].set_xlabel("Theta Target")
+axs2[0,0].set_ylabel("Theta Output")
+
+axs2[0,1].hist2d(out_phi, in_phi, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target")
+axs2[0,1].set_xlabel("Phi Target")
+axs2[0,1].set_ylabel("Phi Output")
+
+axs2[0,2].hist2d(out_mag, in_mag, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs2[0,2].set_xlabel("Mag Target")
+axs2[0,2].set_ylabel("Mag Output")
+
+axs2[1,0].hist(thetadiff, bins=100, alpha=0.5, range=thetadiffrange, label="Difference")
+axs2[1,0].set_xlabel("Theta Difference")
+axs2[1,0].set_ylabel("Counts")
+
+axs2[1,1].hist(phidiff, bins=100, alpha=0.5, range=phidiffrange, label="Difference")
+axs2[1,1].set_xlabel("Phi Difference")
+axs2[1,1].set_ylabel("Counts")
+
+axs2[1,2].hist(magdiff, bins=100, alpha=0.5, range=magdiffrange, label="Difference")
+axs2[1,2].set_xlabel("Mag Difference")
+axs2[1,2].set_ylabel("Counts")
+
+axs2[2,0].hist2d(in_theta, thetadiff, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+axs2[2,0].set_xlabel("Theta Target")
+axs2[2,0].set_ylabel("Theta Difference")
+
+axs2[2,1].hist2d(in_phi, phidiff, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target")
+axs2[2,1].set_xlabel("Phi Target")
+axs2[2,1].set_ylabel("Phi Difference")
+
+axs2[2,2].hist2d(in_mag, magdiff, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+axs2[2,2].set_xlabel("Mag Target")
+axs2[2,2].set_ylabel("Mag Difference")
+
+plt.show()
+plt.savefig(outGraphFile2)
+
+# Create histograms where the theta value has been cut at less than 3.14
+fig3, axs3 = plt.subplots(3, 3, figsize=(12, 12))
+
+out_theta_cut = out_theta[out_theta < 3.14]
+in_theta_cut = in_theta[out_theta < 3.14]
+thetadiff_cut = thetadiff[out_theta < 3.14]
+
+out_phi_cut = out_phi[out_theta < 3.14]
+in_phi_cut = in_phi[out_theta < 3.14]
+phidiff_cut = phidiff[out_theta < 3.14]
+
+out_mag_cut = out_mag[out_theta < 3.14]
+in_mag_cut = in_mag[out_theta < 3.14]
+magdiff_cut = magdiff[out_theta < 3.14]
+
+axs3[0,0].hist2d(out_theta_cut, in_theta_cut, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs3[0,0].set_xlabel("Theta Target")
+axs3[0,0].set_ylabel("Theta Output")
+
+axs3[0,1].hist2d(out_phi_cut, in_phi_cut, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target")
+axs3[0,1].set_xlabel("Phi Target")
+axs3[0,1].set_ylabel("Phi Output")
+
+axs3[0,2].hist2d(out_mag_cut, in_mag_cut, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs3[0,2].set_xlabel("Mag Target")
+axs3[0,2].set_ylabel("Mag Output")
+
+axs3[1,0].hist(thetadiff_cut, bins=100, alpha=0.5, range=thetadiffrange, label="Difference")
+axs3[1,0].set_xlabel("Theta Difference")
+axs3[1,0].set_ylabel("Counts")
+
+axs3[1,1].hist(phidiff_cut, bins=100, alpha=0.5, range=phidiffrange, label="Difference")
+axs3[1,1].set_xlabel("Phi Difference")
+axs3[1,1].set_ylabel("Counts")
+
+axs3[1,2].hist(magdiff_cut, bins=100, alpha=0.5, range=magdiffrange, label="Difference")
+axs3[1,2].set_xlabel("Mag Difference")
+axs3[1,2].set_ylabel("Counts")
+
+axs3[2,0].hist2d(in_theta_cut, thetadiff_cut, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")    
+axs3[2,0].set_xlabel("Theta Target")
+axs3[2,0].set_ylabel("Theta Difference")
+
+axs3[2,1].hist2d(in_phi_cut, phidiff_cut, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target")
+axs3[2,1].set_xlabel("Phi Target")
+axs3[2,1].set_ylabel("Phi Difference")
+
+axs3[2,2].hist2d(in_mag_cut, magdiff_cut, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+axs3[2,2].set_xlabel("Mag Target")
+axs3[2,2].set_ylabel("Mag Difference")
+
+plt.show()
+plt.savefig(outGraphFile3)
+
+# Create plots where a Gaussian has been fitted to the data
+# Function to fit a Gaussian and plot the results
+def plot_gaussian_fit(ax, data, range, xlabel, ylabel):
+    def gaussian(x, mu, sigma, amplitude):
+        return amplitude * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
+
+    hist, bin_edges = np.histogram(data, bins=100, range=range, density=True)
+    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
+
+    popt, _ = curve_fit(gaussian, bin_centers, hist, p0=[0, 0.01, 1])
+    mu, sigma, amplitude = popt
+    print(f"mu={mu}, sigma={sigma}, amplitude={amplitude}")
+
+    x = np.linspace(range[0], range[1], 100)
+    ax.plot(x, gaussian(x, *popt), 'k', linewidth=2)
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    ax.hist(data, bins=100, alpha=0.5, range=range, edgecolor='black', density=True)
+    ax.legend([f'Fit: $\mu$={mu:.5f}, $\sigma$={sigma:.5f}'])
+
+# Create histograms with Gaussian fits
+fig4, axs4 = plt.subplots(3, 1, figsize=(8, 12))
+
+plot_gaussian_fit(axs4[0], thetadiff, thetadiffrange, "Theta Difference", "Density")
+plot_gaussian_fit(axs4[1], phidiff_cut, phidiffrange, "Phi Difference", "Density")
+plot_gaussian_fit(axs4[2], magdiff, magdiffrange, "Mag Difference", "Density")
+
+plt.show()
+plt.savefig(outGraphFile4)
\ No newline at end of file

From 2473f796db611544bd9148eed24af768ba356547 Mon Sep 17 00:00:00 2001
From: simonge <simon.gardner@glasgow.ac.uk>
Date: Thu, 9 Jan 2025 18:33:15 +0000
Subject: [PATCH 7/7] Add missing config.yml

---
 .../lowq2/reconstruction_training/config.yml  | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 benchmarks/lowq2/reconstruction_training/config.yml

diff --git a/benchmarks/lowq2/reconstruction_training/config.yml b/benchmarks/lowq2/reconstruction_training/config.yml
new file mode 100644
index 00000000..aa58950b
--- /dev/null
+++ b/benchmarks/lowq2/reconstruction_training/config.yml
@@ -0,0 +1,22 @@
+bench:lowq2_reconstruction_training:
+  allow_failure: true # until inference merged into EICrecon
+  extends: .det_benchmark
+  stage: benchmarks
+  script:
+    - export LOWQ2_DATADIR=data/lowq2/reconstruction_training/
+    - export LOWQ2_RESULTSDIR=results/lowq2/reconstruction_training/
+    - mkdir -p $LOWQ2_DATADIR
+    - mkdir -p $LOWQ2_RESULTSDIR
+    - snakemake $SNAKEMAKE_FLAGS --cores 5 ${LOWQ2_RESULTSDIR}TestTaggerTrackerTransportation ${LOWQ2_RESULTSDIR}TaggerTrackerTransportation
+
+collect_results:lowq2_reconstruction_training:
+  allow_failure: true # until inference merged into EICrecon
+  extends: .det_benchmark
+  stage: collect
+  needs:
+    - "bench:lowq2_reconstruction_training"
+  script:
+    - ls -lrht
+    - mv results{,_save}/ # move results directory out of the way to preserve it
+    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output ${LOWQ2_RESULTSDIR}TestTaggerTrackerTransportation ${LOWQ2_RESULTSDIR}TaggerTrackerTransportation
+    - mv results{_save,}/