From 4ad6fc68b2033fb553c2703ebce8272580ed9a86 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 5 Feb 2024 12:57:57 -0500
Subject: [PATCH 01/87] Update run_param_config.py

Add the MSK-CH and CMO-CH bait/target sets to the table.
---
 scripts/run_param_config.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index 90799e3..8351ca9 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -651,6 +651,18 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
+		"MSK-CH": {
+				BAITS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.baits",
+				TARGETS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.targets",
+				MSKQ: "no",
+				MD: "yes"
+		},
+		"CMO-CH": {
+				BAITS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.baits",
+				TARGETS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.targets",
+				MSKQ: "no",
+				MD: "yes"
+		},
 		"HumanWholeGenome": {
 				MSKQ: "no",
 				MD: "yes",

From 843424dd7be553b54291f8f1f1d9377574c13e8e Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 12 Feb 2024 16:31:40 -0500
Subject: [PATCH 02/87] Update LaunchMetrics.py

Increase the DRAGEN bin memory (--bin_memory 50000000000) to accommodate ChIPSeq and HiC data, as well as DNA recipes that generate a lot of duplicate-marking data.
---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 533e1b6..59a6ee7 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -167,7 +167,7 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 			dragen_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
-		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
+		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
 		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)

From 7309927708b52ee97043b2e8ef9e71bbc0d6c5d0 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Fri, 16 Feb 2024 09:58:34 -0500
Subject: [PATCH 03/87] send_json_data.sh

Added the send_json_data bash script to post cellranger GEX and VDJ data to RUN QC. The /home/igo/Scripts directory where the script was previously stored was deleted, which caused an error when trying to post GEX and VDJ data to RUN QC.
---
 scripts/cellranger.py     |  2 +-
 scripts/send_json_data.sh | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 scripts/send_json_data.sh

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index e2268f8..f4f1887 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -191,7 +191,7 @@ def create_json(send_json, sequencer_and_run, project, tag, work_area):
     with open(json_data_file, "w") as jfile:
         json.dump(send_json, jfile)
         
-    bsub_json = "bsub -J create_json___{} -o create_json___{}.log -w \"done({}*)\" sh /home/igo/Scripts/PicardScripts/send_json_data.sh {} {}".format(job_id, job_id, job_id, work_area, json_data_file)
+    bsub_json = "bsub -J create_json___{} -o create_json___{}.log -w \"done({}*)\" sh /igo/work/igo/igo-demux/scripts/send_json_data.sh {} {}".format(job_id, job_id, job_id, work_area, json_data_file)
     print(bsub_json)
     subprocess.run(bsub_json, shell = True)
 
diff --git a/scripts/send_json_data.sh b/scripts/send_json_data.sh
new file mode 100644
index 0000000..8fb3aa2
--- /dev/null
+++ b/scripts/send_json_data.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+
+##Usage sh send_json_data.sh patth_to_json json_file  
+args=$@
+
+path_to_json=$1
+json_file=$2
+
+cd $path_to_json
+
+echo $path_to_json
+json_data=$(cat $json_file)
+echo $json_data
+
+
+curl -d "$json_data" -H "Content-Type: application/json" -X POST "http://igodb.mskcc.org:8080/ngs-stats/saveCellRangerSample"
+

From f7e4d15c88fb218ae93a361d0f8e9d2d0c005e69 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 28 Feb 2024 16:31:49 -0500
Subject: [PATCH 04/87] add json option for visium

---
 scripts/cellranger.py         |  4 ++++
 scripts/cellranger_spatial.py | 33 ++++++++++++++++++++++++++-------
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index f4f1887..0778a12 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -332,6 +332,10 @@ def launch_cellranger(sample_sheet, sequencer_and_run):
                             probe = config_dict[tag]["probe"][sample_genome_dict[sample]]
                             cmd = cmd + " --probe-set={}".format(probe)
                         
+                        # if there is manual alignment json file availabe, add that to the cmd
+                        if sample_info.json != "EMPTY":
+                            cmd = cmd + " --loupe-alignment={}".format(sample_info.json)
+
                         bsub_cmd = "bsub -J {}_{}_{}_SPATIAL -o {}_SPATIAL.out{}{}".format(sequencer_and_run, project, sample, sample, cmd, OPTIONS)
                         print(bsub_cmd)
                         subprocess.run(bsub_cmd, shell=True)
diff --git a/scripts/cellranger_spatial.py b/scripts/cellranger_spatial.py
index a6af142..70b27a3 100644
--- a/scripts/cellranger_spatial.py
+++ b/scripts/cellranger_spatial.py
@@ -22,8 +22,10 @@ def __init__(self, sample, project_id):
         self.chip_id = "EMPTY"
         self.preservation = "EMPTY"
         self.tiff_image = "EMPTY"
+        self.json = "EMPTY"
         self.get_info_from_LIMS()
         self.copy_tiff(project_id)
+        self.copy_json(project_id)
 
     def get_info_from_LIMS(self):
         response = requests.get(ENDPOINT + self.IGO_ID , auth = ("pms", "tiagostarbuckslightbike"), verify = False)
@@ -42,12 +44,29 @@ def copy_tiff(self, project_id):
         if not os.path.exists(destination_loc):
             os.makedirs(destination_loc)
 
-        # copy all the image files using rsync?
-        original_tiff_image = glob.glob(source_loc_dir + "/" + self.sample_name + "*")
-        if len(original_tiff_image) != 1 or ".tif" not in original_tiff_image[0]:
+        # copy image file per sample
+        original_tiff_image = source_loc_dir + "/" + self.sample_name + ".tif"
+        if os.path.isfile(original_tiff_image):
+            shutil.copy(original_tiff_image, destination_file)
+            self.tiff_image = destination_file
+            print("copy {} to {}".format(original_tiff_image, destination_file))
+        else:
             print("tif file is not in proper format for sample {}, please check".format(self.IGO_ID))
+            
+    # copy json file if exists
+    def copy_json(self, project_id):
+        # project_id format as Project_12345
+        source_loc = original_tiff_images_directory + project_id + "/json/" + self.sample_name + ".json"
+        destination_loc = tiff_images_directory + project_id
+        destination_file = destination_loc + "/" + self.sample_name + ".json"
+
+        # create director if not exists
+        if not os.path.exists(destination_loc):
+            os.makedirs(destination_loc)
+        
+        if os.path.isfile(source_loc):
+            shutil.copy(source_loc, destination_file)
+            self.json = destination_file
+            print("copy {} to {}".format(source_loc, destination_file))
         else:
-            shutil.copy(original_tiff_image[0], destination_file)
-            self.tiff_image = destination_file
-            print("copy {} to {}".format(original_tiff_image[0], destination_file))
-            
\ No newline at end of file
+            print("json file does not exist for {}".format(self.sample_name))            

From 2fa413c2497150328a00d63876b152ccf7f0e28e Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Thu, 29 Feb 2024 09:16:33 -0500
Subject: [PATCH 05/87] Update cellranger.py

---
 scripts/cellranger.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index 0778a12..cffe807 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -426,6 +426,10 @@ def lanuch_by_project(project_directory, recipe, species):
                     probe = config_dict[tag]["probe"][species]
                     cmd = cmd + " --probe-set={}".format(probe)
                 
+                # if there is manual alignment json file availabe, add that to the cmd
+                if sample_info.json != "EMPTY":
+                    cmd = cmd + " --loupe-alignment={}".format(sample_info.json)
+                
                 bsub_cmd = "bsub -J {}_{}_{}_SPATIAL -o {}_SPATIAL.out{}{}".format(sequencer_and_run, project, sample, sample, cmd, OPTIONS)
                 print(bsub_cmd)
                 subprocess.run(bsub_cmd, shell=True)

From 1725d7bc53b8301e95466f79152191c162025e2e Mon Sep 17 00:00:00 2001
From: luc <44953736+CuijieLu@users.noreply.github.com>
Date: Thu, 29 Feb 2024 09:45:37 -0500
Subject: [PATCH 06/87] Update test_SampleSheet.py

---
 test_SampleSheet.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test_SampleSheet.py b/test_SampleSheet.py
index d1a2af7..f6a40b9 100644
--- a/test_SampleSheet.py
+++ b/test_SampleSheet.py
@@ -2,7 +2,7 @@
 import pytest
 
 def test_mixed_10X_barcodes():
-    x = SampleSheet("test/MICHELLE_0543_10X_MIXED.csv")
+    x = SampleSheet("./test/MICHELLE_0543_10X_MIXED.csv")
     ss_list = x.split_sample_sheet()
 
     if "OverrideCycles" in ss_list[1].df_ss_header.astype(str):
@@ -10,7 +10,7 @@ def test_mixed_10X_barcodes():
     print(ss_list[2].df_ss_header)
 
 def test_only_10XSI_barcodes():
-    x = SampleSheet("test/SampleSheet_10X_SI.csv")
+    x = SampleSheet("./test/SampleSheet_10X_SI.csv")
     print("Calling split sample sheet.")
     ss_list = x.split_sample_sheet()
     print("After split sample sheet.")
@@ -72,4 +72,4 @@ def test_only_DLP_split():
     x = SampleSheet("test/MICHELLE_420_ONLY_DLP.csv")
     ss_list = x.split_sample_sheet()
     assert(len(ss_list) == 1)
-    assert("Lane" in ss_list[0].df_ss_data.columns)
\ No newline at end of file
+    assert("Lane" in ss_list[0].df_ss_data.columns)

From 9951110e76640a8fade93f2a388761dba98a6ad9 Mon Sep 17 00:00:00 2001
From: luc <44953736+CuijieLu@users.noreply.github.com>
Date: Thu, 29 Feb 2024 09:52:11 -0500
Subject: [PATCH 07/87] Update test_SampleSheet.py

---
 test_SampleSheet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test_SampleSheet.py b/test_SampleSheet.py
index f6a40b9..24b5484 100644
--- a/test_SampleSheet.py
+++ b/test_SampleSheet.py
@@ -2,7 +2,7 @@
 import pytest
 
 def test_mixed_10X_barcodes():
-    x = SampleSheet("./test/MICHELLE_0543_10X_MIXED.csv")
+    x = SampleSheet("igo-demux/test/MICHELLE_0543_10X_MIXED.csv")
     ss_list = x.split_sample_sheet()
 
     if "OverrideCycles" in ss_list[1].df_ss_header.astype(str):
@@ -10,7 +10,7 @@ def test_mixed_10X_barcodes():
     print(ss_list[2].df_ss_header)
 
 def test_only_10XSI_barcodes():
-    x = SampleSheet("./test/SampleSheet_10X_SI.csv")
+    x = SampleSheet("test/SampleSheet_10X_SI.csv")
     print("Calling split sample sheet.")
     ss_list = x.split_sample_sheet()
     print("After split sample sheet.")

From 1efc78676f8146c44fdd4f7411ff58bd749a441f Mon Sep 17 00:00:00 2001
From: luc <44953736+CuijieLu@users.noreply.github.com>
Date: Thu, 29 Feb 2024 10:26:04 -0500
Subject: [PATCH 08/87] Update test_SampleSheet.py

---
 test_SampleSheet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_SampleSheet.py b/test_SampleSheet.py
index 24b5484..2eed0cc 100644
--- a/test_SampleSheet.py
+++ b/test_SampleSheet.py
@@ -2,7 +2,7 @@
 import pytest
 
 def test_mixed_10X_barcodes():
-    x = SampleSheet("igo-demux/test/MICHELLE_0543_10X_MIXED.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/MICHELLE_0543_10X_MIXED.csv")
     ss_list = x.split_sample_sheet()
 
     if "OverrideCycles" in ss_list[1].df_ss_header.astype(str):

From d8ea275524608d70ea46dcbd93e565360c5df1e5 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Thu, 29 Feb 2024 10:36:58 -0500
Subject: [PATCH 09/87] Update test_SampleSheet.py

---
 test_SampleSheet.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test_SampleSheet.py b/test_SampleSheet.py
index 2eed0cc..970d7de 100644
--- a/test_SampleSheet.py
+++ b/test_SampleSheet.py
@@ -10,7 +10,7 @@ def test_mixed_10X_barcodes():
     print(ss_list[2].df_ss_header)
 
 def test_only_10XSI_barcodes():
-    x = SampleSheet("test/SampleSheet_10X_SI.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet_10X_SI.csv")
     print("Calling split sample sheet.")
     ss_list = x.split_sample_sheet()
     print("After split sample sheet.")
@@ -18,30 +18,30 @@ def test_only_10XSI_barcodes():
     assert(len(ss_list) == 1)
 
 def test_read_10X_sample_sheet():
-    samplesheet = SampleSheet("test/SampleSheet_10X_SI.csv")
+    samplesheet = SampleSheet("./test/SampleSheet_10X_SI.csv")
     corrected = convert_SI_barcodes(samplesheet)
     print(corrected.df_ss_data.to_string())
     assert(len(corrected.df_ss_data) == 16)
 
 def test_read_empty_sample_sheet():
-    x = SampleSheet("test/empty_sample_sheet.csv")
+    x = SampleSheet("/test/empty_sample_sheet.csv")
     print("Success")
 
 def test_read_blank_sample_sheet():
     with pytest.raises(Exception):
-        x = SampleSheet("test/blank_sample_sheet.csv")
+        x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/blank_sample_sheet.csv")
 
 def test_read_SE_sample_sheet():
-    x = SampleSheet("test/SampleSheet_PEPE.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet_PEPE.csv")
     print("Success")
 
 def test_WGS_only_not_split():
-    x = SampleSheet("test/DIANA_0434.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/DIANA_0434.csv")
     ss_list = x.split_sample_sheet()
     assert(len(ss_list) == 1)
 
 def test_barcode_read_lengths():
-    x = SampleSheet("test/SampleSheet.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet.csv")
     assert (x.read_lengths[0] == 151)
     assert (x.read_lengths[1] == 151)
 

From ffc9fa34cc059dbdc236a4398e02e763ebdf6c77 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Thu, 29 Feb 2024 10:46:56 -0500
Subject: [PATCH 10/87] update test file path

---
 test_SampleSheet.py   | 12 ++++++------
 test_demux_run_dag.py |  4 ++--
 test_scripts.py       |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/test_SampleSheet.py b/test_SampleSheet.py
index 970d7de..539ef65 100644
--- a/test_SampleSheet.py
+++ b/test_SampleSheet.py
@@ -18,13 +18,13 @@ def test_only_10XSI_barcodes():
     assert(len(ss_list) == 1)
 
 def test_read_10X_sample_sheet():
-    samplesheet = SampleSheet("./test/SampleSheet_10X_SI.csv")
+    samplesheet = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet_10X_SI.csv")
     corrected = convert_SI_barcodes(samplesheet)
     print(corrected.df_ss_data.to_string())
     assert(len(corrected.df_ss_data) == 16)
 
 def test_read_empty_sample_sheet():
-    x = SampleSheet("/test/empty_sample_sheet.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/empty_sample_sheet.csv")
     print("Success")
 
 def test_read_blank_sample_sheet():
@@ -46,15 +46,15 @@ def test_barcode_read_lengths():
     assert (x.read_lengths[1] == 151)
 
 def test_recipe_set():
-    x = SampleSheet("test/SampleSheet.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet.csv")
     assert ("DLP" in x.recipe_set)
 
 def test_barcode_list():
-    x = SampleSheet("test/SampleSheet.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet.csv")
     assert ("AAGGACATAACCCCGT" in x.barcode_list)
     
 def test_split():
-    x = SampleSheet("test/SampleSheet_DLP.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet_DLP.csv")
     ss_list = x.split_sample_sheet()
     path0 = ss_list[0].path
     path1 = ss_list[1].path
@@ -69,7 +69,7 @@ def test_split():
 
 # Test when a sample sheet is only DLP lane information is removed and it is demuxed with "NoLaneSplitting" option in the sample sheet
 def test_only_DLP_split():
-    x = SampleSheet("test/MICHELLE_420_ONLY_DLP.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/MICHELLE_420_ONLY_DLP.csv")
     ss_list = x.split_sample_sheet()
     assert(len(ss_list) == 1)
     assert("Lane" in ss_list[0].df_ss_data.columns)
diff --git a/test_demux_run_dag.py b/test_demux_run_dag.py
index 80323c9..8d88612 100644
--- a/test_demux_run_dag.py
+++ b/test_demux_run_dag.py
@@ -2,14 +2,14 @@
 import demux_run_dag
 
 def test_WGS_only_not_split():
-    x = SampleSheet("test/SampleSheet_220304_MICHELLE_0485_BHFN7NDSX3.csv")
+    x = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet_220304_MICHELLE_0485_BHFN7NDSX3.csv")
     cmd_set = demux_run_dag.build_dragen_cmds(x, "MICHELLE_0485_BHFN7NDSX3")
     assert(len(cmd_set) == 7)
 
 
 def test_get_dlp_chip():
     # Test that the DLP chip returned is correct even when the run has multiple DLP projects with different chip IDs
-    sample_sheet = SampleSheet("test/SampleSheet_220412_MICHELLE_0501_BHFNH5DSX3_DLP.csv")
+    sample_sheet = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet_220412_MICHELLE_0501_BHFNH5DSX3_DLP.csv")
     for project in sample_sheet.project_set:
         chip_id = demux_run_dag.get_dlp_chip(sample_sheet, project)
         if project == "13098":
diff --git a/test_scripts.py b/test_scripts.py
index 2376554..e14975c 100644
--- a/test_scripts.py
+++ b/test_scripts.py
@@ -57,8 +57,8 @@ def testGettotalreads():
     assert(total_reads_dict["PDX_WD0010_P1_1850_IGO_12754_E_2"] == 602357556)
 
 def testGettotalreadsDLP():
-    sample_sheet = SampleSheet("test/SampleSheet_DLP_multiprojects.csv")
-    total_reads_dict = scripts.get_total_reads_from_demux.get_total_reads_DLP(sample_sheet, "test/Demultiplex_Stats_DLP.csv" )
+    sample_sheet = SampleSheet("/home/runner/work/igo-demux/igo-demux/test/SampleSheet_DLP_multiprojects.csv")
+    total_reads_dict = scripts.get_total_reads_from_demux.get_total_reads_DLP(sample_sheet, "/home/runner/work/igo-demux/igo-demux/test/Demultiplex_Stats_DLP.csv" )
     print(total_reads_dict)
     assert(total_reads_dict["Project_11113_L"]["samples"][1] == 3802998466)
     assert(total_reads_dict["Project_11113_L"]["pos_control"][1] == 654555718)

From cfb6100aa2242ad58bcf1dda6adb8d5a8bebbc7e Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Thu, 29 Feb 2024 10:50:17 -0500
Subject: [PATCH 11/87] Update test_scripts.py

---
 test_scripts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_scripts.py b/test_scripts.py
index e14975c..e9174b0 100644
--- a/test_scripts.py
+++ b/test_scripts.py
@@ -51,7 +51,7 @@ def testCellranger_get_sequencer_runID():
 
 def testGettotalreads():
     sample_list = ["PDX_WD0010_P1_1845_IGO_12754_E_1", "PDX_WD0010_P1_1850_IGO_12754_E_2"]
-    total_reads_dict = scripts.get_total_reads_from_demux.get_total_reads(sample_list, "test/Demultiplex_Stats.csv")
+    total_reads_dict = scripts.get_total_reads_from_demux.get_total_reads(sample_list, "/home/runner/work/igo-demux/igo-demux/test/Demultiplex_Stats.csv")
     print(total_reads_dict)
     assert(total_reads_dict["PDX_WD0010_P1_1845_IGO_12754_E_1"] == 770373032)
     assert(total_reads_dict["PDX_WD0010_P1_1850_IGO_12754_E_2"] == 602357556)

From 4ac47f49c291762d1aa466e324a5fd3bab9d8e90 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 7 Mar 2024 07:45:29 -0500
Subject: [PATCH 12/87] Update LaunchMetrics.py

temporary update to test SAMPLES on DRAGEN 4.2
---
 scripts/LaunchMetrics.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 59a6ee7..0445334 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -125,7 +125,8 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			# rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
 			rna_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
 			
@@ -133,7 +134,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -162,13 +163,14 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			# dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
 			dragen_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From a53ec4c0d6ab095c88742ae8b627966aa0797012 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 7 Mar 2024 09:35:58 -0500
Subject: [PATCH 13/87] Update LaunchMetrics.py

Finished running stats for DRAGEN 4.2; changing back to the original code.
---
 scripts/LaunchMetrics.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 0445334..59a6ee7 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -125,8 +125,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			# rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
-			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
+			rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
 		else:
 			rna_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
 			
@@ -134,7 +133,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -163,14 +162,13 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			# dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
-			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
+			dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
 		else:
 			dragen_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From e239ec078b9ab5cf59757c8dcccd1a08ac3fcd80 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Thu, 14 Mar 2024 10:39:38 -0400
Subject: [PATCH 14/87] add function for pooled ONT samples

---
 scripts/ont_stats.py | 64 ++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 17 deletions(-)

diff --git a/scripts/ont_stats.py b/scripts/ont_stats.py
index f23c825..291ae89 100644
--- a/scripts/ont_stats.py
+++ b/scripts/ont_stats.py
@@ -5,22 +5,46 @@
 import os
 from collections import OrderedDict
 
-# TODO check for multiple run
-def get_read_length_and_summary(file_path):
-    summary_metrix = pd.read_csv(file_path, delimiter = "\t")
-    read_length = summary_metrix[summary_metrix["passes_filtering"]]["sequence_length_template"].tolist()
-    read_length.sort(reverse = True)
-    median = statistics.median(read_length)
-    N50_value = sum(read_length) / 2
-    total = 0
-    for item in read_length:
-        total += item
-        if total >= N50_value:
-            N50 = item
-            break
-    
+# TODO get barcode info from lims
+# check if the run is pooled
+def if_pooled(sequencing_summary_df):
+    pooled = False
+    if "barcode_kit" in sequencing_summary_df.columns:
+        pooled = True
+    return pooled
+
+# get stats metric if the run is not pooled
+def get_read_length_and_summary(sequencing_summary_df):
+    read_length = sequencing_summary_df[sequencing_summary_df["passes_filtering"]]["sequence_length_template"].tolist()
+    if len(read_length) != 0:
+        read_length.sort(reverse = True)
+        median = statistics.median(read_length)
+        N50_value = sum(read_length) / 2
+        total = 0
+        for item in read_length:
+            total += item
+            if total >= N50_value:
+                N50 = item
+                break
+    else:
+        median = 0
+        N50_value = 0
+        N50 = 0
     return(len(read_length), N50_value * 2 / 1000000000, N50, median)
 
+# get stats metric if the run is pooled
+def get_read_length_and_summary_pooled(sequencing_summary_df, sample_name):
+    sample_dict = {}
+    samples = sequencing_summary_df["barcode_arrangement"].unique()
+    for sample in samples:
+        sample_df = sequencing_summary_df.loc[sequencing_summary_df['barcode_arrangement'] == sample]
+        sample_sub = sample_name + "_" + sample
+        stats = get_read_length_and_summary(sample_df)
+        # only record barcodes with more than 10000 reads
+        if stats[0] > 10000:
+            sample_dict[sample_sub] = get_read_length_and_summary(sample_df)
+    return sample_dict
+
 def write_to_csv(sample_dict):
     file_name = "summary.csv"
     with open(file_name,'w') as file:
@@ -35,12 +59,18 @@ def write_to_csv(sample_dict):
     project_directory = sys.argv[1]
     os.chdir(project_directory)
     sample_list = next(os.walk("."))[1]
-    sample_dict = OrderedDict()
+    sample_dict = {}
     sample_list.sort()
     for sample in sample_list:
         destination = project_directory + "/" + sample
         file = glob.glob(destination + "/*/sequencing_summary_*")
         if len(file) != 0:
-            sample_dict[sample] = get_read_length_and_summary(file[0])
-    
+            summary_metrix = pd.read_csv(file[0], delimiter = "\t")
+            pooled = if_pooled(summary_metrix)
+            if pooled:
+                sample_dict_sub = get_read_length_and_summary_pooled(summary_metrix, sample)
+                sample_dict.update(sample_dict_sub)
+            else:
+                sample_dict[sample] = get_read_length_and_summary(summary_metrix)
+
     write_to_csv(sample_dict)

From 30b01ec8f78cc84fb946dae14885f10911dda1ba Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 18 Mar 2024 14:11:30 -0400
Subject: [PATCH 15/87] Update run_param_config.py

Add the "SMART-Seq" and "SMARTSeq" recipes to the run_param_config script so that RNA stats are run on samples with these recipes.
---
 scripts/run_param_config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index 8351ca9..f74814a 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -74,6 +74,8 @@ def get_ordered_dic(unordered_dic):
 		".*SMARTer.*": { TYPE: "RNA" },
 		"FusionDiscoverySeq": { TYPE: "RNA" },
 		".*Ribo.*": { TYPE: "RNA" },
+		"SMART-Seq": { TYPE: "RNA" },
+		"SMARTSeq": { TYPE: "RNA" },
 		".*CDH1_RNA.*": { TYPE: "CAPTURE" },
 		# FOR NEW ENTRIES
 		# "{regex}": { TYPE: type }

From 649b17dac66602191f29b444a7a3b6acd57a710a Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Tue, 19 Mar 2024 12:29:17 -0400
Subject: [PATCH 16/87] Update LaunchMetrics.py

taking id02 out of production. License expired
---
 scripts/LaunchMetrics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 59a6ee7..d14b790 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -133,7 +133,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -168,7 +168,7 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		
@@ -211,7 +211,7 @@ def dragen_methylation(sample, run, sample_parameters, work_directory, dragen_di
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen_methylation = "/opt/edico/bin/dragen --enable-methylation-calling true --methylation-protocol directional --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From 0b50c75db5a313d2f661ba3316eb8a29a3fbeb5f Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 21 Mar 2024 09:28:13 -0400
Subject: [PATCH 17/87] Update LaunchMetrics.py

temporary change to generate bams using dragen 4.2
---
 scripts/LaunchMetrics.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index d14b790..9390184 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -125,15 +125,16 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			# rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
-			rna_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
+			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
 			
 		rna_dragen_job_name_header = "{}___RNA_DRAGEN___".format(run)
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -162,13 +163,14 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			# dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
-			dragen_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
+			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From 73c96588c8dc72260b94e6425fcfb9d2b799cc6d Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 21 Mar 2024 09:34:11 -0400
Subject: [PATCH 18/87] Update LaunchMetrics.py

add special directory for DRAGEN 4.2 testing
---
 scripts/LaunchMetrics.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 9390184..969afc1 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -33,6 +33,9 @@ def __init__(self):
 		
 	def launch_metrics(self, all_samples, run, project_directory):
 		#
+		# special run 
+		run = "FAUCI_0121_B222WMMLT4_special"
+		
 		# create output directories
 		parent_directory = "/igo/staging/stats"
 		work_directory = "{}/{}/".format(parent_directory, run)

From c06f662bb81be393628f97809aa2e6d3eb2e4762 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 21 Mar 2024 09:47:17 -0400
Subject: [PATCH 19/87] Update LaunchMetrics.py

taking out the special run
---
 scripts/LaunchMetrics.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 969afc1..9390184 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -33,9 +33,6 @@ def __init__(self):
 		
 	def launch_metrics(self, all_samples, run, project_directory):
 		#
-		# special run 
-		run = "FAUCI_0121_B222WMMLT4_special"
-		
 		# create output directories
 		parent_directory = "/igo/staging/stats"
 		work_directory = "{}/{}/".format(parent_directory, run)

From f0ca0628d0326db557afc95880ee5351dca7a733 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Fri, 22 Mar 2024 17:55:53 -0400
Subject: [PATCH 20/87] Update LaunchMetrics.py

putting id03 back into production
---
 scripts/LaunchMetrics.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 9390184..6b72816 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -125,16 +125,16 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			# rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
-			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
+			rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			# rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
-			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
+			rna_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
 			
 		rna_dragen_job_name_header = "{}___RNA_DRAGEN___".format(run)
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -163,14 +163,14 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			# dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
-			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
+			dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			# dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
-			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
+			dragen_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From 4908efed8b68a5bb44211f58e877adf3acb194c8 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 25 Mar 2024 07:44:01 -0400
Subject: [PATCH 21/87] Update LaunchMetrics.py

taking ID03 DRAGEN server out of production.  License quota exceeded
---
 scripts/LaunchMetrics.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 6b72816..9390184 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -125,16 +125,16 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
-			# rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
+			# rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
-			rna_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
+			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
 			
 		rna_dragen_job_name_header = "{}___RNA_DRAGEN___".format(run)
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -163,14 +163,14 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
-			# dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
+			# dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
+			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
-			dragen_path = "/igo/work/igo/dragen_hash_tables/{}".format(sample_parameters["GTAG"])
+			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From f1b3e25d2f1057b53ef2a379b8ef451ae8e76c15 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 25 Mar 2024 16:28:42 -0400
Subject: [PATCH 22/87] Update LaunchMetrics.py

adding demux only routine to the script
---
 scripts/LaunchMetrics.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 9390184..cf7323b 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -10,12 +10,16 @@
 import shutil
 import pathlib
 import scripts.generate_run_params
+import scripts.get_total_reads_from_demux
 
 
 # Global Variable : we do not want to process these experiments in this script
-DO_NOT_PROCESS = ["10X_Genomics", "DLP"]
+DO_NOT_PROCESS = ["DLP"]
 # These recipes will be evaluated using DRAGEN because of their larger size of fastqs
 RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "MouseWholeGenome", "HumanWholeGenome", "PombeWholeGenome", "ChIPSeq", "AmpliconSeq"]
+# these projects willl only need demux stats
+DEMUX_ONLY = ["SMARTSeq", "10X_Genomics"]
+
 # Organisms to have DRAGEN BAMS
 DRAGEN_RNA_GENOMES = ["GRCh38", "grcm39"]
 # this list contains the headers of the columns.  we will access the data using these listings
@@ -38,6 +42,7 @@ def launch_metrics(self, all_samples, run, project_directory):
 		work_directory = "{}/{}/".format(parent_directory, run)
 		rna_directory = "{}RNA/".format(work_directory)
 		dragen_directory = "{}DRAGEN/".format(work_directory)
+		stats_done_directory = "/igo/stats/DONE/{}".format(run.split("_")[0])
 		
 		# create work directory	
 		pathlib.Path(work_directory).mkdir(parents = True, exist_ok = True)
@@ -59,6 +64,12 @@ def launch_metrics(self, all_samples, run, project_directory):
 			# test to see if there are some samples that this script will not process
 			if any(s in sample.recipe for s in DO_NOT_PROCESS):
 				continue
+			
+			if any(s in sample.recipe for s in DEMUX_ONLY):
+				demux_report_file = "/igo/staging/FASTQ/{}/Reports/Demultiplex_Stats.csv".format(run)
+				demux_reads_per_sample = scripts.get_total_reads_from_demux.get_total_reads([sample.sample_id], demux_report_file)
+				scripts.get_total_reads_from_demux.write_to_am_txt(run, sample.sample_id, demux_reads_per_sample[sample.sample_id], stats_done_directory)
+				
 			# grab the sample parameters (bait set, type, gtag, etc)
 			sample_parameters = self.get_parameters(sample.genome, sample.recipe)
 			# process the RNA data seperately

From 7d26837ad8fd1022c80f6ebc6962bc98b6ca1d51 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 25 Mar 2024 16:30:05 -0400
Subject: [PATCH 23/87] change 10x run to be checked by read length

---
 demux_run_dag.py                    | 3 ++-
 scripts/get_sequencing_read_data.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/demux_run_dag.py b/demux_run_dag.py
index f364ab0..ba9ee55 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -170,7 +170,8 @@ def stats(ds, **kwargs):
 
             return "DLP stats posted and yaml file generated"
 
-        if any("10X_" in s for s in sample_sheet.recipe_set):
+        atac, use_bases_mask = scripts.get_sequencing_read_data.main(sequencer_path)
+        if use_bases_mask == [29, 89]:
             # if is atac run, demux is using cellranger mkfastq
             if scripts.get_sequencing_read_data.main(sequencer_path)[0]:
                 scripts.get_total_reads_from_demux.by_json(sequencer_and_run)
diff --git a/scripts/get_sequencing_read_data.py b/scripts/get_sequencing_read_data.py
index 8021ba3..bdcab04 100755
--- a/scripts/get_sequencing_read_data.py
+++ b/scripts/get_sequencing_read_data.py
@@ -36,7 +36,7 @@ def get_sequencing_read_data(sequencer_path):
 		use_bases_mask = "Y" + str(reads_tag[0][1]) + ",I" + str(reads_tag[1][1]) + ",Y" + str(reads_tag[2][1]) + ",Y" + str(reads_tag[3][1])
 	else:
 		atac = False
-		use_bases_mask = ""
+		use_bases_mask = [reads_tag[0][1], reads_tag[3][1]]
 		
 	return(atac, use_bases_mask)
 		

From b0a60f1fb0b647243fa0f41436838664fdb84dc2 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 25 Mar 2024 16:37:44 -0400
Subject: [PATCH 24/87] Update LaunchMetrics.py

need the continue statement to skip the rest of the loop body for demux-only samples
---
 scripts/LaunchMetrics.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index cf7323b..1cec80d 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -69,6 +69,7 @@ def launch_metrics(self, all_samples, run, project_directory):
 				demux_report_file = "/igo/staging/FASTQ/{}/Reports/Demultiplex_Stats.csv".format(run)
 				demux_reads_per_sample = scripts.get_total_reads_from_demux.get_total_reads([sample.sample_id], demux_report_file)
 				scripts.get_total_reads_from_demux.write_to_am_txt(run, sample.sample_id, demux_reads_per_sample[sample.sample_id], stats_done_directory)
+				continue
 				
 			# grab the sample parameters (bait set, type, gtag, etc)
 			sample_parameters = self.get_parameters(sample.genome, sample.recipe)

From 90607e8467ad3b172658b1ea9ced3f59b7421d6d Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 25 Mar 2024 16:40:23 -0400
Subject: [PATCH 25/87] Update LaunchMetrics.py

---
 scripts/LaunchMetrics.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 1cec80d..f48fc26 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -68,6 +68,7 @@ def launch_metrics(self, all_samples, run, project_directory):
 			if any(s in sample.recipe for s in DEMUX_ONLY):
 				demux_report_file = "/igo/staging/FASTQ/{}/Reports/Demultiplex_Stats.csv".format(run)
 				demux_reads_per_sample = scripts.get_total_reads_from_demux.get_total_reads([sample.sample_id], demux_report_file)
+				print(demux_reads_per_sample)
 				scripts.get_total_reads_from_demux.write_to_am_txt(run, sample.sample_id, demux_reads_per_sample[sample.sample_id], stats_done_directory)
 				continue
 				

From d6488f151f040707d6e7b3f23b2ee22c6bae2c36 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 25 Mar 2024 16:44:12 -0400
Subject: [PATCH 26/87] Update LaunchMetrics.py

---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index f48fc26..6abe131 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -43,7 +43,7 @@ def launch_metrics(self, all_samples, run, project_directory):
 		rna_directory = "{}RNA/".format(work_directory)
 		dragen_directory = "{}DRAGEN/".format(work_directory)
 		stats_done_directory = "/igo/stats/DONE/{}".format(run.split("_")[0])
-		
+		print(stats_done_directory)
 		# create work directory	
 		pathlib.Path(work_directory).mkdir(parents = True, exist_ok = True)
 		

From 6ec1dd7caf0a98dffe123724395c3407d36a1aa0 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 25 Mar 2024 16:48:14 -0400
Subject: [PATCH 27/87] Update LaunchMetrics.py

---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 6abe131..db0fd1c 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -42,7 +42,7 @@ def launch_metrics(self, all_samples, run, project_directory):
 		work_directory = "{}/{}/".format(parent_directory, run)
 		rna_directory = "{}RNA/".format(work_directory)
 		dragen_directory = "{}DRAGEN/".format(work_directory)
-		stats_done_directory = "/igo/stats/DONE/{}".format(run.split("_")[0])
+		stats_done_directory = "/igo/stats/DONE/{}/".format(run.split("_")[0])
 		print(stats_done_directory)
 		# create work directory	
 		pathlib.Path(work_directory).mkdir(parents = True, exist_ok = True)

From 26484bf61f702d9237d80c01d1780b3ab753f527 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 27 Mar 2024 14:34:25 -0400
Subject: [PATCH 28/87] add atac to 10X situation

---
 demux_run_dag.py              |    6 +-
 test/Top_Unknown_Barcodes.csv | 1001 +++++++++++++++++++++++++++++++++
 2 files changed, 1004 insertions(+), 3 deletions(-)
 create mode 100644 test/Top_Unknown_Barcodes.csv

diff --git a/demux_run_dag.py b/demux_run_dag.py
index ba9ee55..6c85c45 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -3,7 +3,6 @@
 import subprocess
 from datetime import datetime, timedelta
 
-from numpy import equal
 import pandas
 from SampleSheet import SampleSheet
 import scripts.organise_fastq_split_by_lane
@@ -170,10 +169,11 @@ def stats(ds, **kwargs):
 
             return "DLP stats posted and yaml file generated"
 
+        # check if the run is 10X by read length
         atac, use_bases_mask = scripts.get_sequencing_read_data.main(sequencer_path)
-        if use_bases_mask == [29, 89]:
+        if use_bases_mask == [29, 89] or atac:
             # if is atac run, demux is using cellranger mkfastq
-            if scripts.get_sequencing_read_data.main(sequencer_path)[0]:
+            if atac:
                 scripts.get_total_reads_from_demux.by_json(sequencer_and_run)
                 scripts.upload_stats.upload_stats(sequencer_and_run)
 
diff --git a/test/Top_Unknown_Barcodes.csv b/test/Top_Unknown_Barcodes.csv
new file mode 100644
index 0000000..76acc74
--- /dev/null
+++ b/test/Top_Unknown_Barcodes.csv
@@ -0,0 +1,1001 @@
+Lane,index,index2,# Reads,% of Unknown Barcodes,% of All Reads
+1,AAAAAAAA,,2219166,0.016658,0.016654
+1,CCCCCCCC,,1271809,0.009547,0.009545
+1,TTTTTTTT,,813459,0.006106,0.006105
+1,TCCCCCCC,,306154,0.002298,0.002298
+1,CCCCCCCT,,283122,0.002125,0.002125
+1,CCCCCCTC,,259274,0.001946,0.001946
+1,CCCCCTCC,,253669,0.001904,0.001904
+1,CCCCTCCC,,252278,0.001894,0.001893
+1,CCCTCCCC,,251165,0.001885,0.001885
+1,CTCCCCCC,,246202,0.001848,0.001848
+1,CCTCCCCC,,245390,0.001842,0.001842
+1,CTTTTTTT,,215947,0.001621,0.001621
+1,TTTTTTTC,,208944,0.001568,0.001568
+1,TCTTTTTT,,203072,0.001524,0.001524
+1,TTTTTTCT,,192106,0.001442,0.001442
+1,TTTTTCTT,,191081,0.001434,0.001434
+1,GCCCCCCC,,188080,0.001412,0.001412
+1,TTCTTTTT,,187573,0.001408,0.001408
+1,TTTTCTTT,,180659,0.001356,0.001356
+1,TTTCTTTT,,180120,0.001352,0.001352
+1,CCCCCCCA,,170377,0.001279,0.001279
+1,CCCCCCCG,,155423,0.001167,0.001166
+1,TTTTTTTA,,151436,0.001137,0.001136
+1,CCCCCCAC,,150223,0.001128,0.001127
+1,CCCCCACC,,150057,0.001126,0.001126
+1,CCCCCCGC,,148185,0.001112,0.001112
+1,CCCACCCC,,147185,0.001105,0.001105
+1,CCCCACCC,,144821,0.001087,0.001087
+1,CGCCCCCC,,140909,0.001058,0.001057
+1,CCGCCCCC,,140841,0.001057,0.001057
+1,CCCCGCCC,,140507,0.001055,0.001054
+1,CCACCCCC,,138745,0.001041,0.001041
+1,AAAAAAAT,,135372,0.001016,0.001016
+1,CACCCCCC,,134748,0.001011,0.001011
+1,CCCGCCCC,,134499,0.001010,0.001009
+1,CCCCCGCC,,134076,0.001006,0.001006
+1,AAAAAATA,,132096,0.000992,0.000991
+1,TTTTTTAT,,131869,0.000990,0.000990
+1,TTTTTATT,,130836,0.000982,0.000982
+1,AAAAATAA,,125043,0.000939,0.000938
+1,TTTATTTT,,124636,0.000936,0.000935
+1,AAAATAAA,,124514,0.000935,0.000934
+1,TATTTTTT,,124251,0.000933,0.000932
+1,AATAAAAA,,124238,0.000933,0.000932
+1,TTTTATTT,,124113,0.000932,0.000931
+1,AAAAAAAC,,122686,0.000921,0.000921
+1,AAAAAACA,,122236,0.000918,0.000917
+1,AAATAAAA,,119977,0.000901,0.000900
+1,TTATTTTT,,119866,0.000900,0.000900
+1,ATAAAAAA,,119001,0.000893,0.000893
+1,ACAAAAAA,,115857,0.000870,0.000869
+1,AAAAACAA,,115104,0.000864,0.000864
+1,ACCCCCCC,,115060,0.000864,0.000864
+1,AACAAAAA,,114874,0.000862,0.000862
+1,CCCCCCTT,,110146,0.000827,0.000827
+1,AAAACAAA,,110106,0.000827,0.000826
+1,TTCCCCCC,,109518,0.000822,0.000822
+1,AAACAAAA,,107415,0.000806,0.000806
+1,TCTCCCCC,,102118,0.000767,0.000766
+1,TCCCCCCT,,100358,0.000753,0.000753
+1,TCCTCCCC,,98745,0.000741,0.000741
+1,CCCCCTCT,,97053,0.000729,0.000728
+1,ATTTTTTT,,96842,0.000727,0.000727
+1,GTTTTTTT,,96581,0.000725,0.000725
+1,CCCCCTTC,,96442,0.000724,0.000724
+1,CCTTTTTT,,96200,0.000722,0.000722
+1,TCCCTCCC,,95765,0.000719,0.000719
+1,TCCCCTCC,,94553,0.000710,0.000710
+1,TCCCCCTC,,94264,0.000708,0.000707
+1,TAAAAAAA,,93519,0.000702,0.000702
+1,CCCCTCCT,,92799,0.000697,0.000696
+1,CCCTTCCC,,91881,0.000690,0.000690
+1,CCCCTTCC,,91417,0.000686,0.000686
+1,TTTTTTCC,,90280,0.000678,0.000678
+1,CCCTCCCT,,90236,0.000677,0.000677
+1,CCTTCCCC,,89372,0.000671,0.000671
+1,CTTCCCCC,,89369,0.000671,0.000671
+1,CCCCTCTC,,88027,0.000661,0.000661
+1,CCTCCCCT,,85741,0.000644,0.000643
+1,CCTCTCCC,,85012,0.000638,0.000638
+1,CCCTCTCC,,84245,0.000632,0.000632
+1,CCCTCCTC,,84241,0.000632,0.000632
+1,CTCCCCCT,,84228,0.000632,0.000632
+1,TCCTTTTT,,84185,0.000632,0.000632
+1,CTCTCCCC,,84087,0.000631,0.000631
+1,CTCTTTTT,,82104,0.000616,0.000616
+1,TTTTTCTC,,81509,0.000612,0.000612
+1,TTTTTTTG,,81420,0.000611,0.000611
+1,CCTCCCTC,,81349,0.000611,0.000611
+1,CTCCTCCC,,80850,0.000607,0.000607
+1,CCTCCTCC,,80652,0.000605,0.000605
+1,TTTTTCCT,,80447,0.000604,0.000604
+1,CAAAAAAA,,80349,0.000603,0.000603
+1,CTTTTTTC,,79913,0.000600,0.000600
+1,CTCCCCTC,,79258,0.000595,0.000595
+1,CTCCCTCC,,78896,0.000592,0.000592
+1,TTGTTTTT,,78203,0.000587,0.000587
+1,TTTTTTGT,,78051,0.000586,0.000586
+1,TCTTTTTC,,77259,0.000580,0.000580
+1,CTTTTCTT,,76328,0.000573,0.000573
+1,TGTTTTTT,,75814,0.000569,0.000569
+1,CTTCTTTT,,75757,0.000569,0.000569
+1,CTTTTTCT,,74858,0.000562,0.000562
+1,TCTCTTTT,,74712,0.000561,0.000561
+1,TTTTCCTT,,74679,0.000561,0.000560
+1,TTTTCTTC,,74531,0.000559,0.000559
+1,TCTTTCTT,,74188,0.000557,0.000557
+1,CTTTCTTT,,73672,0.000553,0.000553
+1,TTCCTTTT,,73549,0.000552,0.000552
+1,TTTTGTTT,,73526,0.000552,0.000552
+1,TCTTTTCT,,73427,0.000551,0.000551
+1,TTTTTGTT,,73199,0.000549,0.000549
+1,TTCTTTTC,,72829,0.000547,0.000547
+1,TTTCTTTC,,72691,0.000546,0.000546
+1,TCTTCTTT,,72676,0.000546,0.000545
+1,TTTGTTTT,,71798,0.000539,0.000539
+1,TTTTCTCT,,71409,0.000536,0.000536
+1,TTTCCTTT,,70521,0.000529,0.000529
+1,TTCTTCTT,,69864,0.000524,0.000524
+1,TTCTTTCT,,69383,0.000521,0.000521
+1,TTCTCTTT,,69225,0.000520,0.000520
+1,TTTCTCTT,,68854,0.000517,0.000517
+1,TTTCTTCT,,68349,0.000513,0.000513
+1,CCCCCTTT,,63694,0.000478,0.000478
+1,TTTCCCCC,,62272,0.000467,0.000467
+1,CCCTTTTT,,60686,0.000456,0.000455
+1,TTTTTCCC,,58423,0.000439,0.000438
+1,CCCCTCTT,,56182,0.000422,0.000422
+1,TTCTCCCC,,55426,0.000416,0.000416
+1,TCTTCCCC,,55346,0.000415,0.000415
+1,TCCCCCTT,,55149,0.000414,0.000414
+1,CCCTCCTT,,53582,0.000402,0.000402
+1,CCCCTTTC,,53488,0.000402,0.000401
+1,CCCCTTCT,,53125,0.000399,0.000399
+1,CCCCTTTT,,52374,0.000393,0.000393
+1,CCTCTTTT,,52201,0.000392,0.000392
+1,TCCTTCCC,,51894,0.000390,0.000389
+1,TTCCTCCC,,51753,0.000388,0.000388
+1,TTCCCCCT,,51433,0.000386,0.000386
+1,CCTTTCCC,,51126,0.000384,0.000384
+1,TCCCCCCA,,51033,0.000383,0.000383
+1,TTTTCCCC,,50816,0.000381,0.000381
+1,CTTTCCCC,,50683,0.000380,0.000380
+1,TCCCTTTT,,50631,0.000380,0.000380
+1,CCCTTTCC,,50491,0.000379,0.000379
+1,CCTCCCTT,,50099,0.000376,0.000376
+1,CCCTTCCT,,49979,0.000375,0.000375
+1,TTTTTTAA,,49958,0.000375,0.000375
+1,TCCCCTTC,,49952,0.000375,0.000375
+1,TCTCTCCC,,49897,0.000375,0.000374
+1,TTCCCTCC,,49852,0.000374,0.000374
+1,TTTTCTCC,,49800,0.000374,0.000374
+1,CCTTTTTC,,49747,0.000373,0.000373
+1,TCCCCTCT,,49700,0.000373,0.000373
+1,TTCCCCTC,,49389,0.000371,0.000371
+1,TCCTCCCT,,49326,0.000370,0.000370
+1,TCCCTTCC,,49189,0.000369,0.000369
+1,TCTCCCCT,,48986,0.000368,0.000368
+1,CCCCCCTA,,48787,0.000366,0.000366
+1,TCCCTCCT,,48691,0.000365,0.000365
+1,CCTTCTTT,,48590,0.000365,0.000365
+1,CCCTTCTC,,48439,0.000364,0.000364
+1,CCTTTCTT,,48400,0.000363,0.000363
+1,CCCTCTCT,,48259,0.000362,0.000362
+1,TTTTCCCT,,48235,0.000362,0.000362
+1,CCCTCTTC,,48214,0.000362,0.000362
+1,GTCCCCCC,,48155,0.000361,0.000361
+1,GCCCCCCT,,47953,0.000360,0.000360
+1,CTCCCCTT,,47864,0.000359,0.000359
+1,TCTTTTCC,,47814,0.000359,0.000359
+1,TTTTCCTC,,47784,0.000359,0.000359
+1,TCTCCTCC,,47580,0.000357,0.000357
+1,CTCCTTTT,,47565,0.000357,0.000357
+1,CTTTTTCC,,47487,0.000356,0.000356
+1,TCCTCTCC,,47470,0.000356,0.000356
+1,CCTTTTCT,,47379,0.000356,0.000356
+1,CCCCCCAA,,47348,0.000355,0.000355
+1,TCTCCCTC,,47001,0.000353,0.000353
+1,TTTCTTCC,,46980,0.000353,0.000353
+1,TCCCTCTC,,46805,0.000351,0.000351
+1,TCCTCCTC,,46594,0.000350,0.000350
+1,CCTTCCCT,,46555,0.000349,0.000349
+1,GCTCCCCC,,46541,0.000349,0.000349
+1,CCCTCTTT,,46327,0.000348,0.000348
+1,TCCCCACC,,46174,0.000347,0.000347
+1,GCCTCCCC,,46024,0.000345,0.000345
+1,CTTCTCCC,,46001,0.000345,0.000345
+1,CTCTTCCC,,45953,0.000345,0.000345
+1,CCTCTTCC,,45924,0.000345,0.000345
+1,TTCTTTCC,,45923,0.000345,0.000345
+1,TCCTCTTT,,45837,0.000344,0.000344
+1,GCCCTCCC,,45774,0.000344,0.000344
+1,CCCCCTCA,,45712,0.000343,0.000343
+1,CCCCCCAT,,45664,0.000343,0.000343
+1,TCCCCCAC,,45549,0.000342,0.000342
+1,TGCCCCCC,,45492,0.000341,0.000341
+1,TCCTTTTC,,45411,0.000341,0.000341
+1,TCCACCCC,,45270,0.000340,0.000340
+1,CCTTCTCC,,45194,0.000339,0.000339
+1,TTTTTATA,,45089,0.000338,0.000338
+1,CTTTTTTA,,45020,0.000338,0.000338
+1,CCTCCTCT,,45013,0.000338,0.000338
+1,TCCTTCTT,,45006,0.000338,0.000338
+1,CCTCCTTC,,44932,0.000337,0.000337
+1,TTTCTCCC,,44910,0.000337,0.000337
+1,CCCCCCGT,,44839,0.000337,0.000337
+1,CCTTCCTC,,44818,0.000336,0.000336
+1,TTTTTTCA,,44768,0.000336,0.000336
+1,CCTCTCCT,,44718,0.000336,0.000336
+1,TTCCCTTT,,44543,0.000334,0.000334
+1,GCCCCTCC,,44477,0.000334,0.000334
+1,GGCCCCCC,,44452,0.000334,0.000334
+1,CTTCCCCT,,44383,0.000333,0.000333
+1,TCTTTCCT,,44329,0.000333,0.000333
+1,TCCCCCCG,,44294,0.000332,0.000332
+1,CCCTTCTT,,44251,0.000332,0.000332
+1,CTTTTCTC,,44152,0.000331,0.000331
+1,TCCCACCC,,44148,0.000331,0.000331
+1,GCCCCCTC,,44124,0.000331,0.000331
+1,TCCCCTTT,,44062,0.000331,0.000331
+1,TTTCCCTT,,44037,0.000331,0.000330
+1,CCCCTCCA,,44008,0.000330,0.000330
+1,TCTTTCTC,,43964,0.000330,0.000330
+1,TCTTTCCC,,43924,0.000330,0.000330
+1,CTTTTCCT,,43813,0.000329,0.000329
+1,TCTCCTTT,,43749,0.000328,0.000328
+1,TCCTTTCT,,43747,0.000328,0.000328
+1,CCTCTCTC,,43645,0.000328,0.000328
+1,CTTCCTCC,,43511,0.000327,0.000327
+1,CCCTCCCA,,43493,0.000326,0.000326
+1,CTCCCTTC,,43410,0.000326,0.000326
+1,TCTTCCTT,,43331,0.000325,0.000325
+1,CCCCCACT,,43324,0.000325,0.000325
+1,TCGCCCCC,,43267,0.000325,0.000325
+1,CCCCCCTG,,43193,0.000324,0.000324
+1,CCCTTTTC,,43190,0.000324,0.000324
+1,CTCTTTTC,,43185,0.000324,0.000324
+1,TACCCCCC,,43161,0.000324,0.000324
+1,CTCCCTCT,,43126,0.000324,0.000324
+1,CTCTCCCT,,43094,0.000323,0.000323
+1,CTCTCTTT,,43069,0.000323,0.000323
+1,CCCCCACA,,43068,0.000323,0.000323
+1,TTTCCTTC,,42975,0.000323,0.000323
+1,CTCTTCTT,,42906,0.000322,0.000322
+1,TTTTTAAT,,42901,0.000322,0.000322
+1,TCACCCCC,,42896,0.000322,0.000322
+1,TTTCTCTC,,42845,0.000322,0.000322
+1,CTTCCCTC,,42823,0.000321,0.000321
+1,CTCCTTCC,,42790,0.000321,0.000321
+1,CTTCCTTT,,42699,0.000321,0.000320
+1,CTCCCCCA,,42658,0.000320,0.000320
+1,CCTCCCCA,,42576,0.000320,0.000320
+1,TCTCTTTC,,42546,0.000319,0.000319
+1,CCCTTTCT,,42515,0.000319,0.000319
+1,TTCTTCCC,,42511,0.000319,0.000319
+1,TCTTTTTA,,42485,0.000319,0.000319
+1,CTCCTCCT,,42401,0.000318,0.000318
+1,TTTTTTAC,,42393,0.000318,0.000318
+1,TTCTTCCT,,42291,0.000317,0.000317
+1,TTTCTCCT,,42270,0.000317,0.000317
+1,CCTCCTTT,,42229,0.000317,0.000317
+1,CTTTCCTT,,42216,0.000317,0.000317
+1,CTTTTCCC,,42145,0.000316,0.000316
+1,TTCTTCTC,,42113,0.000316,0.000316
+1,TTCTCCTT,,42049,0.000316,0.000316
+1,TCCCCCGC,,41991,0.000315,0.000315
+1,CCCACCCT,,41984,0.000315,0.000315
+1,CCCCACCT,,41863,0.000314,0.000314
+1,TTTTTCTA,,41826,0.000314,0.000314
+1,TTCCTTTC,,41801,0.000314,0.000314
+1,TTCCTCTT,,41644,0.000313,0.000313
+1,TCTTCTTC,,41466,0.000311,0.000311
+1,CTCTCTCC,,41433,0.000311,0.000311
+1,TCTCTCTT,,41349,0.000310,0.000310
+1,CTTCTTTC,,41335,0.000310,0.000310
+1,TTTCCTCC,,41334,0.000310,0.000310
+1,CTCCTCTC,,41138,0.000309,0.000309
+1,CCCCCATC,,41136,0.000309,0.000309
+1,TTTTATTA,,41110,0.000309,0.000309
+1,CTCTTTCT,,41105,0.000309,0.000308
+1,TTTCCTCT,,41101,0.000309,0.000308
+1,TTTCCCCT,,41073,0.000308,0.000308
+1,CCTTTTCC,,41069,0.000308,0.000308
+1,CCCCCTAC,,41030,0.000308,0.000308
+1,CTCTCCTC,,41023,0.000308,0.000308
+1,CCCCCAAC,,40998,0.000308,0.000308
+1,CTTTCTTC,,40979,0.000308,0.000308
+1,TCCCGCCC,,40647,0.000305,0.000305
+1,CTTCTCTT,,40644,0.000305,0.000305
+1,TTCTCTTC,,40565,0.000305,0.000304
+1,TCTTCTCT,,40551,0.000304,0.000304
+1,TTTATTTA,,40487,0.000304,0.000304
+1,TCTCTTCT,,40199,0.000302,0.000302
+1,TCCGCCCC,,40093,0.000301,0.000301
+1,CCCCACCA,,40081,0.000301,0.000301
+1,TTTTAATT,,39992,0.000300,0.000300
+1,TTCCTTCT,,39990,0.000300,0.000300
+1,CCCCTACC,,39968,0.000300,0.000300
+1,CCCCCTCG,,39899,0.000300,0.000299
+1,CCCACCCA,,39796,0.000299,0.000299
+1,CATTTTTT,,39784,0.000299,0.000299
+1,CCCCTCAC,,39763,0.000298,0.000298
+1,TTTCCCTC,,39751,0.000298,0.000298
+1,CCCCCTGC,,39680,0.000298,0.000298
+1,TCCCTCTT,,39640,0.000298,0.000297
+1,CTTTCTCT,,39614,0.000297,0.000297
+1,TTTTTATC,,39596,0.000297,0.000297
+1,CTTTTTAT,,39595,0.000297,0.000297
+1,CTCCCTTT,,39582,0.000297,0.000297
+1,TTTTCTTA,,39581,0.000297,0.000297
+1,GCGCCCCC,,39546,0.000297,0.000297
+1,TCCTTTCC,,39498,0.000296,0.000296
+1,TTTCTTTA,,39432,0.000296,0.000296
+1,TTCTTTTA,,39344,0.000295,0.000295
+1,TTCCCCTT,,39337,0.000295,0.000295
+1,TCCCTTTC,,39326,0.000295,0.000295
+1,CCTTTCCT,,39307,0.000295,0.000295
+1,CTTCTTCT,,39278,0.000295,0.000295
+1,TTCTCTCT,,39255,0.000295,0.000295
+1,CCTTCCTT,,39219,0.000294,0.000294
+1,CTTTTATT,,39204,0.000294,0.000294
+1,CCTCTCTT,,39114,0.000294,0.000294
+1,CCCCTCCG,,39103,0.000294,0.000293
+1,AAAAAATT,,39010,0.000293,0.000293
+1,TCCTCCTT,,38987,0.000293,0.000293
+1,CCCCAACC,,38981,0.000293,0.000293
+1,CCCATCCC,,38881,0.000292,0.000292
+1,CCACCCCT,,38851,0.000292,0.000292
+1,TCCCCGCC,,38821,0.000291,0.000291
+1,CCCCCGCT,,38776,0.000291,0.000291
+1,CCCCACTC,,38775,0.000291,0.000291
+1,TCTTCCCT,,38774,0.000291,0.000291
+1,TCTTCTCC,,38715,0.000291,0.000291
+1,CCCTCACC,,38671,0.000290,0.000290
+1,CCCACCTC,,38669,0.000290,0.000290
+1,CCCCGCCT,,38566,0.000289,0.000289
+1,CCCTCCAC,,38545,0.000289,0.000289
+1,CCCTACCC,,38509,0.000289,0.000289
+1,CCCCATCC,,38466,0.000289,0.000289
+1,TCTCCCTT,,38292,0.000287,0.000287
+1,CTTATTTT,,38288,0.000287,0.000287
+1,CCTACCCC,,38279,0.000287,0.000287
+1,CCTCTTTC,,38262,0.000287,0.000287
+1,CCTTTCTC,,38259,0.000287,0.000287
+1,TAATTTTT,,38235,0.000287,0.000287
+1,TTTTTCAT,,38210,0.000287,0.000287
+1,TTCCTTCC,,38190,0.000287,0.000287
+1,CCCAACCC,,38146,0.000286,0.000286
+1,CCTCCCAC,,38084,0.000286,0.000286
+1,TCCCTTCT,,38082,0.000286,0.000286
+1,CCTCCACC,,38077,0.000286,0.000286
+1,TTCTCCCT,,38072,0.000286,0.000286
+1,TTCTCTCC,,38061,0.000286,0.000286
+1,TATTTTTA,,38015,0.000285,0.000285
+1,CTCCCACC,,38015,0.000285,0.000285
+1,CCGCCCCT,,38008,0.000285,0.000285
+1,CTTTATTT,,37908,0.000285,0.000284
+1,TTATTTTA,,37893,0.000284,0.000284
+1,CCCTCCCG,,37893,0.000284,0.000284
+1,TTTAATTT,,37854,0.000284,0.000284
+1,CTCCCCAC,,37824,0.000284,0.000284
+1,TTTTTACT,,37816,0.000284,0.000284
+1,CCCCCCGG,,37796,0.000284,0.000284
+1,TCTTTTAT,,37689,0.000283,0.000283
+1,GGGGGGGG,,37684,0.000283,0.000283
+1,TCCTTCCT,,37630,0.000282,0.000282
+1,CCCACTCC,,37625,0.000282,0.000282
+1,CTATTTTT,,37622,0.000282,0.000282
+1,TCTCTTCC,,37592,0.000282,0.000282
+1,CTCACCCC,,37584,0.000282,0.000282
+1,CCCCCGTC,,37499,0.000281,0.000281
+1,TCTTTATT,,37487,0.000281,0.000281
+1,CCGTCCCC,,37482,0.000281,0.000281
+1,TTTTATAT,,37447,0.000281,0.000281
+1,CCTCACCC,,37345,0.000280,0.000280
+1,CCCCTCGC,,37330,0.000280,0.000280
+1,CACCCCCT,,37328,0.000280,0.000280
+1,CGCCCCCT,,37316,0.000280,0.000280
+1,TCTTCCTC,,37211,0.000279,0.000279
+1,TCCTTCTC,,37208,0.000279,0.000279
+1,CCCCGTCC,,37131,0.000279,0.000279
+1,TTCCCTTC,,37130,0.000279,0.000279
+1,TTAATTTT,,37118,0.000279,0.000279
+1,CCCGCCCT,,37068,0.000278,0.000278
+1,CCTCTTCT,,37045,0.000278,0.000278
+1,CCAACCCC,,37035,0.000278,0.000278
+1,CTTTCCCT,,36922,0.000277,0.000277
+1,CCCCACAC,,36899,0.000277,0.000277
+1,TTCTCCTC,,36840,0.000277,0.000276
+1,TTTATTTC,,36770,0.000276,0.000276
+1,CTTTCTCC,,36729,0.000276,0.000276
+1,CTCCTCTT,,36640,0.000275,0.000275
+1,CTGCCCCC,,36632,0.000275,0.000275
+1,CCTTCTTC,,36609,0.000275,0.000275
+1,CCCACACC,,36593,0.000275,0.000275
+1,TTTATATT,,36588,0.000275,0.000275
+1,CCCCGCTC,,36563,0.000274,0.000274
+1,GCCCCCCG,,36562,0.000274,0.000274
+1,CCCTCCGC,,36491,0.000274,0.000274
+1,CTCCACCC,,36469,0.000274,0.000274
+1,CCCTGCCC,,36460,0.000274,0.000274
+1,TTAAAAAA,,36427,0.000273,0.000273
+1,CCACCCCA,,36417,0.000273,0.000273
+1,TCCTCTTC,,36413,0.000273,0.000273
+1,TTTATTAT,,36394,0.000273,0.000273
+1,TTTTATTC,,36392,0.000273,0.000273
+1,CTACCCCC,,36334,0.000273,0.000273
+1,TCATTTTT,,36328,0.000273,0.000273
+1,CTCCCCCG,,36303,0.000273,0.000272
+1,CCTCCCCG,,36283,0.000272,0.000272
+1,TCCTCTCT,,36271,0.000272,0.000272
+1,TTCCTCCT,,36196,0.000272,0.000272
+1,TTCCCTCT,,36140,0.000271,0.000271
+1,TATATTTT,,36119,0.000271,0.000271
+1,TCTATTTT,,36118,0.000271,0.000271
+1,CTCTTTCC,,36099,0.000271,0.000271
+1,CCGCTCCC,,36098,0.000271,0.000271
+1,GCCCGCCC,,36068,0.000271,0.000271
+1,CCATCCCC,,36000,0.000270,0.000270
+1,CCGCCCTC,,35980,0.000270,0.000270
+1,CCTTCTCT,,35971,0.000270,0.000270
+1,TCTCCTTC,,35961,0.000270,0.000270
+1,GCCGCCCC,,35958,0.000270,0.000270
+1,CCACCCTC,,35950,0.000270,0.000270
+1,CCCCTGCC,,35917,0.000270,0.000270
+1,CCCACCAC,,35870,0.000269,0.000269
+1,CTTCCCTT,,35854,0.000269,0.000269
+1,TCTTATTT,,35852,0.000269,0.000269
+1,CGTCCCCC,,35847,0.000269,0.000269
+1,TCTCTCCT,,35822,0.000269,0.000269
+1,TTCCTCTC,,35749,0.000268,0.000268
+1,CTCCTTTC,,35715,0.000268,0.000268
+1,CCCGTCCC,,35693,0.000268,0.000268
+1,ATTAAAAA,,35687,0.000268,0.000268
+1,CTTCTTCC,,35667,0.000268,0.000268
+1,AAAAATTA,,35605,0.000267,0.000267
+1,CCACTCCC,,35603,0.000267,0.000267
+1,CCTCCCGC,,35571,0.000267,0.000267
+1,CTCTCCTT,,35561,0.000267,0.000267
+1,CCGCCTCC,,35494,0.000266,0.000266
+1,TTTTCATT,,35436,0.000266,0.000266
+1,CTTTCCTC,,35356,0.000265,0.000265
+1,GCCCCCGC,,35317,0.000265,0.000265
+1,CCACCTCC,,35286,0.000265,0.000265
+1,CAACCCCC,,35282,0.000265,0.000265
+1,TATTTTTC,,35252,0.000265,0.000265
+1,CGCTCCCC,,35233,0.000264,0.000264
+1,CTCCCCGC,,35194,0.000264,0.000264
+1,TCTCCTCT,,35179,0.000264,0.000264
+1,TTTTACTT,,35135,0.000264,0.000264
+1,TTCTTTAT,,35096,0.000263,0.000263
+1,CACCCCTC,,35052,0.000263,0.000263
+1,CACCCCCA,,35035,0.000263,0.000263
+1,TATTTATT,,35011,0.000263,0.000263
+1,TCTCTCTC,,34962,0.000262,0.000262
+1,CCTGCCCC,,34940,0.000262,0.000262
+1,CATCCCCC,,34929,0.000262,0.000262
+1,CTCTTCCT,,34836,0.000261,0.000261
+1,TTATTATT,,34779,0.000261,0.000261
+1,TTTTCTAT,,34767,0.000261,0.000261
+1,TTCATTTT,,34663,0.000260,0.000260
+1,TTCTTATT,,34607,0.000260,0.000260
+1,CCCGCCTC,,34598,0.000260,0.000260
+1,TTATATTT,,34488,0.000259,0.000259
+1,TATTATTT,,34437,0.000259,0.000258
+1,CGCCTCCC,,34397,0.000258,0.000258
+1,CTCCTTCT,,34391,0.000258,0.000258
+1,TATTTTAT,,34352,0.000258,0.000258
+1,CACCCTCC,,34341,0.000258,0.000258
+1,CTCTTCTC,,34295,0.000257,0.000257
+1,TTTTATCT,,34276,0.000257,0.000257
+1,CCTCGCCC,,34271,0.000257,0.000257
+1,CGCCCCTC,,34247,0.000257,0.000257
+1,CGCCCTCC,,34234,0.000257,0.000257
+1,CACACCCC,,34158,0.000256,0.000256
+1,TACTTTTT,,34141,0.000256,0.000256
+1,TTATTTTC,,34122,0.000256,0.000256
+1,CCACACCC,,34106,0.000256,0.000256
+1,GCTTTTTT,,34085,0.000256,0.000256
+1,TTTCTATT,,34082,0.000256,0.000256
+1,TTTCTTAT,,34020,0.000255,0.000255
+1,CCCTCGCC,,33905,0.000255,0.000254
+1,TTCTATTT,,33888,0.000254,0.000254
+1,CTCCGCCC,,33887,0.000254,0.000254
+1,AAAAAACC,,33869,0.000254,0.000254
+1,CCCGCTCC,,33839,0.000254,0.000254
+1,CACTCCCC,,33794,0.000254,0.000254
+1,TTATTTAT,,33778,0.000254,0.000253
+1,CCACCACC,,33759,0.000253,0.000253
+1,TTTATTCT,,33748,0.000253,0.000253
+1,TTTATCTT,,33743,0.000253,0.000253
+1,TTTACTTT,,33671,0.000253,0.000253
+1,CTTCTCTC,,33663,0.000253,0.000253
+1,CACCTCCC,,33660,0.000253,0.000253
+1,CTCGCCCC,,33649,0.000253,0.000253
+1,CTTCCTTC,,33642,0.000253,0.000252
+1,AAAAATAT,,33621,0.000252,0.000252
+1,CTTCTCCT,,33607,0.000252,0.000252
+1,AATTTTTT,,33604,0.000252,0.000252
+1,ACCAAAAA,,33581,0.000252,0.000252
+1,AATTAAAA,,33428,0.000251,0.000251
+1,TTTCATTT,,33406,0.000251,0.000251
+1,CTTCCTCT,,33154,0.000249,0.000249
+1,CCACCCAC,,33085,0.000248,0.000248
+1,CTCTCTTC,,33047,0.000248,0.000248
+1,AAAATTAA,,32861,0.000247,0.000247
+1,GCCCCGCC,,32809,0.000246,0.000246
+1,CCCCCGGC,,32762,0.000246,0.000246
+1,CCTCCGCC,,32736,0.000246,0.000246
+1,CACCCACC,,32682,0.000245,0.000245
+1,CTCTCTCT,,32618,0.000245,0.000245
+1,TATTTTCT,,32515,0.000244,0.000244
+1,AAAATATA,,32472,0.000244,0.000244
+1,AACCCCCC,,32467,0.000244,0.000244
+1,TATTTCTT,,32419,0.000243,0.000243
+1,CACCACCC,,32396,0.000243,0.000243
+1,TATAAAAA,,32316,0.000243,0.000243
+1,GTTTTTTC,,32293,0.000242,0.000242
+1,CTCCCGCC,,32183,0.000242,0.000242
+1,CGGCCCCC,,32170,0.000241,0.000241
+1,TTATTTCT,,32081,0.000241,0.000241
+1,AAATTAAA,,32067,0.000241,0.000241
+1,AAAATAAT,,32037,0.000240,0.000240
+1,ACCCCCCA,,31904,0.000239,0.000239
+1,TTATTCTT,,31863,0.000239,0.000239
+1,TTACTTTT,,31844,0.000239,0.000239
+1,TATCTTTT,,31780,0.000239,0.000239
+1,CCAAAAAA,,31746,0.000238,0.000238
+1,CACCCCAC,,31570,0.000237,0.000237
+1,AATATAAA,,31457,0.000236,0.000236
+1,CCCCCGCG,,31323,0.000235,0.000235
+1,AAAAACCA,,31298,0.000235,0.000235
+1,CCCCGCGC,,31290,0.000235,0.000235
+1,CCCCGCCG,,31259,0.000235,0.000235
+1,ATTTTTTA,,31192,0.000234,0.000234
+1,TATTCTTT,,31142,0.000234,0.000234
+1,TTATCTTT,,30891,0.000232,0.000232
+1,ACCCCCCT,,30778,0.000231,0.000231
+1,AATAAAAT,,30778,0.000231,0.000231
+1,ATATAAAA,,30733,0.000231,0.000231
+1,AATAAATA,,30717,0.000231,0.000231
+1,CCGGCCCC,,30641,0.000230,0.000230
+1,GTCTTTTT,,30605,0.000230,0.000230
+1,CCCGGCCC,,30521,0.000229,0.000229
+1,AAATAAAT,,30453,0.000229,0.000229
+1,AAATAATA,,30183,0.000227,0.000227
+1,GTTTTTCT,,30175,0.000227,0.000226
+1,CCCCGGCC,,30115,0.000226,0.000226
+1,ACACCCCC,,30084,0.000226,0.000226
+1,AATAATAA,,29842,0.000224,0.000224
+1,AAATATAA,,29800,0.000224,0.000224
+1,ATAAAATA,,29796,0.000224,0.000224
+1,ACCACCCC,,29778,0.000224,0.000223
+1,GTTTTCTT,,29741,0.000223,0.000223
+1,ATAAAAAT,,29678,0.000223,0.000223
+1,ATATTTTT,,29635,0.000222,0.000222
+1,CCGCGCCC,,29563,0.000222,0.000222
+1,ATAATAAA,,29505,0.000221,0.000221
+1,ATAAATAA,,29365,0.000220,0.000220
+1,TAATAAAA,,29348,0.000220,0.000220
+1,TAAAAAAT,,29325,0.000220,0.000220
+1,TTTTTTGC,,29237,0.000219,0.000219
+1,AAAAACAC,,29234,0.000219,0.000219
+1,ATCCCCCC,,29202,0.000219,0.000219
+1,TAAAAATA,,29182,0.000219,0.000219
+1,ATTATTTT,,29179,0.000219,0.000219
+1,ACCCCACC,,28962,0.000217,0.000217
+1,CCCGCCCG,,28863,0.000217,0.000217
+1,TAAATAAA,,28712,0.000216,0.000215
+1,GTTCTTTT,,28688,0.000215,0.000215
+1,ACCCCCTC,,28675,0.000215,0.000215
+1,ACCCACCC,,28612,0.000215,0.000215
+1,CGTTTTTT,,28527,0.000214,0.000214
+1,ATTTTATT,,28508,0.000214,0.000214
+1,CCGCCCCG,,28488,0.000214,0.000214
+1,CTGTTTTT,,28474,0.000214,0.000214
+1,AAAACACA,,28423,0.000213,0.000213
+1,GTTTCTTT,,28381,0.000213,0.000213
+1,ACCCCTCC,,28352,0.000213,0.000213
+1,ACTTTTTT,,28322,0.000213,0.000213
+1,AAAACCAA,,28319,0.000213,0.000213
+1,CCCGCCGC,,28312,0.000213,0.000212
+1,ACTCCCCC,,28309,0.000213,0.000212
+1,AACCAAAA,,28281,0.000212,0.000212
+1,ATTTTTAT,,28222,0.000212,0.000212
+1,CGCGCCCC,,28220,0.000212,0.000212
+1,TCGTTTTT,,28155,0.000211,0.000211
+1,CCGCCCGC,,28149,0.000211,0.000211
+1,ACCCCCAC,,28140,0.000211,0.000211
+1,TAAAATAA,,28136,0.000211,0.000211
+1,ACCCTCCC,,28069,0.000211,0.000211
+1,TTTTTTCG,,28066,0.000211,0.000211
+1,ACAAAAAC,,28048,0.000211,0.000210
+1,ACCTCCCC,,28036,0.000210,0.000210
+1,CGCCCCCG,,27918,0.000210,0.000210
+1,CGCCGCCC,,27811,0.000209,0.000209
+1,CTTTTTTG,,27777,0.000209,0.000208
+1,ACAAAACA,,27743,0.000208,0.000208
+1,AAAACAAC,,27648,0.000208,0.000207
+1,AACAAAAC,,27489,0.000206,0.000206
+1,CTTTTTGT,,27429,0.000206,0.000206
+1,AAACCAAA,,27426,0.000206,0.000206
+1,TCTTTTTG,,27379,0.000206,0.000205
+1,AACAAACA,,27283,0.000205,0.000205
+1,ACACAAAA,,27278,0.000205,0.000205
+1,ATTTATTT,,27235,0.000204,0.000204
+1,TTTTTAAA,,27051,0.000203,0.000203
+1,AACACAAA,,26996,0.000203,0.000203
+1,TTGTTTTC,,26989,0.000203,0.000203
+1,ACAACAAA,,26971,0.000202,0.000202
+1,CGCCCCGC,,26944,0.000202,0.000202
+1,ATTTTTTC,,26936,0.000202,0.000202
+1,CACAAAAA,,26935,0.000202,0.000202
+1,ACAAACAA,,26821,0.000201,0.000201
+1,AACAACAA,,26811,0.000201,0.000201
+1,TTTTTCTG,,26779,0.000201,0.000201
+1,TCTTTTGT,,26425,0.000198,0.000198
+1,AAACAACA,,26411,0.000198,0.000198
+1,TTTTTCGT,,26410,0.000198,0.000198
+1,CCGCCGCC,,26353,0.000198,0.000198
+1,TTTTTGTC,,26261,0.000197,0.000197
+1,CTTTGTTT,,26143,0.000196,0.000196
+1,AAACAAAC,,26110,0.000196,0.000196
+1,CTTGTTTT,,26085,0.000196,0.000196
+1,TTTTGTTC,,26074,0.000196,0.000196
+1,CCCGCGCC,,26054,0.000196,0.000196
+1,CTTTTGTT,,26048,0.000196,0.000195
+1,AAACACAA,,25998,0.000195,0.000195
+1,TGTTTTTC,,25923,0.000195,0.000195
+1,CGCCCGCC,,25349,0.000190,0.000190
+1,TCTTGTTT,,25296,0.000190,0.000190
+1,TTTTTGCT,,25256,0.000190,0.000190
+1,TGCTTTTT,,25242,0.000189,0.000189
+1,ATCTTTTT,,25219,0.000189,0.000189
+1,TCTTTGTT,,25155,0.000189,0.000189
+1,TTCTTTTG,,25141,0.000189,0.000189
+1,TCTGTTTT,,25036,0.000188,0.000188
+1,ATTTTTCT,,24932,0.000187,0.000187
+1,TTGTTCTT,,24893,0.000187,0.000187
+1,TTTTCTTG,,24883,0.000187,0.000187
+1,TTTCTTTG,,24881,0.000187,0.000187
+1,TTTGTTTC,,24745,0.000186,0.000186
+1,TTGTTTCT,,24673,0.000185,0.000185
+1,TTTTCTGT,,24576,0.000184,0.000184
+1,ATTTTCTT,,24553,0.000184,0.000184
+1,TTGCTTTT,,24521,0.000184,0.000184
+1,TTGTCTTT,,24302,0.000182,0.000182
+1,TTTTGTCT,,24224,0.000182,0.000182
+1,TGTTTTCT,,24199,0.000182,0.000182
+1,TTCTTTGT,,24168,0.000181,0.000181
+1,TTTTGCTT,,24083,0.000181,0.000181
+1,TGTTTCTT,,23987,0.000180,0.000180
+1,CAACAAAA,,23964,0.000180,0.000180
+1,CAAAAAAC,,23933,0.000180,0.000180
+1,CAAAAACA,,23924,0.000180,0.000180
+1,ATTCTTTT,,23857,0.000179,0.000179
+1,TTTCTTGT,,23798,0.000179,0.000179
+1,TTCTGTTT,,23778,0.000178,0.000178
+1,ATTTCTTT,,23748,0.000178,0.000178
+1,ACTAAAAA,,23747,0.000178,0.000178
+1,TGTCTTTT,,23554,0.000177,0.000177
+1,TTCGTTTT,,23401,0.000176,0.000176
+1,TTTGTCTT,,23379,0.000175,0.000175
+1,TGTTCTTT,,23370,0.000175,0.000175
+1,AAAAAATC,,23320,0.000175,0.000175
+1,TTTGTTCT,,23314,0.000175,0.000175
+1,CCCCCAAA,,23265,0.000175,0.000175
+1,AAAAAACT,,23245,0.000174,0.000174
+1,TTTTCGTT,,23239,0.000174,0.000174
+1,CAAAACAA,,23185,0.000174,0.000174
+1,CCCCCTTA,,23173,0.000174,0.000174
+1,TTTCGTTT,,23168,0.000174,0.000174
+1,CAAACAAA,,23157,0.000174,0.000174
+1,TTCTTGTT,,23155,0.000174,0.000174
+1,TCAAAAAA,,23154,0.000174,0.000174
+1,TTTAAAAA,,23088,0.000173,0.000173
+1,TTTGCTTT,,22765,0.000171,0.000171
+1,TTCCCCCA,,22755,0.000171,0.000171
+1,TTTCTGTT,,22621,0.000170,0.000170
+1,TTTTATAA,,22527,0.000169,0.000169
+1,CTAAAAAA,,22373,0.000168,0.000168
+1,CCTTTTTA,,22307,0.000167,0.000167
+1,TTTTTCCA,,22228,0.000167,0.000167
+1,TTTTAATA,,22107,0.000166,0.000166
+1,CCCCCATT,,22058,0.000166,0.000166
+1,AAAAATCA,,21532,0.000162,0.000162
+1,TTTATTAA,,21531,0.000162,0.000162
+1,GTTCCCCC,,21516,0.000162,0.000161
+1,ACAAAAAT,,21504,0.000161,0.000161
+1,AAAAACTA,,21470,0.000161,0.000161
+1,ATCAAAAA,,21414,0.000161,0.000161
+1,TCTCCCCA,,21354,0.000160,0.000160
+1,AAAAACAT,,21330,0.000160,0.000160
+1,TTTTAAAT,,21309,0.000160,0.000160
+1,TTTTTACC,,21191,0.000159,0.000159
+1,AAAAATAC,,21153,0.000159,0.000159
+1,GCCCCCTT,,21043,0.000158,0.000158
+1,ACAAAATA,,20947,0.000157,0.000157
+1,CCCCTTCA,,20930,0.000157,0.000157
+1,TCCCCCTA,,20902,0.000157,0.000157
+1,AAAATACA,,20870,0.000157,0.000157
+1,CCCCTCTA,,20783,0.000156,0.000156
+1,AACAAAAT,,20757,0.000156,0.000156
+1,CCCCCTAT,,20683,0.000155,0.000155
+1,AATAAAAC,,20661,0.000155,0.000155
+1,TTTTAAAA,,20628,0.000155,0.000155
+1,ACATAAAA,,20579,0.000154,0.000154
+1,ATAAAAAC,,20576,0.000154,0.000154
+1,AAAATAAC,,20555,0.000154,0.000154
+1,CATAAAAA,,20535,0.000154,0.000154
+1,TTCCCCAC,,20517,0.000154,0.000154
+1,AAAACATA,,20474,0.000154,0.000154
+1,TCCCCTCA,,20440,0.000153,0.000153
+1,TCCTCCCA,,20431,0.000153,0.000153
+1,TACAAAAA,,20427,0.000153,0.000153
+1,AATAAACA,,20393,0.000153,0.000153
+1,TTCCCACC,,20378,0.000153,0.000153
+1,AAAACAAT,,20363,0.000153,0.000153
+1,TTCACCCC,,20339,0.000153,0.000153
+1,TTTTTCAC,,20334,0.000153,0.000153
+1,AACTAAAA,,20331,0.000153,0.000153
+1,ACAATAAA,,20291,0.000152,0.000152
+1,CCATTTTT,,20281,0.000152,0.000152
+1,TCCCTCCA,,20255,0.000152,0.000152
+1,TTTATATA,,20242,0.000152,0.000152
+1,GGTTTTTT,,20212,0.000152,0.000152
+1,AACAAATA,,20188,0.000152,0.000152
+1,CCCTTCCA,,20172,0.000151,0.000151
+1,GCTTCCCC,,20128,0.000151,0.000151
+1,CCCACCTT,,20114,0.000151,0.000151
+1,TTACCCCC,,20100,0.000151,0.000151
+1,CAAAAAAT,,20096,0.000151,0.000151
+1,AAAATCAA,,20084,0.000151,0.000151
+1,AAAAATTT,,20060,0.000151,0.000151
+1,ATAAAACA,,20052,0.000151,0.000150
+1,CCCCACAA,,20040,0.000150,0.000150
+1,TAAAAAAC,,20039,0.000150,0.000150
+1,CCCCACTT,,20035,0.000150,0.000150
+1,AATAACAA,,19970,0.000150,0.000150
+1,CCCAAAAA,,19933,0.000150,0.000150
+1,TTCCACCC,,19916,0.000149,0.000149
+1,ACAAATAA,,19904,0.000149,0.000149
+1,CAAAAATA,,19897,0.000149,0.000149
+1,CTTTTTCA,,19886,0.000149,0.000149
+1,CCCTCCTA,,19866,0.000149,0.000149
+1,AACATAAA,,19841,0.000149,0.000149
+1,AAAACTAA,,19813,0.000149,0.000149
+1,CCTTTATT,,19808,0.000149,0.000149
+1,CCCCCTGT,,19808,0.000149,0.000149
+1,AAATAACA,,19769,0.000148,0.000148
+1,CCTATTTT,,19749,0.000148,0.000148
+1,TTTAATTA,,19744,0.000148,0.000148
+1,TTGCCCCC,,19741,0.000148,0.000148
+1,TTATTTAA,,19716,0.000148,0.000148
+1,TAAAAACA,,19706,0.000148,0.000148
+1,TCCTTTTA,,19694,0.000148,0.000148
+1,TTTAAATT,,19673,0.000148,0.000148
+1,AAATAAAC,,19667,0.000148,0.000148
+1,TTTTCCTA,,19666,0.000148,0.000148
+1,CCTTTTAT,,19650,0.000148,0.000147
+1,GCCTTCCC,,19642,0.000147,0.000147
+1,AATCAAAA,,19635,0.000147,0.000147
+1,AAACAATA,,19617,0.000147,0.000147
+1,TCCCCCAT,,19595,0.000147,0.000147
+1,CCCCCTTG,,19582,0.000147,0.000147
+1,CCCCCGTT,,19561,0.000147,0.000147
+1,TCTACCCC,,19511,0.000146,0.000146
+1,AACAATAA,,19510,0.000146,0.000146
+1,TTTTCTCA,,19499,0.000146,0.000146
+1,CCTTCCCA,,19477,0.000146,0.000146
+1,GTCTCCCC,,19449,0.000146,0.000146
+1,TAAATTTT,,19408,0.000146,0.000146
+1,TTATAAAA,,19396,0.000146,0.000146
+1,CTTCCCCA,,19386,0.000146,0.000145
+1,GTGTTTTT,,19381,0.000145,0.000145
+1,AAACTAAA,,19366,0.000145,0.000145
+1,TATTTTAA,,19358,0.000145,0.000145
+1,ATAAACAA,,19327,0.000145,0.000145
+1,TCTCCACC,,19317,0.000145,0.000145
+1,TTTATAAT,,19302,0.000145,0.000145
+1,CCCACCAA,,19284,0.000145,0.000145
+1,AAACAAAT,,19213,0.000144,0.000144
+1,CCTTATTT,,19208,0.000144,0.000144
+1,AAATACAA,,19180,0.000144,0.000144
+1,CCTCCCTA,,19158,0.000144,0.000144
+1,ATACAAAA,,19153,0.000144,0.000144
+1,GCCCCTCT,,19146,0.000144,0.000144
+1,TCTTTTCA,,19140,0.000144,0.000144
+1,CCCCTCAT,,19126,0.000144,0.000144
+1,ATAACAAA,,19123,0.000144,0.000144
+1,AAATCAAA,,19112,0.000143,0.000143
+1,CTCTTTTA,,19105,0.000143,0.000143
+1,CCCCAACA,,19087,0.000143,0.000143
+1,CCCCTTAC,,19084,0.000143,0.000143
+1,TCCCCACT,,19045,0.000143,0.000143
+1,AATACAAA,,19030,0.000143,0.000143
+1,AAATTTTT,,19027,0.000143,0.000143
+1,CAATAAAA,,19021,0.000143,0.000143
+1,TGTCCCCC,,19001,0.000143,0.000143
+1,GCCCCTTC,,18980,0.000142,0.000142
+1,TCTCCCAC,,18930,0.000142,0.000142
+1,CAAAATAA,,18927,0.000142,0.000142
+1,TTTTATCC,,18913,0.000142,0.000142
+1,CCCTCTCA,,18905,0.000142,0.000142
+1,GCCCTTCC,,18864,0.000142,0.000142
+1,TAAACAAA,,18841,0.000141,0.000141
+1,GCCTCCCT,,18841,0.000141,0.000141
+1,CAAATAAA,,18825,0.000141,0.000141
+1,TCTCACCC,,18768,0.000141,0.000141
+1,GTCCCCCT,,18735,0.000141,0.000141
+1,CCCCAAAC,,18683,0.000140,0.000140
+1,CTTTTCTA,,18675,0.000140,0.000140
+1,CCCTTCAC,,18672,0.000140,0.000140
+1,TAACAAAA,,18670,0.000140,0.000140
+1,GCCCCCCA,,18644,0.000140,0.000140
+1,TCCTACCC,,18639,0.000140,0.000140
+1,TAAAACAA,,18638,0.000140,0.000140
+1,GCCCTCCT,,18626,0.000140,0.000140
+1,AAACATAA,,18626,0.000140,0.000140
+1,CACTTTTT,,18603,0.000140,0.000140
+1,CCCCATCT,,18598,0.000140,0.000140
+1,TATCCCCC,,18596,0.000140,0.000140
+1,CCCTCCAT,,18590,0.000140,0.000140
+1,CCCCTACT,,18580,0.000139,0.000139
+1,CCACCCTT,,18569,0.000139,0.000139
+1,CCCCGCTT,,18558,0.000139,0.000139
+1,TCCTCACC,,18543,0.000139,0.000139
+1,TTAAATTT,,18532,0.000139,0.000139
+1,GCTCTCCC,,18524,0.000139,0.000139
+1,TTTCTTCA,,18461,0.000139,0.000139
+1,TCCCCTAC,,18438,0.000138,0.000138
+1,TCTTTCTA,,18433,0.000138,0.000138
+1,TCCTCCAC,,18406,0.000138,0.000138
+1,TCCCTCAC,,18405,0.000138,0.000138
+1,TATTAAAA,,18388,0.000138,0.000138
+1,GTCCTCCC,,18388,0.000138,0.000138
+1,CCCCATTC,,18388,0.000138,0.000138
+1,CCTCTCCA,,18378,0.000138,0.000138
+1,TTTATTCC,,18365,0.000138,0.000138
+1,TTCCTTTA,,18346,0.000138,0.000138
+1,CTCCCCTA,,18346,0.000138,0.000138
+1,CTTCTTTA,,18333,0.000138,0.000138
+1,TTTATAAA,,18314,0.000137,0.000137
+1,TCCCTACC,,18303,0.000137,0.000137
+1,TTAATTTA,,18276,0.000137,0.000137
+1,TCCCCATC,,18273,0.000137,0.000137
+1,TAATTTTA,,18273,0.000137,0.000137
+1,TCTCTTTA,,18272,0.000137,0.000137
+1,TCCACCCT,,18246,0.000137,0.000137
+1,TATTTATA,,18211,0.000137,0.000137
+1,TCCATTTT,,18200,0.000137,0.000137
+1,GCTCCCCT,,18192,0.000137,0.000137
+1,TCCATCCC,,18187,0.000137,0.000136
+1,TTTAATAT,,18147,0.000136,0.000136
+1,CTCTCCCA,,18130,0.000136,0.000136
+1,CTTTTTAC,,18119,0.000136,0.000136
+1,ATTTAAAA,,18117,0.000136,0.000136
+1,TCCCACCT,,18114,0.000136,0.000136
+1,TTATTATA,,18107,0.000136,0.000136
+1,TTTTCTAC,,18090,0.000136,0.000136
+1,TTTCCTTA,,18088,0.000136,0.000136
+1,TTCCCCCG,,18071,0.000136,0.000136
+1,CTTTCTTA,,18061,0.000136,0.000136
+1,CCCAACAC,,18053,0.000136,0.000135
+1,TCTTCTTA,,18048,0.000135,0.000135
+1,CCCACTCT,,18045,0.000135,0.000135
+1,CCCCTATC,,18040,0.000135,0.000135
+1,TCCCCCGT,,18025,0.000135,0.000135
+1,TTCTTTCA,,18020,0.000135,0.000135
+1,CCCTTACC,,18009,0.000135,0.000135
+1,CCCACACA,,18004,0.000135,0.000135
+1,TGCTCCCC,,18002,0.000135,0.000135
+1,GCCTTTTT,,17994,0.000135,0.000135
+1,TCCCCCTG,,17971,0.000135,0.000135
+1,TTAATAAA,,17908,0.000134,0.000134
+1,GTCCCTCC,,17876,0.000134,0.000134
+1,TCGTCCCC,,17860,0.000134,0.000134
+1,TCTTTTAC,,17848,0.000134,0.000134
+1,AAAATTTA,,17841,0.000134,0.000134
+1,AAAATATT,,17824,0.000134,0.000134
+1,TTTTCCAT,,17811,0.000134,0.000134
+1,GCCTCTCC,,17792,0.000134,0.000134
+1,TATAATTT,,17788,0.000134,0.000133
+1,CCTCCTCA,,17785,0.000134,0.000133
+1,TTCGCCCC,,17778,0.000133,0.000133
+1,CCCACTTC,,17778,0.000133,0.000133
+1,CCCCTCGT,,17741,0.000133,0.000133
+1,CCCTCACT,,17712,0.000133,0.000133
+1,GCCCTCTC,,17701,0.000133,0.000133
+1,CCCAAACC,,17693,0.000133,0.000133
+1,TTTCTCTA,,17680,0.000133,0.000133
+1,TATATTTA,,17676,0.000133,0.000133
+1,TCATCCCC,,17675,0.000133,0.000133
+1,TCCTTATT,,17674,0.000133,0.000133
+1,CACCCCTT,,17674,0.000133,0.000133
+1,CCGCCCTT,,17671,0.000133,0.000133
+1,CCTTCACC,,17666,0.000133,0.000133
+1,CCCCTCTG,,17662,0.000133,0.000133
+1,CTTACCCC,,17646,0.000132,0.000132
+1,CCCATCCT,,17646,0.000132,0.000132
+1,TTTTTTGG,,17637,0.000132,0.000132
+1,TTCTTCTA,,17636,0.000132,0.000132
+1,TCCACTCC,,17625,0.000132,0.000132
+1,TTAATATT,,17613,0.000132,0.000132
+1,CCCAACCA,,17609,0.000132,0.000132
+1,CCCGCCTT,,17601,0.000132,0.000132
+1,TTTTACCT,,17590,0.000132,0.000132
+1,CTCATTTT,,17585,0.000132,0.000132
+1,TTATTAAT,,17571,0.000132,0.000132
+1,TCCTTTAT,,17562,0.000132,0.000132
+1,CTCCCTCA,,17550,0.000132,0.000132
+1,CCCCTTCG,,17535,0.000132,0.000132
+1,GCCTCCTC,,17534,0.000132,0.000132
+1,GCTCCTCC,,17528,0.000132,0.000132
+1,CTTTTCAT,,17516,0.000131,0.000131
+1,TTATAATT,,17507,0.000131,0.000131
+1,CCCATTCC,,17506,0.000131,0.000131
+1,CCTTACCC,,17498,0.000131,0.000131
+1,TTCCCCGC,,17491,0.000131,0.000131
+1,CCCTCCGT,,17489,0.000131,0.000131
+1,GTTTTTCC,,17476,0.000131,0.000131
+1,TTATATTA,,17453,0.000131,0.000131
+1,TATTTAAT,,17431,0.000131,0.000131
+1,CCGTTTTT,,17417,0.000131,0.000131
+1,TTTTCATC,,17416,0.000131,0.000131
+1,CCTCCCAT,,17416,0.000131,0.000131
+1,TTAAAAAT,,17415,0.000131,0.000131
+1,TCCCATCC,,17411,0.000131,0.000131
+1,TTCCGCCC,,17408,0.000131,0.000131
+1,TAATATTT,,17378,0.000130,0.000130
+1,TCCCACTC,,17372,0.000130,0.000130
+1,TTAAAATA,,17371,0.000130,0.000130
+1,GTCCCCTC,,17370,0.000130,0.000130
+1,TCCACCTC,,17356,0.000130,0.000130
+1,TCACCCCT,,17351,0.000130,0.000130
+1,TACCCCCT,,17335,0.000130,0.000130
+1,CTCCTCCA,,17317,0.000130,0.000130
+1,TTAAATAA,,17312,0.000130,0.000130
+1,CTCTTTAT,,17301,0.000130,0.000130
+1,CCCTACCT,,17285,0.000130,0.000130
+1,TGCCCCCT,,17282,0.000130,0.000130
+1,CTCTTATT,,17280,0.000130,0.000130
+1,CTTCCCAC,,17267,0.000130,0.000130
+1,CCTTCCAC,,17264,0.000130,0.000130
+1,CCCCTTGC,,17255,0.000130,0.000129
+1,TTTCTTAC,,17235,0.000129,0.000129
+1,TTTTCACT,,17229,0.000129,0.000129
+1,TAAAAATT,,17217,0.000129,0.000129
+1,TACTCCCC,,17212,0.000129,0.000129
+1,TCCTCCCG,,17201,0.000129,0.000129
+1,TATTATTA,,17198,0.000129,0.000129
+1,TTTTACTC,,17185,0.000129,0.000129
+1,CCCTCTAC,,17160,0.000129,0.000129
+1,TCTGCCCC,,17151,0.000129,0.000129
+1,TTCTCTTA,,17135,0.000129,0.000129
+1,CTCCCCAT,,17131,0.000129,0.000129
+1,TCTCCCCG,,17127,0.000129,0.000129
+1,CTTCCACC,,17125,0.000129,0.000129
+1,CCCCGTTC,,17114,0.000128,0.000128
+1,CCCACAAC,,17110,0.000128,0.000128
+1,CCCCAAAA,,17096,0.000128,0.000128
+1,CTTTTATC,,17080,0.000128,0.000128
+1,CCCCGTCT,,17072,0.000128,0.000128
+1,TTTTTCAA,,17071,0.000128,0.000128
+1,GTTGTTTT,,17068,0.000128,0.000128
+1,TATTAATT,,17054,0.000128,0.000128
+1,CCTCCACT,,17044,0.000128,0.000128
+1,CCACCCAA,,17034,0.000128,0.000128
+1,GCTCCCTC,,17033,0.000128,0.000128
+1,AAAAACCC,,17030,0.000128,0.000128
+1,GTTTTTTG,,17009,0.000128,0.000128
+1,TATTTTCC,,16998,0.000128,0.000128
+1,TAATTATT,,16959,0.000127,0.000127
+1,TCCCCTCG,,16950,0.000127,0.000127
+1,TCCTATTT,,16947,0.000127,0.000127
+1,CCAAACCC,,16935,0.000127,0.000127
+1,TTATTTCC,,16910,0.000127,0.000127
+1,TTAATTAT,,16882,0.000127,0.000127
+1,AATATTTT,,16878,0.000127,0.000127
+1,GTTTTTGT,,16875,0.000127,0.000127
+1,CTTCACCC,,16868,0.000127,0.000127
+1,TCTTTCAT,,16862,0.000127,0.000127
+1,CCGTTCCC,,16841,0.000126,0.000126
+1,TCACTCCC,,16833,0.000126,0.000126
+1,TCTTTATC,,16828,0.000126,0.000126
+1,TCCCCTGC,,16827,0.000126,0.000126
+1,TGCCTCCC,,16826,0.000126,0.000126
+1,TCCCCCAA,,16825,0.000126,0.000126
+1,GGGCCCCC,,16815,0.000126,0.000126
+1,TCGCTCCC,,16814,0.000126,0.000126
+1,TCCTGCCC,,16802,0.000126,0.000126
+1,CTCTATTT,,16798,0.000126,0.000126
+1,CCCATCTC,,16798,0.000126,0.000126
+1,TCCCTCCG,,16783,0.000126,0.000126
+1,CTTTTACT,,16779,0.000126,0.000126
+1,CCTATCCC,,16779,0.000126,0.000126
+1,CCCTATCC,,16777,0.000126,0.000126
+1,TTCTTTAC,,16751,0.000126,0.000126
+1,AATTTAAA,,16741,0.000126,0.000126
+1,TATATATT,,16738,0.000126,0.000126

From d3e6ad053b61f421126e17c0f1599bfbd2d28d4f Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Fri, 29 Mar 2024 09:36:17 -0400
Subject: [PATCH 29/87] Fix read-length lookup for single-index runs

---
 demux_run_dag.py                    | 1 +
 scripts/get_sequencing_read_data.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/demux_run_dag.py b/demux_run_dag.py
index 6c85c45..09f010a 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -171,6 +171,7 @@ def stats(ds, **kwargs):
 
         # check if the run is 10X by read length
         atac, use_bases_mask = scripts.get_sequencing_read_data.main(sequencer_path)
+        print("read length: {}".format(use_bases_mask))
         if use_bases_mask == [29, 89] or atac:
             # if is atac run, demux is using cellranger mkfastq
             if atac:
diff --git a/scripts/get_sequencing_read_data.py b/scripts/get_sequencing_read_data.py
index bdcab04..e8e16f7 100755
--- a/scripts/get_sequencing_read_data.py
+++ b/scripts/get_sequencing_read_data.py
@@ -36,7 +36,7 @@ def get_sequencing_read_data(sequencer_path):
 		use_bases_mask = "Y" + str(reads_tag[0][1]) + ",I" + str(reads_tag[1][1]) + ",Y" + str(reads_tag[2][1]) + ",Y" + str(reads_tag[3][1])
 	else:
 		atac = False
-		use_bases_mask = [reads_tag[0][1], reads_tag[3][1]]
+		use_bases_mask = [reads_tag[0][1], reads_tag[-1][1]]
 		
 	return(atac, use_bases_mask)
 		

From a9f0a1e79104199e209e9295b66ef4d3f52f24db Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Fri, 29 Mar 2024 11:15:42 -0400
Subject: [PATCH 30/87] Update cellranger.py

Update the cellranger and spaceranger tool paths to the latest versions so they are in place when the 10X database is implemented.
---
 scripts/cellranger.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index cffe807..ca87b32 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -27,14 +27,14 @@
 ACCESS = 0o775
 config_dict = {
     "count": {
-        "tool": " /igo/work/nabors/tools/cellranger-7.0.0/cellranger count ",
+        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger count ",
         "genome": {
             "Human": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A ",
             "Mouse": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A "
         }
     },
     "vdj": {
-        "tool": " /igo/work/nabors/tools/cellranger-7.0.0/cellranger vdj ",
+        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger vdj ",
         "genome": {
             "Human": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.0.0 ",
             "Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 "
@@ -55,17 +55,17 @@
         }
     },
     "multi": {
-        "tool": " /igo/work/nabors/tools/cellranger-7.0.0/cellranger multi "
+        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
     },
     "arc": {
-        "tool": " /igo/work/bin/cellranger-arc-2.0.0/cellranger-arc count ",
+        "tool": " /igo/work/bin/cellranger-arc-2.0.2/cellranger-arc count ",
         "genome": {
             "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-GRCh38-2020-A-2.0.0 ",
             "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-mm10-2020-A-2.0.0 "
         }
     },
     "spaceranger": {
-        "tool": " /igo/work/nabors/tools/spaceranger-2.0.0/spaceranger count ",
+        "tool": " /igo/work/nabors/tools/spaceranger-3.0.0/spaceranger count ",
         "genome": {
             "Human": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A ",
             "Mouse": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/spatial_gex/refdata-gex-mm10-2020-A "
@@ -73,7 +73,7 @@
         "probe": {
             "Human": "/igo/work/nabors/genomes/10X_Genomics/spatial_gex/Visium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.csv",
             "Human_CytAssist": "/igo/work/genomes/10X_Genomics/spaceranger/Visium_Human_Transcriptome_Probe_Set_v2.0_GRCh38-2020-A.csv",
-            "Mouse": "/igo/work/nabors/tools/spaceranger-2.0.0/external/tenx_feature_references/targeted_panels/Visium_Mouse_Transcriptome_Probe_Set_v1.0_mm10-2020-A.csv"
+            "Mouse": "/igo/work/nabors/tools/spaceranger-3.0.0/external/tenx_feature_references/targeted_panels/Visium_Mouse_Transcriptome_Probe_Set_v1.0_mm10-2020-A.csv"
         }
     }
 }

From 8fbc4b77894c1d4ee087914f100b8879e4a87af7 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 1 Apr 2024 08:46:19 -0400
Subject: [PATCH 31/87] Move 10X config parameters into a separate file

---
 scripts/cellranger_config.py  | 78 +++++++++++++++++++++++++++++++++++
 scripts/cellranger_spatial.py | 19 +++------
 2 files changed, 84 insertions(+), 13 deletions(-)
 create mode 100644 scripts/cellranger_config.py

diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
new file mode 100644
index 0000000..801ea70
--- /dev/null
+++ b/scripts/cellranger_config.py
@@ -0,0 +1,78 @@
+# work folder
+STATS_AREA = "/igo/stats/CELLRANGER/"
+
+# config info 
+ACCESS = 0o775
+config_dict = {
+    "count": {
+        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger count ",
+        "genome": {
+            "Human": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A ",
+            "Mouse": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A "
+        }
+    },
+    "vdj": {
+        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger vdj ",
+        "genome": {
+            "Human": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.0.0 ",
+            "Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 "
+        }
+    },
+    "atac_count": {
+        "tool": " /igo/work/nabors/tools/cellranger-atac-2.1.0/cellranger-atac count ",
+        "genome": {
+            "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-GRCh38-1.0.1 ",
+            "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-mm10-1.1.0 "
+        }
+    },
+    "cnv": {
+        "tool": " /igo/work/nabors/tools/cellranger-dna-1.1.0/cellranger-dna cnv ",
+        "genome": {
+            "Human": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCh38-1.0.0 ",
+            "Mouse": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCm38-1.0.0 "
+        }
+    },
+    "multi": {
+        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
+    },
+    "arc": {
+        "tool": " /igo/work/bin/cellranger-arc-2.0.2/cellranger-arc count ",
+        "genome": {
+            "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-GRCh38-2020-A-2.0.0 ",
+            "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-mm10-2020-A-2.0.0 "
+        }
+    },
+    "spaceranger": {
+        "tool": " /igo/work/nabors/tools/spaceranger-3.0.0/spaceranger count ",
+        "genome": {
+            "Human": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A ",
+            "Mouse": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/spatial_gex/refdata-gex-mm10-2020-A "
+        },
+        "probe": {
+            "Human": "/igo/work/nabors/genomes/10X_Genomics/spatial_gex/Visium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.csv",
+            "Human_CytAssist": "/igo/work/genomes/10X_Genomics/spaceranger/Visium_Human_Transcriptome_Probe_Set_v2.0_GRCh38-2020-A.csv",
+            "Mouse": "/igo/work/nabors/tools/spaceranger-3.0.0/external/tenx_feature_references/targeted_panels/Visium_Mouse_Transcriptome_Probe_Set_v1.0_mm10-2020-A.csv",
+            "Mouse_HD": "/igo/work/nabors/tools/spaceranger-3.0.0/external/tenx_feature_references/targeted_panels/Visium_Mouse_Transcriptome_Probe_Set_v2.0_mm10-2020-A.csv"
+        }
+    }
+}
+
+# cellranger command line options
+OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
+
+# 10X recipe list for different pipelines
+COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]
+VDJ_FLAVORS = ["10X_Genomics_VDJ"]
+ATAC_FLAVORS = ["10X_Genomics_ATAC"]
+CNV_FLAVORS = ["10X_Genomics_CNV"]
+ARC_FLAVORS = ["10X_Genomics_Multiome", "10X_Genomics_Multiome_ATAC", "10X_Genomics_Multiome_GeneExpression"]
+SPATIAL_FLAVORS = ["10X_Genomics_Visium"]
+
+# we do not want to PROCESS SAIL (15500) or SCRI (12437) projects
+SCRI = "12437"
+SAIL = "15500"
+DO_NOT_PROCESS = [SCRI, SAIL]
+
+VISIUM_ENDPOINT = "https://igolims.mskcc.org:8443/LimsRest/getConfig?igoId="
+original_tiff_images_directory = "/rtssdc/mohibullahlab/IGO_Pipeline_Results/Single_Cell/10X_Genomics/TIFF_Images/"
+tiff_images_directory = "/igo/work/igo/TIFF_Images/"
diff --git a/scripts/cellranger_spatial.py b/scripts/cellranger_spatial.py
index 70b27a3..f28e9de 100644
--- a/scripts/cellranger_spatial.py
+++ b/scripts/cellranger_spatial.py
@@ -1,17 +1,10 @@
 import pandas as pd
-import sys
 import os
 import json
 import os.path
 import requests
 import shutil
-import glob
-
-
-ENDPOINT = "https://igolims.mskcc.org:8443/LimsRest/getConfig?igoId="
-original_tiff_images_directory = "/rtssdc/mohibullahlab/IGO_Pipeline_Results/Single_Cell/10X_Genomics/TIFF_Images/"
-tiff_images_directory = "/igo/work/igo/TIFF_Images/"
-
+import scripts.cellranger_config as CONFIG
 
 # sample_id can be get from sample sheet, will be the part in front of _IGO_
 class Spatial_sample:
@@ -28,7 +21,7 @@ def __init__(self, sample, project_id):
         self.copy_json(project_id)
 
     def get_info_from_LIMS(self):
-        response = requests.get(ENDPOINT + self.IGO_ID , auth = ("pms", "tiagostarbuckslightbike"), verify = False)
+        response = requests.get(CONFIG.VISIUM_ENDPOINT + self.IGO_ID , auth = ("pms", "tiagostarbuckslightbike"), verify = False)
         response_data = json.loads(response.text.encode("utf8"))
         self.chip_position = response_data["chipPosition"]
         self.chip_id = response_data["chipID"]
@@ -37,8 +30,8 @@ def get_info_from_LIMS(self):
     
     def copy_tiff(self, project_id):
         # project_id format as Project_12345
-        source_loc_dir = original_tiff_images_directory + project_id
-        destination_loc = tiff_images_directory + project_id
+        source_loc_dir = CONFIG.original_tiff_images_directory + project_id
+        destination_loc = CONFIG.tiff_images_directory + project_id
         destination_file = destination_loc + "/" + self.sample_name + ".tif"
         # create TIFF_images director if not exists
         if not os.path.exists(destination_loc):
@@ -56,8 +49,8 @@ def copy_tiff(self, project_id):
     # copy json file if exists
     def copy_json(self, project_id):
         # project_id format as Project_12345
-        source_loc = original_tiff_images_directory + project_id + "/json/" + self.sample_name + ".json"
-        destination_loc = tiff_images_directory + project_id
+        source_loc = CONFIG.original_tiff_images_directory + project_id + "/json/" + self.sample_name + ".json"
+        destination_loc = CONFIG.tiff_images_directory + project_id
         destination_file = destination_loc + "/" + self.sample_name + ".json"
 
         # create director if not exists

From 8c4d027f2512e11ac27fe84ffa8d579f7bdaa988 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 1 Apr 2024 08:52:07 -0400
Subject: [PATCH 32/87] Update tests to reflect the new cellranger version

---
 test_scripts.py | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/test_scripts.py b/test_scripts.py
index e9174b0..fe372d6 100644
--- a/test_scripts.py
+++ b/test_scripts.py
@@ -21,26 +21,13 @@ def testCellranger_generate_cellranger_cmd():
         if genome_dict[sample] != "Human" and genome_dict[sample] != "Mouse":
             genome_dict[sample] = "Mouse"
         cmd.append(cellranger.generate_cellranger_cmd(sample, "count", genome_dict[sample], fastq_file_list_dict[sample], "DIANA_0453_AHFKJ5DRXY"))
-    test_result = ["bsub -J DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger.out /igo/work/nabors/tools/cellranger-7.0.0/cellranger count --id=Sample_06265_8869_1_IGO_06265_AG_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_06265_AG/Sample_06265_8869_1_IGO_06265_AG_3 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",   
-    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger.out /igo/work/nabors/tools/cellranger-7.0.0/cellranger count --id=Sample_Third-Transcriptome_IGO_11969_E_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3,/igo/staging/FASTQ/DIANA_0454_BH555MDMXY/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",
-    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger.out /igo/work/nabors/tools/cellranger-7.0.0/cellranger count --id=Sample_Second_IGO_11969_E_2__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_11969_E/Sample_Second_IGO_11969_E_2,/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Second_IGO_11969_E_2 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"]
+    test_result = ["bsub -J DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_06265_8869_1_IGO_06265_AG_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_06265_AG/Sample_06265_8869_1_IGO_06265_AG_3 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",   
+    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_Third-Transcriptome_IGO_11969_E_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3,/igo/staging/FASTQ/DIANA_0454_BH555MDMXY/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",
+    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_Second_IGO_11969_E_2__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_11969_E/Sample_Second_IGO_11969_E_2,/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Second_IGO_11969_E_2 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"]
     
     for i in range (3): 
         assert(cmd[i] == test_result[i])
 
-def testCellranger_get_SCRI_tag():
-    sample1 = "SD-1680_Patient_D_nucseq_H_VDJ_IGO_12437_AN_5"
-    sample2 = "SDtest_IGO_12437_AN_4"
-    sample3 = "SDtest_GE_IGO_12437_AN_4"
-
-    tag_genome1 = cellranger.get_SCRI_tag(sample1)
-    tag_genome2 = cellranger.get_SCRI_tag(sample2)
-    tag_genome3 = cellranger.get_SCRI_tag(sample3)
-    
-    assert(tag_genome1 == ("vdj", "Human"))
-    assert(tag_genome2 == ("Skip", "na"))
-    assert(tag_genome3 == ("Skip", "na"))
-
 def testCellranger_get_tag():
     assert(cellranger.get_tag("10X_genomic") == "Skip")
     assert(cellranger.get_tag("10X_Genomics_GeneExpression-3") == "count")

From 85dd23199bc5e63f63a9968a0a20ee5062acdefe Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 1 Apr 2024 09:35:42 -0400
Subject: [PATCH 33/87] refactor cellranger code

---
 demux_run_dag.py        |   2 +-
 scripts/cellranger.py   | 351 ++++++++++------------------------------
 stats_by_project_dag.py |   2 +-
 3 files changed, 86 insertions(+), 269 deletions(-)

diff --git a/demux_run_dag.py b/demux_run_dag.py
index 09f010a..802769d 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -190,7 +190,7 @@ def stats(ds, **kwargs):
                 # step 2, start cell ranger based on recipe/barcode, check whether multiple fastq files existing
                 # trim sequencer_and_run if postfix like _10X exsiting
                 sequencer_and_run_prefix = "_".join(sequencer_and_run.split("_")[0:3])
-                scripts.cellranger.launch_cellranger(sample_sheet, sequencer_and_run_prefix)
+                scripts.cellranger.launch_cellranger_by_sample_sheet(sample_sheet, sequencer_and_run_prefix)
 
                 # add DONE file when all the 10X pipeline finished, -K to wait until finish
                 cmd = 'bsub -K -J wait_stats_done_for_{} -w \"ended(create_json___{}*)\" touch /igo/stats/CELLRANGER/{}/DONE'.format(sequencer_and_run_prefix, sequencer_and_run_prefix, sequencer_and_run_prefix)
diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index ca87b32..5c5149e 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -1,100 +1,19 @@
 # launch cell ranger pipeline (GE, VDJ, ATAC....) for 10X samples by recipe
-# put result in /igo/stats/CELLRANGER/<run_ID>
-
 import pandas as pd
 import re
 import sys
 import os
 import json
 import subprocess
-from os.path import join
-from os.path import basename
-from os.path import abspath
-from os.path import isdir
-from subprocess import call
+import os.path
 import scripts.get_sequencing_read_data
 import scripts.cellranger_spatial
+import scripts.cellranger_config as CONFIG
 
 """
 input: sample_sheet object(for sample list and essential info), sequencer_and_run(for stats folder and fastq file location)
 output: running cmd for cellranger by sample
-"""
-
-# work folder
-STATS_AREA = "/igo/stats/CELLRANGER/"
-
-# config info 
-ACCESS = 0o775
-config_dict = {
-    "count": {
-        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger count ",
-        "genome": {
-            "Human": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A ",
-            "Mouse": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A "
-        }
-    },
-    "vdj": {
-        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger vdj ",
-        "genome": {
-            "Human": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.0.0 ",
-            "Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 "
-        }
-    },
-    "atac_count": {
-        "tool": " /igo/work/nabors/tools/cellranger-atac-2.1.0/cellranger-atac count ",
-        "genome": {
-            "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-GRCh38-1.0.1 ",
-            "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-mm10-1.1.0 "
-        }
-    },
-    "cnv": {
-        "tool": " /igo/work/nabors/tools/cellranger-dna-1.1.0/cellranger-dna cnv ",
-        "genome": {
-            "Human": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCh38-1.0.0 ",
-            "Mouse": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCm38-1.0.0 "
-        }
-    },
-    "multi": {
-        "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
-    },
-    "arc": {
-        "tool": " /igo/work/bin/cellranger-arc-2.0.2/cellranger-arc count ",
-        "genome": {
-            "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-GRCh38-2020-A-2.0.0 ",
-            "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-mm10-2020-A-2.0.0 "
-        }
-    },
-    "spaceranger": {
-        "tool": " /igo/work/nabors/tools/spaceranger-3.0.0/spaceranger count ",
-        "genome": {
-            "Human": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A ",
-            "Mouse": " --transcriptome=/igo/work/nabors/genomes/10X_Genomics/spatial_gex/refdata-gex-mm10-2020-A "
-        },
-        "probe": {
-            "Human": "/igo/work/nabors/genomes/10X_Genomics/spatial_gex/Visium_Human_Transcriptome_Probe_Set_v1.0_GRCh38-2020-A.csv",
-            "Human_CytAssist": "/igo/work/genomes/10X_Genomics/spaceranger/Visium_Human_Transcriptome_Probe_Set_v2.0_GRCh38-2020-A.csv",
-            "Mouse": "/igo/work/nabors/tools/spaceranger-3.0.0/external/tenx_feature_references/targeted_panels/Visium_Mouse_Transcriptome_Probe_Set_v1.0_mm10-2020-A.csv"
-        }
-    }
-}
-
-# cellranger command line options
-OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
-
-# 10X recipe list for different pipelines
-COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]
-VDJ_FLAVORS = ["10X_Genomics_VDJ"]
-ATAC_FLAVORS = ["10X_Genomics_ATAC"]
-CNV_FLAVORS = ["10X_Genomics_CNV"]
-ARC_FLAVORS = ["10X_Genomics_Multiome", "10X_Genomics_Multiome_ATAC", "10X_Genomics_Multiome_GeneExpression"]
-SPATIAL_FLAVORS = ["10X_Genomics_Visium"]
 
-# we do not want to PROCESS SAIL (15500) or SCRI (12437) projects
-SCRI = "12437"
-SAIL = "15500"
-DO_NOT_PROCESS = [SCRI, SAIL]
-
-"""
 steps:
 1. check whether there is previous fastq existing under /igo/staging/FASTQ (find_fastq_file)
 2. get tag by recipe, if recipe not in the list above, skip for now (get_tag)
@@ -134,58 +53,30 @@ def find_fastq_file(sample_ID_list):
 
 def get_tag(recipe):
     tag = "Skip"
-    if recipe in COUNT_FLAVORS:
+    if recipe in CONFIG.COUNT_FLAVORS:
         tag = "count"
-    if recipe in CNV_FLAVORS:
+    if recipe in CONFIG.CNV_FLAVORS:
         tag = "cnv"    
-    if recipe in VDJ_FLAVORS:
+    if recipe in CONFIG.VDJ_FLAVORS:
         tag = "vdj"
-    if recipe in ATAC_FLAVORS:
+    if recipe in CONFIG.ATAC_FLAVORS:
         tag = "atac_count"
-    if recipe in ARC_FLAVORS:
+    if recipe in CONFIG.ARC_FLAVORS:
         tag = "arc"
-    if recipe in SPATIAL_FLAVORS:
+    if recipe in CONFIG.SPATIAL_FLAVORS:
         tag = "spaceranger"
     return tag
 
-# return tag and genome according to sample_ID for SCRI samples, all SCRI samples are starting with Project_12437
-# eg: SD-1680_Patient_D_nucseq_H_VDJ_IGO_12437_AN_5 will given tag as vdj, genome as Human
-# eg: SDtest_IGO_12437_AN_4 will given tag as Skip, genome as na
-# _H: Human, _M: Mouse
-# _VDJ: vdj, _GE: count, _ATAC: "atac_count"
-def get_SCRI_tag(sample_ID):
-    tag_orig = sample_ID.split("_")[sample_ID.split("_").index("IGO") - 1]
-    tag = "Skip"
-    if tag_orig == "VDJ":
-        tag = "vdj"
-    if tag_orig == "GE":
-        tag = "count"
-    if tag_orig == "ATAC":
-        tag = "atac_count"
-    
-    genome = "na"
-    if tag != "Skip":
-        genome_orig = sample_ID.split("_")[sample_ID.split("_").index("IGO") - 2]
-        if genome_orig == "H":
-            genome = "Human"
-        if genome_orig == "M":
-            genome = "Mouse"
-    # if genome parameter couldn't detected, set tag back to skip
-    if genome == "na":
-        tag = "Skip"
-
-    return tag, genome
-
 def generate_cellranger_cmd(sample_ID, tag, genome, fastq_file_path, sequencer_and_run):
-    tool = config_dict[tag]["tool"]
-    transcriptome = config_dict[tag]["genome"][genome]
+    tool = CONFIG.config_dict[tag]["tool"]
+    transcriptome = CONFIG.config_dict[tag]["genome"][genome]
     project_ID = "Project_" + "_".join(sample_ID.split("_")[sample_ID.split("_").index("IGO") + 1:-1])
-    cellranger_cmd = "{}--id=Sample_{}__{}".format(tool, sample_ID, tag) + transcriptome + "--fastqs=" + ",".join(fastq_file_path) + OPTIONS
+    cellranger_cmd = "{}--id=Sample_{}__{}".format(tool, sample_ID, tag) + transcriptome + "--fastqs=" + ",".join(fastq_file_path) + CONFIG.OPTIONS
     job_name = "{}_{}_{}_{}_cellranger".format(sequencer_and_run, project_ID, sample_ID, tag)
     bsub_cmd = "bsub -J {} -o {}.out{}".format(job_name, job_name, cellranger_cmd) 
     return bsub_cmd
         
-def create_json(send_json, sequencer_and_run, project, tag, work_area):  
+def create_json(send_json, sequencer_and_run, project, work_area):  
     job_id = sequencer_and_run + "_" + project            
     json_data_file = "cellranger_json___" + sequencer_and_run + "__" + project + ".json"
     with open(json_data_file, "w") as jfile:
@@ -250,197 +141,123 @@ def multiome_valid(fastq_list):
     
     return [is_valid, ge_list, atac_list]
 
-# Main function: launch cellranger cmd by given samplesheet object and sequencer_and_run
-def launch_cellranger(sample_sheet, sequencer_and_run):
-    # get parameters from sample_sheet
-    # dictionary of Sample_ID->Project
-    sample_project_dict = pd.Series(sample_sheet.df_ss_data["Sample_Project"].values,index=sample_sheet.df_ss_data["Sample_ID"]).to_dict()
-    # dictionary of project->sample_ID
-    project_sample_dict = {}
-    for sample_ID, project_ID in sample_project_dict.items():
-        if project_ID in project_sample_dict.keys():
-            project_sample_dict[project_ID].append(sample_ID)
-        else:
-            project_sample_dict[project_ID] = [sample_ID]
-    # dictionary of sample_ID->recipe
-    sample_recipe_dict = pd.Series(sample_sheet.df_ss_data["Sample_Well"].values,index=sample_sheet.df_ss_data["Sample_ID"]).to_dict()
-    # dictionary of sample_ID->genome
-    sample_genome_dict = pd.Series(sample_sheet.df_ss_data["Sample_Plate"].values,index=sample_sheet.df_ss_data["Sample_ID"]).to_dict()
-    # dictionary of sample_ID->fastq_list
-    sample_ID_list = list(sample_project_dict.keys())
-    sample_fastqfile_dict = find_fastq_file(sample_ID_list)
-
-    for project in project_sample_dict.keys():
-        send_json = {}
-        send_json["samples"] = []
-        # CREATE RUN FOLDER AND PROJECT FOLDER IF NOT ALREADY THERE
-        os.chdir(STATS_AREA)
-        runs = next(os.walk("."))[1]
-        if sequencer_and_run not in runs:
-            os.mkdir(sequencer_and_run, ACCESS)
-                    
-        stats_and_run = STATS_AREA + sequencer_and_run
-        os.chdir(stats_and_run)
-        projects = next(os.walk("."))[1]
-        if project not in projects:
-            os.mkdir(project, ACCESS)
-        work_area = stats_and_run + "/" + project + "/" 
-        # GO TO project ID LOCATION to start cellranger command
-        os.chdir(work_area)
-
-        
-        # SCRI or SAIL samples don't need to be pushed onto qc website
-        if (not any(prj in project for prj in DO_NOT_PROCESS)):
-            sample_list = project_sample_dict[project]
-            # call cellranger for each sample and append info to json dict
-            for sample in sample_list:
-                if sample_genome_dict[sample] != "Human" and sample_genome_dict[sample] != "Mouse":
-                    sample_genome_dict[sample] = "Mouse"
-                tag = get_tag(sample_recipe_dict[sample])
-                # if recipe within the tool being set up, lanuch cellranger
-                if tag == "arc":
-                    validation = multiome_valid(sample_fastqfile_dict[sample])
-                    if validation[0] == "YES":
-                        create_library_csv_file(validation[1], validation[2], sample)
-                        tool = config_dict[tag]["tool"]
-                        transcriptome = config_dict[tag]["genome"][sample_genome_dict[sample]]
-                        cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--libraries={}Sample_{}.csv".format(work_area, sample) + OPTIONS
-                        bsub_cmd = "bsub -J {}_{}_{}_ARC -o {}_ARC.out{}".format(sequencer_and_run, project, sample, sample, cmd)
-                        print(bsub_cmd)
-                        subprocess.run(bsub_cmd, shell=True)
-                    else:
-                        print("Multiome sample set not complete yet")
-                elif tag == "spaceranger":
-                    sample_info = scripts.cellranger_spatial.Spatial_sample(sample, project)
-                    if sample_info.tiff_image == "EMPTY":
-                        print("check tif image")
-                    else:
-                        tool = config_dict[tag]["tool"]
-                        transcriptome = config_dict[tag]["genome"][sample_genome_dict[sample]]
-                        cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--fastqs=" + ",".join(sample_fastqfile_dict[sample]) + " --image={} --slide={} --area={}".format(sample_info.tiff_image, sample_info.chip_id, sample_info.chip_position)
-                        
-                        if sample_info.cytAssist:
-                            cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--fastqs=" + ",".join(sample_fastqfile_dict[sample]) + " --cytaimage={} --slide={} --area={}".format(sample_info.tiff_image, sample_info.chip_id, sample_info.chip_position)
-                            if sample_genome_dict[sample] == "Human":
-                                probe = config_dict[tag]["probe"]["Human_CytAssist"]
-                                cmd = cmd + " --probe-set={}".format(probe)
-                            elif sample_genome_dict[sample] == "Mouse":
-                                probe = config_dict[tag]["probe"][sample_genome_dict[sample]]
-                                cmd = cmd + " --probe-set={}".format(probe)
-                                
-                        elif sample_info.preservation == "FFPE":
-                            probe = config_dict[tag]["probe"][sample_genome_dict[sample]]
-                            cmd = cmd + " --probe-set={}".format(probe)
-                        
-                        # if there is manual alignment json file availabe, add that to the cmd
-                        if sample_info.json != "EMPTY":
-                            cmd = cmd + " --loupe-alignment={}".format(sample_info.json)
-
-                        bsub_cmd = "bsub -J {}_{}_{}_SPATIAL -o {}_SPATIAL.out{}{}".format(sequencer_and_run, project, sample, sample, cmd, OPTIONS)
-                        print(bsub_cmd)
-                        subprocess.run(bsub_cmd, shell=True)
-                
-                elif tag != "Skip":
-                    cmd = generate_cellranger_cmd(sample, tag, sample_genome_dict[sample], sample_fastqfile_dict[sample], sequencer_and_run)
-                    print(cmd)
-                    subprocess.run(cmd, shell=True)
-                    send_json["samples"].append({"sample":"Sample_" + sample, "type":tag, "project":project, "run":sequencer_and_run})
-            if send_json["samples"]:
-                create_json(send_json, sequencer_and_run, project, tag, work_area)
-        else:
-            sample_list = project_sample_dict[project]
-            # call cellranger for each sample
-            for sample in sample_list:
-                tag, genome = get_SCRI_tag(sample)
-                # if recipe within the tool being set up, lanuch cellranger
-                if tag != "Skip" and genome != "na":
-                    cmd = generate_cellranger_cmd(sample, tag, genome, sample_fastqfile_dict[sample], sequencer_and_run)
-                    print(cmd)
-                    subprocess.run(cmd, shell=True)
-
-# lanuch cellranger by given project_directory eg: /igo/staging/FASTQ/RUTH_0141_AH27NGDSX5/Project_13586_B
-def lanuch_by_project(project_directory, recipe, species):
-    # get sample_ID list
-    sample_list_ori = os.listdir(project_directory)
-    sample_list = []
-    for sample in sample_list_ori:
-        # remove Sample_ prefix
-        sample_list.append(sample[7:])
-    # get project and run info from project_directory
-    project = project_directory.split("/")[5]
-    sequencer_and_run = project_directory.split("/")[4]
-    sample_fastqfile_dict = find_fastq_file(sample_list)
-    tag = get_tag(recipe)
+# lanuch cellranger per project
+def lanuch_by_project(sequencer_and_run, project, sample_id_list, sample_genome_dict, sample_recipe_dict):
+    sample_fastqfile_dict = find_fastq_file(sample_id_list)
     send_json = {}
     send_json["samples"] = []
     # CREATE RUN FOLDER AND PROJECT FOLDER IF NOT ALREADY THERE
-    os.chdir(STATS_AREA)
+    os.chdir(CONFIG.STATS_AREA)
     runs = next(os.walk("."))[1]
     if sequencer_and_run not in runs:
-        os.mkdir(sequencer_and_run, ACCESS)
-                    
-    stats_and_run = STATS_AREA + sequencer_and_run
+        os.mkdir(sequencer_and_run, CONFIG.ACCESS)
+                
+    stats_and_run = CONFIG.STATS_AREA + sequencer_and_run
     os.chdir(stats_and_run)
     projects = next(os.walk("."))[1]
     if project not in projects:
-        os.mkdir(project, ACCESS)
+        os.mkdir(project, CONFIG.ACCESS)
     work_area = stats_and_run + "/" + project + "/" 
     # GO TO project ID LOCATION to start cellranger command
     os.chdir(work_area)
 
     # call cellranger for each sample and append info to json dict
-    for sample in sample_list:
+    for sample in sample_id_list:
+        if sample_genome_dict[sample] != "Human" and sample_genome_dict[sample] != "Mouse":
+            sample_genome_dict[sample] = "Mouse"
+        tag = get_tag(sample_recipe_dict[sample])
         # if recipe within the tool being set up, lanuch cellranger
         if tag == "arc":
             validation = multiome_valid(sample_fastqfile_dict[sample])
             if validation[0] == "YES":
                 create_library_csv_file(validation[1], validation[2], sample)
-                tool = config_dict[tag]["tool"]
-                transcriptome = config_dict[tag]["genome"][species]
-                cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--libraries={}/Sample_{}.csv".format(work_area, sample) + OPTIONS
+                tool = CONFIG.config_dict[tag]["tool"]
+                transcriptome = CONFIG.config_dict[tag]["genome"][sample_genome_dict[sample]]
+                cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--libraries={}Sample_{}.csv".format(work_area, sample) + OPTIONS
                 bsub_cmd = "bsub -J {}_{}_{}_ARC -o {}_ARC.out{}".format(sequencer_and_run, project, sample, sample, cmd)
                 print(bsub_cmd)
                 subprocess.run(bsub_cmd, shell=True)
             else:
-                print("Multiome sample not finished yet")
-                print(validation)
+                print("Multiome sample set not complete yet")
         elif tag == "spaceranger":
             sample_info = scripts.cellranger_spatial.Spatial_sample(sample, project)
             if sample_info.tiff_image == "EMPTY":
-                print("check tif image")
+                print("check tif image for sample {}".format(sample))
             else:
-                tool = config_dict[tag]["tool"]
-                transcriptome = config_dict[tag]["genome"][species]
+                tool = CONFIG.config_dict[tag]["tool"]
+                transcriptome = CONFIG.config_dict[tag]["genome"][sample_genome_dict[sample]]
                 cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--fastqs=" + ",".join(sample_fastqfile_dict[sample]) + " --image={} --slide={} --area={}".format(sample_info.tiff_image, sample_info.chip_id, sample_info.chip_position)
                 
                 if sample_info.cytAssist:
                     cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--fastqs=" + ",".join(sample_fastqfile_dict[sample]) + " --cytaimage={} --slide={} --area={}".format(sample_info.tiff_image, sample_info.chip_id, sample_info.chip_position)
-                    if species == "Human":
-                        probe = config_dict[tag]["probe"]["Human_CytAssist"]
+                    if sample_genome_dict[sample] == "Human":
+                        probe = CONFIG.config_dict[tag]["probe"]["Human_CytAssist"]
                         cmd = cmd + " --probe-set={}".format(probe)
-                    elif species == "Mouse":
-                        probe = config_dict[tag]["probe"][species]
+                    elif sample_genome_dict[sample] == "Mouse":
+                        probe = CONFIG.config_dict[tag]["probe"][sample_genome_dict[sample]]
                         cmd = cmd + " --probe-set={}".format(probe)
                         
                 elif sample_info.preservation == "FFPE":
-                    probe = config_dict[tag]["probe"][species]
+                    probe = CONFIG.config_dict[tag]["probe"][sample_genome_dict[sample]]
                     cmd = cmd + " --probe-set={}".format(probe)
                 
                 # if there is manual alignment json file availabe, add that to the cmd
                 if sample_info.json != "EMPTY":
                     cmd = cmd + " --loupe-alignment={}".format(sample_info.json)
-                
-                bsub_cmd = "bsub -J {}_{}_{}_SPATIAL -o {}_SPATIAL.out{}{}".format(sequencer_and_run, project, sample, sample, cmd, OPTIONS)
+
+                bsub_cmd = "bsub -J {}_{}_{}_SPATIAL -o {}_SPATIAL.out{}{}".format(sequencer_and_run, project, sample, sample, cmd, CONFIG.OPTIONS)
                 print(bsub_cmd)
                 subprocess.run(bsub_cmd, shell=True)
-
+        
         elif tag != "Skip":
-            cmd = generate_cellranger_cmd(sample, tag, species, sample_fastqfile_dict[sample], sequencer_and_run)
+            cmd = generate_cellranger_cmd(sample, tag, sample_genome_dict[sample], sample_fastqfile_dict[sample], sequencer_and_run)
             print(cmd)
             subprocess.run(cmd, shell=True)
             send_json["samples"].append({"sample":"Sample_" + sample, "type":tag, "project":project, "run":sequencer_and_run})
+    
     if send_json["samples"]:
-        create_json(send_json, sequencer_and_run, project, tag, work_area)
+        create_json(send_json, sequencer_and_run, project, work_area)
+
+# Main function: launch cellranger cmd by given samplesheet object and sequencer_and_run
+def launch_cellranger_by_sample_sheet(sample_sheet, sequencer_and_run):
+    # get parameters from sample_sheet
+    # dictionary of Sample_ID->Project
+    sample_project_dict = pd.Series(sample_sheet.df_ss_data["Sample_Project"].values,index=sample_sheet.df_ss_data["Sample_ID"]).to_dict()
+    # dictionary of project->sample_ID
+    project_sample_dict = {}
+    for sample_ID, project_ID in sample_project_dict.items():
+        if project_ID in project_sample_dict.keys():
+            project_sample_dict[project_ID].append(sample_ID)
+        else:
+            project_sample_dict[project_ID] = [sample_ID]
+    # dictionary of sample_ID->recipe
+    sample_recipe_dict = pd.Series(sample_sheet.df_ss_data["Sample_Well"].values,index=sample_sheet.df_ss_data["Sample_ID"]).to_dict()
+    # dictionary of sample_ID->genome
+    sample_genome_dict = pd.Series(sample_sheet.df_ss_data["Sample_Plate"].values,index=sample_sheet.df_ss_data["Sample_ID"]).to_dict()
+    # launch cellranger cmd for each project
+    for project in project_sample_dict.keys():
+        # SCRI or SAIL samples don't need to run cellranger
+        if (not any(prj in project for prj in CONFIG.DO_NOT_PROCESS)):
+            sample_list = project_sample_dict[project]
+            lanuch_by_project(sequencer_and_run, project, sample_list, sample_genome_dict, sample_recipe_dict)
+
+def launch_cellranger_by_project_location(project_directory, recipe, species):
+    # get sample_ID list
+    sample_list_ori = os.listdir(project_directory)
+    sample_list = []
+    for sample in sample_list_ori:
+        # remove Sample_ prefix
+        sample_list.append(sample[7:])
+    # get project and run info from project_directory
+    project = project_directory.split("/")[5]
+    sequencer_and_run = project_directory.split("/")[4]
+    sample_genome_dict = {}
+    sample_recipe_dict = {}
+    for sample in sample_list:
+        sample_genome_dict[sample] = species
+        sample_recipe_dict[sample] = recipe
+
+    lanuch_by_project(sequencer_and_run, project, sample_list, sample_genome_dict, sample_recipe_dict)
 
 
 if __name__ == '__main__':
@@ -450,4 +267,4 @@ def lanuch_by_project(project_directory, recipe, species):
     project_directory = sys.argv[1]
     recipe = sys.argv[2]
     species = sys.argv[3]
-    lanuch_by_project(project_directory, recipe, species)
+    launch_cellranger_by_project_location(project_directory, recipe, species)
diff --git a/stats_by_project_dag.py b/stats_by_project_dag.py
index b56366d..2f21d2a 100644
--- a/stats_by_project_dag.py
+++ b/stats_by_project_dag.py
@@ -57,7 +57,7 @@ def run_stats(ds, **kwargs):
                 subprocess.run(cmd, shell=True)
 
         elif "10X_" in recipe:
-            scripts.cellranger.lanuch_by_project(project_directory, recipe, species)
+            scripts.cellranger.launch_cellranger_by_project_location(project_directory, recipe, species)
         elif "ONT" in recipe:
             cmd = "bsub -J ont_stats_{} -n 16 -M 16 /igo/work/nabors/tools/venvpy3/bin/python /igo/work/igo/igo-demux/scripts/ont_stats.py {}".format(project_id, project_directory)
             print(cmd)

From a1ab25e88065a39aea4926f66ed81f6f1fb42ffa Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 1 Apr 2024 09:39:55 -0400
Subject: [PATCH 34/87] Update cellranger.py

---
 scripts/cellranger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index 5c5149e..d5ed8fa 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -173,7 +173,7 @@ def lanuch_by_project(sequencer_and_run, project, sample_id_list, sample_genome_
                 create_library_csv_file(validation[1], validation[2], sample)
                 tool = CONFIG.config_dict[tag]["tool"]
                 transcriptome = CONFIG.config_dict[tag]["genome"][sample_genome_dict[sample]]
-                cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--libraries={}Sample_{}.csv".format(work_area, sample) + OPTIONS
+                cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--libraries={}Sample_{}.csv".format(work_area, sample) + CONFIG.OPTIONS
                 bsub_cmd = "bsub -J {}_{}_{}_ARC -o {}_ARC.out{}".format(sequencer_and_run, project, sample, sample, cmd)
                 print(bsub_cmd)
                 subprocess.run(bsub_cmd, shell=True)

From 18325f9dda1a42d464f81b0f0e92754658be08f6 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Tue, 2 Apr 2024 11:09:20 -0400
Subject: [PATCH 35/87] add create bam option for new version

---
 scripts/cellranger_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
index 801ea70..73e20ac 100644
--- a/scripts/cellranger_config.py
+++ b/scripts/cellranger_config.py
@@ -58,7 +58,7 @@
 }
 
 # cellranger command line options
-OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
+OPTIONS = " --create-bam=true --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
 
 # 10X recipe list for different pipelines
 COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]

From 43aab4862dfed7c6339ecfbd28cf8c6bdddfd4af Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Tue, 2 Apr 2024 11:12:53 -0400
Subject: [PATCH 36/87] Update test_scripts.py

---
 test_scripts.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test_scripts.py b/test_scripts.py
index fe372d6..8feba1f 100644
--- a/test_scripts.py
+++ b/test_scripts.py
@@ -21,9 +21,9 @@ def testCellranger_generate_cellranger_cmd():
         if genome_dict[sample] != "Human" and genome_dict[sample] != "Mouse":
             genome_dict[sample] = "Mouse"
         cmd.append(cellranger.generate_cellranger_cmd(sample, "count", genome_dict[sample], fastq_file_list_dict[sample], "DIANA_0453_AHFKJ5DRXY"))
-    test_result = ["bsub -J DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_06265_8869_1_IGO_06265_AG_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_06265_AG/Sample_06265_8869_1_IGO_06265_AG_3 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",   
-    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_Third-Transcriptome_IGO_11969_E_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3,/igo/staging/FASTQ/DIANA_0454_BH555MDMXY/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",
-    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_Second_IGO_11969_E_2__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_11969_E/Sample_Second_IGO_11969_E_2,/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Second_IGO_11969_E_2 --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"]
+    test_result = ["bsub -J DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_06265_AG_06265_8869_1_IGO_06265_AG_3_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_06265_8869_1_IGO_06265_AG_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-GRCh38-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_06265_AG/Sample_06265_8869_1_IGO_06265_AG_3 --create-bam=true --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",   
+    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Third-Transcriptome_IGO_11969_E_3_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_Third-Transcriptome_IGO_11969_E_3__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3,/igo/staging/FASTQ/DIANA_0454_BH555MDMXY/Project_11969_E/Sample_Third-Transcriptome_IGO_11969_E_3 --create-bam=true --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200",
+    "bsub -J DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger -o DIANA_0453_AHFKJ5DRXY_Project_11969_E_Second_IGO_11969_E_2_count_cellranger.out /igo/work/nabors/tools/cellranger-8.0.0/cellranger count --id=Sample_Second_IGO_11969_E_2__count --transcriptome=/igo/work/nabors/genomes/10X_Genomics/GEX/refdata-gex-mm10-2020-A --fastqs=/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_11969_E/Sample_Second_IGO_11969_E_2,/igo/staging/FASTQ/DIANA_0450_AH3JL3DSX3/Project_11969_E/Sample_Second_IGO_11969_E_2 --create-bam=true --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"]
     
     for i in range (3): 
         assert(cmd[i] == test_result[i])

From 54128a616ea3b92a886ecbff5ce9f2774fa0fd0d Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 4 Apr 2024 07:34:49 -0400
Subject: [PATCH 37/87] Update demux_run_dag.py

The refactored cellranger script no longer defines launch_cellranger; the function is now named launch_cellranger_by_sample_sheet, so the call in demux_run_dag.py is updated to match.
---
 demux_run_dag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demux_run_dag.py b/demux_run_dag.py
index 802769d..7a743f4 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -180,7 +180,7 @@ def stats(ds, **kwargs):
 
                 # launch cell ranger based on recipe
                 sequencer_and_run_prefix = "_".join(sequencer_and_run.split("_")[0:3])
-                scripts.cellranger.launch_cellranger(sample_sheet, sequencer_and_run_prefix)
+                scripts.cellranger.launch_cellranger_by_sample_sheet(sample_sheet, sequencer_and_run_prefix)
 
             else:
                 # step 1, generate txt files containing total reads and upload to qc website

From e22ea68b06d6438b3c6afd6b34ab7b58fd237229 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Thu, 4 Apr 2024 09:07:43 -0400
Subject: [PATCH 38/87] update cellranger arc path

---
 scripts/cellranger_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
index 73e20ac..a45d3c1 100644
--- a/scripts/cellranger_config.py
+++ b/scripts/cellranger_config.py
@@ -36,7 +36,7 @@
         "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
     },
     "arc": {
-        "tool": " /igo/work/bin/cellranger-arc-2.0.2/cellranger-arc count ",
+        "tool": " /igo/work/nabors/tools/cellranger-arc-2.0.2/cellranger-arc count ",
         "genome": {
             "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-GRCh38-2020-A-2.0.0 ",
             "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ARC/refdata-cellranger-arc-mm10-2020-A-2.0.0 "

From 189c214e2d55942ca5d907b5f0d62df37e79abc1 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Tue, 9 Apr 2024 14:17:54 -0400
Subject: [PATCH 39/87] CELLRANGER AND PIPELINE directories

Move the CELLRANGER and PIPELINE directories from /igo/stats/ to /igo/staging/.
---
 demux_run_dag.py              | 2 +-
 scripts/cellranger_config.py  | 2 +-
 scripts/cellranger_multi.py   | 8 ++++----
 scripts/deliver_cellranger.py | 2 +-
 scripts/deliver_pipeline.py   | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/demux_run_dag.py b/demux_run_dag.py
index 7a743f4..12e3733 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -193,7 +193,7 @@ def stats(ds, **kwargs):
                 scripts.cellranger.launch_cellranger_by_sample_sheet(sample_sheet, sequencer_and_run_prefix)
 
                 # add DONE file when all the 10X pipeline finished, -K to wait until finish
-                cmd = 'bsub -K -J wait_stats_done_for_{} -w \"ended(create_json___{}*)\" touch /igo/stats/CELLRANGER/{}/DONE'.format(sequencer_and_run_prefix, sequencer_and_run_prefix, sequencer_and_run_prefix)
+                cmd = 'bsub -K -J wait_stats_done_for_{} -w \"ended(create_json___{}*)\" touch /igo/staging/CELLRANGER/{}/DONE'.format(sequencer_and_run_prefix, sequencer_and_run_prefix, sequencer_and_run_prefix)
                 print(cmd)
                 subprocess.run(cmd, shell=True)
 
diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
index a45d3c1..0d4a590 100644
--- a/scripts/cellranger_config.py
+++ b/scripts/cellranger_config.py
@@ -1,5 +1,5 @@
 # work folder
-STATS_AREA = "/igo/stats/CELLRANGER/"
+STATS_AREA = "/igo/staging/CELLRANGER/"
 
 # config info 
 ACCESS = 0o775
diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index 583f6a3..3bead5c 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -66,7 +66,7 @@ def find_fastq_file(sample_ID_list):
 DRIVE_LOCATION = "/igo/work/igo/Cellranger_Multi_Config/"
 ORIGIN_DRIVE_LOCATION = "/rtssdc/mohibullahlab/LIMS/LIMS_cellranger_multi/"
 BAMTOFASTQ = "/igo/work/nabors/tools/cellranger-7.0.0/lib/bin/bamtofastq"
-STATS_AREA = "/igo/stats/PIPELINE/"
+STATS_AREA = "/igo/staging/PIPELINE/"
 # endpoint for cellranger multi
 ENDPOINT= "https://igolims.mskcc.org:8443/LimsRest/getTenxSampleInfo?requestId="
 
@@ -157,7 +157,7 @@ def new_config_and_generate_cmd(self):
     # get reads number and sub sample cell number
     def update_info_from_step1(self, fb_project_id):
         # get total reads number for gene expression library
-        reads_file = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/metrics_summary.csv".format(fb_project_id, self.name, list(self.samples.keys())[0])
+        reads_file = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/metrics_summary.csv".format(fb_project_id, self.name, list(self.samples.keys())[0])
         summary_metrix = pd.read_csv(reads_file)
         ind = summary_metrix.index[(summary_metrix["Category"] == "Library") & (summary_metrix["Metric Name"] == "Number of reads") & (summary_metrix["Library Type"] == "Gene Expression") & (summary_metrix["Grouped By"] == "Physical library ID")].tolist()
         reads_number = summary_metrix.iloc[ind[0]]["Metric Value"]
@@ -165,7 +165,7 @@ def update_info_from_step1(self, fb_project_id):
         self.ge_reads_number = reads_number
 
         # update sub sample cell number
-        cell_file = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/multi/multiplexing_analysis/tag_calls_summary.csv".format(fb_project_id, self.name)
+        cell_file = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/multi/multiplexing_analysis/tag_calls_summary.csv".format(fb_project_id, self.name)
         cell_matrix = pd.read_csv(cell_file)
         for key, value in self.samples.items():
             if value in cell_matrix["Category"].values:
@@ -286,7 +286,7 @@ def cellragner_ch_vdj(config, file_name, ch_project_ID, project_ID, ge):
     # create bam2fastq cmd per sub sample
     for key in config.sub_sample_info.keys():
         name2 = ge + "_" + key
-        source_bam = "/igo/stats/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/count/sample_alignments.bam".format(ch_project_ID, ge, key)
+        source_bam = "/igo/staging/PIPELINE/Project_{}_step1/{}/outs/per_sample_outs/{}/count/sample_alignments.bam".format(ch_project_ID, ge, key)
         destination_bam = "{}Project_{}/bamtofastq/{}".format(CONFIG_AREA, project_ID, name2)
         cmd = "bsub -K -J {}_bamtofastq -o {}_bamtofastq.out -n 8 -M 8 {} --reads-per-fastq={} {} {}".format(name2, name2, BAMTOFASTQ, config.ge_reads_number, source_bam, destination_bam)
         print(cmd)
diff --git a/scripts/deliver_cellranger.py b/scripts/deliver_cellranger.py
index 13d80cd..f1c946a 100644
--- a/scripts/deliver_cellranger.py
+++ b/scripts/deliver_cellranger.py
@@ -4,7 +4,7 @@
 
 # given project ID, look through cellranger folder and return a list of path of folders need to copy
 
-CELLRANGER_DIR = '/igo/stats/CELLRANGER/'
+CELLRANGER_DIR = '/igo/staging/CELLRANGER/'
 # structure '/igo/stats/CELLRANGER/RUNNAME/PROJECTID/SAMPLEFOLDER
 
 # find all the cellranger result given project ID, return a list of address
diff --git a/scripts/deliver_pipeline.py b/scripts/deliver_pipeline.py
index d72b158..26e8af8 100644
--- a/scripts/deliver_pipeline.py
+++ b/scripts/deliver_pipeline.py
@@ -7,7 +7,7 @@
 - Re-run setaccess.py (on a separate server)
 
 At time of delivery for all 10X projects:
-- Search under folder /igo/stats/CELLRANGER/ for any possible cell ranger output
+- Search under folder /igo/staging/CELLRANGER/ for any possible cell ranger output
 - If existing, then copy to delivery/pipeline/cellranger directory
 """
 
@@ -64,7 +64,7 @@ def deliver_pipeline_output(project, pi, recipe):
     # if recipe is CRISPRSeq or GeoMx, go to pipeline folder and find output, if exists the copy
     # add cellranger multi output for featurebarcoding project here for now
     elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding":
-        pipeline_path = "/igo/stats/PIPELINE/Project_" + project
+        pipeline_path = "/igo/staging/PIPELINE/Project_" + project
         if not os.path.exists(pipeline_path):
             print("No pipeline result available")
         else:

From 9cd961c8b17be65f0360259a06989171cae115ae Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Fri, 12 Apr 2024 09:09:32 -0400
Subject: [PATCH 40/87] fixed arc cmd option issue

---
 scripts/cellranger.py        | 2 +-
 scripts/cellranger_config.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index d5ed8fa..d238edd 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -173,7 +173,7 @@ def lanuch_by_project(sequencer_and_run, project, sample_id_list, sample_genome_
                 create_library_csv_file(validation[1], validation[2], sample)
                 tool = CONFIG.config_dict[tag]["tool"]
                 transcriptome = CONFIG.config_dict[tag]["genome"][sample_genome_dict[sample]]
-                cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--libraries={}Sample_{}.csv".format(work_area, sample) + CONFIG.OPTIONS
+                cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--libraries={}Sample_{}.csv".format(work_area, sample) + CONFIG.ARC_OPTIONS
                 bsub_cmd = "bsub -J {}_{}_{}_ARC -o {}_ARC.out{}".format(sequencer_and_run, project, sample, sample, cmd)
                 print(bsub_cmd)
                 subprocess.run(bsub_cmd, shell=True)
diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
index 0d4a590..235488f 100644
--- a/scripts/cellranger_config.py
+++ b/scripts/cellranger_config.py
@@ -59,6 +59,7 @@
 
 # cellranger command line options
 OPTIONS = " --create-bam=true --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
+ARC_OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
 
 # 10X recipe list for different pipelines
 COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]

From 7633e51991e40b384776f9e4f2ff0f9b21c636ec Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Fri, 12 Apr 2024 14:47:47 -0400
Subject: [PATCH 41/87] fix vdj cmd

---
 scripts/cellranger.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index d238edd..73b7737 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -72,6 +72,8 @@ def generate_cellranger_cmd(sample_ID, tag, genome, fastq_file_path, sequencer_a
     transcriptome = CONFIG.config_dict[tag]["genome"][genome]
     project_ID = "Project_" + "_".join(sample_ID.split("_")[sample_ID.split("_").index("IGO") + 1:-1])
     cellranger_cmd = "{}--id=Sample_{}__{}".format(tool, sample_ID, tag) + transcriptome + "--fastqs=" + ",".join(fastq_file_path) + CONFIG.OPTIONS
+    if tag == "vdj":
+        cellranger_cmd = cellranger_cmd.replace(" --create-bam=true", "")
     job_name = "{}_{}_{}_{}_cellranger".format(sequencer_and_run, project_ID, sample_ID, tag)
     bsub_cmd = "bsub -J {} -o {}.out{}".format(job_name, job_name, cellranger_cmd) 
     return bsub_cmd

From 9c72cf417f85551ff4cfa4f9d6d28e5f46a25404 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sat, 13 Apr 2024 11:24:10 -0700
Subject: [PATCH 42/87] Update LaunchMetrics.py

bringing dragen servers ID02 and ID03 back online
---
 scripts/LaunchMetrics.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index db0fd1c..0573165 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -138,7 +138,6 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			# rna_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
 			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
 			rna_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
@@ -147,7 +146,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -176,14 +175,13 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			# dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
 			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38-alt_masked.cnv.graph.hla.rna-9-r3.0-1"
 		else:
 			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/{}".format(sample_parameters["GTAG"])
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From 47dff759e73e50df69c471a465af333d0f3ef132 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Sun, 14 Apr 2024 21:52:09 -0400
Subject: [PATCH 43/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index 3bead5c..c2def93 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -2,7 +2,6 @@
 import os
 import subprocess
 import glob
-from subprocess import call
 import argparse
 from collections import OrderedDict
 import requests
@@ -191,7 +190,7 @@ def ch_file_generation(project_id, sample_name):
     tag_seq_dict = pd.Series(df['Hashtag sequence'].values,index=df['Hashtag Name']).to_dict()
 
     sub_sample_dict = {}
-    sub_sample_lst = df[df["Sample Name in IGO"] == sample_name]["Sample Name"].tolist()
+    sub_sample_lst = df[str(df["Sample Name in IGO"]) == sample_name]["Sample Name"].tolist()
     for item in sub_sample_lst:
         sub_sample_dict[item] = sample_tag_dict[item]
 
@@ -401,7 +400,10 @@ def gather_sample_set_info(sample_name):
                     fb_type.append("Cell Hashing")
                 if "Feature Barcoding" in tag_lst:
                     fb_type.append("Feature Barcoding")
-                # TODO add vdj type
+                if "T Cells" in tag_lst:
+                    vdj_type.append("VDJ-T")
+                if "B Cells" in tag_lst:
+                    vdj_type.append("VDJ-B")
                 print(fb_type, vdj_type)
                 break
 
@@ -417,7 +419,7 @@ def gather_sample_set_info(sample_name):
                         sample_set["ch"] = "_IGO_".join([value[1], key])
                 if "10X_Genomics_VDJ" in value[2][0]:
                     sample_set["vdj"] = "_IGO_".join([value[1], key])
-
+    # TODO add vdj type to the whole pipeline
     return sample_set
 
 # TODO check whether a project set is complete to launch pipeline

From b809d8127a68e848a2bf888722abae8bf6b21368 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Sun, 14 Apr 2024 21:57:43 -0400
Subject: [PATCH 44/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index c2def93..ebe9ed6 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -190,7 +190,7 @@ def ch_file_generation(project_id, sample_name):
     tag_seq_dict = pd.Series(df['Hashtag sequence'].values,index=df['Hashtag Name']).to_dict()
 
     sub_sample_dict = {}
-    sub_sample_lst = df[str(df["Sample Name in IGO"]) == sample_name]["Sample Name"].tolist()
+    sub_sample_lst = df[df["Sample Name in IGO"].astype(str) == str(sample_name)]["Sample Name"].tolist()
     for item in sub_sample_lst:
         sub_sample_dict[item] = sample_tag_dict[item]
 

From 90da28e3883b8c6cd5939c5fb56af27c137b6dc8 Mon Sep 17 00:00:00 2001
From: David McManamon <dmcmanam@gmail.com>
Date: Mon, 15 Apr 2024 10:57:18 -0400
Subject: [PATCH 45/87] Update LaunchMetrics.py

only host id01 is working currently
---
 scripts/LaunchMetrics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 0573165..16bd85c 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -146,7 +146,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -181,7 +181,7 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		
@@ -300,4 +300,4 @@ def launch_picard(bams_by_lane, run, sample, sample_parameters, work_directory):
 			
 
 	
-			
\ No newline at end of file
+			

From 9341c6c63a3a4ba5bb4476f4678143b84b56e4be Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 15 Apr 2024 15:38:13 -0400
Subject: [PATCH 46/87] add correct fastq list step after create folder

---
 scripts/organise_fastq_split_by_lane.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/organise_fastq_split_by_lane.py b/scripts/organise_fastq_split_by_lane.py
index f0d81a7..bb7c108 100644
--- a/scripts/organise_fastq_split_by_lane.py
+++ b/scripts/organise_fastq_split_by_lane.py
@@ -92,7 +92,7 @@ def correct_fastq_list_csv(demux_reports_dir):
     demux_dir = sys.argv[2]
     if demux_type == "create":
         create_fastq_folders(demux_dir)
-        # add correct fastq list step?
+        correct_fastq_list_csv(demux_dir+"/Reports")
     elif demux_type == "correct":
         correct_sample_folder_name(demux_dir)
     else:

From cc2e1a43c63a37bf8ce6527c560998cba113ed54 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Tue, 16 Apr 2024 15:05:47 -0400
Subject: [PATCH 47/87] add demux stats option for stats dag

---
 scripts/get_total_reads_from_demux.py | 28 ++++++++++++++-------------
 stats_by_project_dag.py               |  3 +++
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/scripts/get_total_reads_from_demux.py b/scripts/get_total_reads_from_demux.py
index 612ef5e..87c0419 100644
--- a/scripts/get_total_reads_from_demux.py
+++ b/scripts/get_total_reads_from_demux.py
@@ -3,6 +3,7 @@
 import numpy
 import json
 import re
+import os
 
 # get total reads number from Demultiplex_Stats.csv file or json file and generate txt files for each sample
 # add DLP type function. For DLP, only total reads for each project is needed
@@ -98,22 +99,23 @@ def run(sample_sheet, sequencer_and_run):
     print("generate AM txt files to folder: {}".format(stats_done_dir))
 
 # generate AM txt files containing total reads by project ID such as "Project_12754_E"
-def by_project(sample_sheet, project_id, sequencer_and_run):
+def by_project_location(project_directory):
+    # get sample_ID list
+    sample_list_ori = os.listdir(project_directory)
+    sample_list = []
+    for sample in sample_list_ori:
+        # remove Sample_ prefix
+        sample_list.append(sample[7:])
+    # get run info from project_directory
+    sequencer_and_run = project_directory.split("/")[4]
+    
     sequencer_and_run_prefix = "_".join(sequencer_and_run.split("_")[0:3])
     sequencer = sequencer_and_run.split("_")[0]
     stats_done_dir = STATS_DONE_DIR_PREFIX + sequencer + "/"
-    demux_report_file = "/igo/staging/FASTQ/" + sequencer_and_run + "/Reports/Demultiplex_Stats.csv"
-    # dictionary of Sample_ID->Project
-    sample_project_dict = pd.Series(sample_sheet.df_ss_data['Sample_Project'].values,index=sample_sheet.df_ss_data['Sample_ID']).to_dict()
-    
-    sample_ID_list = []
-    # filter sample_ID by projectID and append to sample_ID_list
-    for sample, project in sample_project_dict.items():
-        if project == project_id:
-            sample_ID_list.append(sample)
-
-    total_reads_dict = get_total_reads(sample_ID_list, demux_report_file)
-    for sample in sample_ID_list:
+    demux_report_file = project_directory + "/Reports/Demultiplex_Stats.csv"
+ 
+    total_reads_dict = get_total_reads(sample_list, demux_report_file)
+    for sample in sample_list:
         write_to_am_txt(sequencer_and_run_prefix, sample, total_reads_dict[sample], stats_done_dir)
 
     print("generate AM txt files to folder: {}".format(stats_done_dir))
diff --git a/stats_by_project_dag.py b/stats_by_project_dag.py
index 2f21d2a..b99dc9e 100644
--- a/stats_by_project_dag.py
+++ b/stats_by_project_dag.py
@@ -23,6 +23,7 @@ def run_stats(ds, **kwargs):
         import subprocess
         import scripts.cellranger_multi
         import os
+        import scripts.get_total_reads_from_demux
 
         project_directory = kwargs["params"]["project_directory"]
         recipe = kwargs["params"]["recipe"]
@@ -62,6 +63,8 @@ def run_stats(ds, **kwargs):
             cmd = "bsub -J ont_stats_{} -n 16 -M 16 /igo/work/nabors/tools/venvpy3/bin/python /igo/work/igo/igo-demux/scripts/ont_stats.py {}".format(project_id, project_directory)
             print(cmd)
             subprocess.run(cmd, shell=True)
+        elif recipe == "demux_stats":
+            scripts.get_total_reads_from_demux.by_project_location(project_directory)
         else:
             scripts.calculate_stats.main([project_directory, recipe, species])
 

From 1ec9c3bb77d20c9978b92218ebb2b3008e79687b Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Tue, 16 Apr 2024 15:13:29 -0400
Subject: [PATCH 48/87] Update get_total_reads_from_demux.py

---
 scripts/get_total_reads_from_demux.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/get_total_reads_from_demux.py b/scripts/get_total_reads_from_demux.py
index 87c0419..9cf02b8 100644
--- a/scripts/get_total_reads_from_demux.py
+++ b/scripts/get_total_reads_from_demux.py
@@ -112,7 +112,7 @@ def by_project_location(project_directory):
     sequencer_and_run_prefix = "_".join(sequencer_and_run.split("_")[0:3])
     sequencer = sequencer_and_run.split("_")[0]
     stats_done_dir = STATS_DONE_DIR_PREFIX + sequencer + "/"
-    demux_report_file = project_directory + "/Reports/Demultiplex_Stats.csv"
+    demux_report_file = "/igo/staging/FASTQ/" + sequencer_and_run + "/Reports/Demultiplex_Stats.csv"
  
     total_reads_dict = get_total_reads(sample_list, demux_report_file)
     for sample in sample_list:

From 326971a511ad0d7324c4cade4686273ef1629c03 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 17 Apr 2024 14:33:33 -0400
Subject: [PATCH 49/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index ebe9ed6..b04a987 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -234,8 +234,8 @@ def gather_config_info(sample_dict, genome, IGO_ID):
         config.gene_expression["cmo-set"] = CONFIG_AREA + "Project_{}/Project_{}_ch_{}.csv".format(project_ID, project_ID, sample_name)
         config.samples = ch_file_generation(project_ID, sample_name)
 
-    # if both ch and fb are there, change the ch name
-    if "ch" in sample_dict.keys() and "fb" in sample_dict.keys():
+    # if both ch and fb are there and vdj not there, change the ch name
+    if "ch" in sample_dict.keys() and "fb" in sample_dict.keys() and "vdj" not in sample_dict.keys():
         sample_dict["ch"] = sample_dict["ch"].replace("FB_IGO", "CH_IGO")
 
     # find fastq files for each sample and append information into config["libraries"]

From 177a0ee1377217516e443d8c2ab1e9e88102428a Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 17 Apr 2024 14:46:38 -0400
Subject: [PATCH 50/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index b04a987..63562fe 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -449,6 +449,7 @@ def gather_sample_set_info(sample_name):
     
     genome = args.genome
     config = gather_config_info(sample_dict, genome, args.ge)
+    print(config.lirbaries)
     project_ID = "_".join(args.ge.split("IGO_")[1].split("_")[:-1])
     file_name = "{}Project_{}/{}.csv".format(CONFIG_AREA, project_ID, args.ge)
 

From ba8b9ed1d7f23bb48fa6ed517674499a685dbfb6 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 17 Apr 2024 14:56:21 -0400
Subject: [PATCH 51/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index 63562fe..47f9b1d 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -235,7 +235,7 @@ def gather_config_info(sample_dict, genome, IGO_ID):
         config.samples = ch_file_generation(project_ID, sample_name)
 
     # if both ch and fb are there and vdj not there, change the ch name
-    if "ch" in sample_dict.keys() and "fb" in sample_dict.keys() and "vdj" not in sample_dict.keys():
+    if "ch" in sample_dict.keys() and "fb" in sample_dict.keys() and ("vdj" not in sample_dict.keys()):
         sample_dict["ch"] = sample_dict["ch"].replace("FB_IGO", "CH_IGO")
 
     # find fastq files for each sample and append information into config["libraries"]
@@ -244,6 +244,7 @@ def gather_config_info(sample_dict, genome, IGO_ID):
         sample_list.append(i)
     fastq_list = find_fastq_file(sample_list)
     for key, value in sample_dict.items():
+        print("key: {}, value: {}".format(key, value))
         if key == "ge":
             config.lirbaries[value] = [fastq_list[value], "Gene Expression"]
         elif key == "vdj":

From 051e30b133b443e1b5f690bc8c7aa4240ddc9c3a Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 17 Apr 2024 15:17:38 -0400
Subject: [PATCH 52/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index 47f9b1d..47838b4 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -114,6 +114,7 @@ def write_ch_ge_only_to_csv(self, name_of_file):
             file.write("\n[libraries]\nfastq_id,fastqs,feature_types\n")
             
             for key, value in self.lirbaries.items():
+                key.replace("_CHMARKER_", "")
                 if value[1] == "Gene Expression" or value[1] == "Multiplexing Capture":
                     for i in value[0]:
                         file.write("{},{},{}\n".format(key, i, value[1]))
@@ -252,7 +253,11 @@ def gather_config_info(sample_dict, genome, IGO_ID):
         elif key == "fb":
             config.lirbaries[value] = [fastq_list[value], "Antibody Capture"]
         elif key == "ch":
-            config.lirbaries[value] = [fastq_list[value], "Multiplexing Capture"]
+            # for case of all ch, fb and vdj exits and doesn't need to make two copies of fb fastq file
+            if "ch" in sample_dict.keys() and "fb" in sample_dict.keys() and "vdj" in sample_dict.keys():
+                config.lirbaries[value + "_CHMARKER_"] = [fastq_list[value], "Multiplexing Capture"]
+            else:
+                config.lirbaries[value] = [fastq_list[value], "Multiplexing Capture"]
        
     return config
 

From 7f974d891760cc8ae63b36eede8909fb7fa4510d Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 17 Apr 2024 15:21:09 -0400
Subject: [PATCH 53/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index 47838b4..dae2b40 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -114,7 +114,7 @@ def write_ch_ge_only_to_csv(self, name_of_file):
             file.write("\n[libraries]\nfastq_id,fastqs,feature_types\n")
             
             for key, value in self.lirbaries.items():
-                key.replace("_CHMARKER_", "")
+                key = key.replace("_CHMARKER_", "")
                 if value[1] == "Gene Expression" or value[1] == "Multiplexing Capture":
                     for i in value[0]:
                         file.write("{},{},{}\n".format(key, i, value[1]))

From 766ee25ce427a0ab25a180fae592f5e3188e5ab3 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Fri, 19 Apr 2024 10:25:28 -0400
Subject: [PATCH 54/87] updating LaunchMetrics.py

pointing script to new methylated tables for hg38 and grcm39 for dragen 4.2; added id02 and id03 dragen servers back to production.
---
 scripts/run_param_config.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index f74814a..ef2e78a 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -18,11 +18,12 @@
 HAPLOTYPE_MAP = "HAPLOTYPE_MAP"
 
 # 3) Determined by recipe (see: recipe_options_mapping)
-BAITS="BAITS"
-TARGETS="TARGETS"
-MSKQ="MSKQ"
-MD="MD"
-DGN_REFERENCE="DGN_REFERENCE"
+BAITS = "BAITS"
+TARGETS = "TARGETS"
+MSKQ = "MSKQ"
+MD = "MD"
+DGN_REFERENCE = "DGN_REFERENCE"
+DGN_REFERENCE = "DGN_REFERENCE"
 """
 				D E P E N D E N C Y    G R A P H
 									+-----------+

From 13a544a9e6cfae0b25c2634ff0ac453792763515 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Fri, 19 Apr 2024 10:29:59 -0400
Subject: [PATCH 55/87] Update LaunchMetrics.py

pointing script to new methylated tables for hg38 and grcm39 for dragen 4.2; added id02 and id03 dragen servers back to production.
---
 scripts/LaunchMetrics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 0573165..1fcb676 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -16,7 +16,7 @@
 # Global Variable : we do not want to process these experiments in this script
 DO_NOT_PROCESS = ["DLP"]
 # These recipes will be evaluated using DRAGEN because of their larger size of fastqs
-RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "MouseWholeGenome", "HumanWholeGenome", "PombeWholeGenome", "ChIPSeq", "AmpliconSeq"]
+RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "MouseWholeGenome", "HumanWholeGenome", "PombeWholeGenome", "ChIPSeq", "AmpliconSeq", "MethylCaptureSeq"]
 # these projects willl only need demux stats
 DEMUX_ONLY = ["SMARTSeq", "10X_Genomics"]
 
@@ -218,13 +218,13 @@ def dragen_methylation(sample, run, sample_parameters, work_directory, dragen_di
 		
 		# get the correct path for the reference
 		if (sample_parameters["GTAG"] == "GRCh38"):
-			dragen_path = "/igo/work/igo/dragen_hash_tables/hg38_methylated"
+			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/hg38_methylated"
 		else:
-			dragen_path = "/igo/work/igo/dragen_hash_tables/grcm39_methylated"
+			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/grcm39_methylated"
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen_methylation = "/opt/edico/bin/dragen --enable-methylation-calling true --methylation-protocol directional --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From a08df4add833c69f2d26cd745456e9876ea826ae Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Fri, 19 Apr 2024 10:33:52 -0400
Subject: [PATCH 56/87] Update LaunchMetrics.py

adding bin memory option to dragen rna and dragen methylation options
---
 scripts/LaunchMetrics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 8f993dc..86907f2 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -145,7 +145,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		rna_dragen_job_name_header = "{}___RNA_DRAGEN___".format(run)
 		
 		
-		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} ".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
+		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} --bin_memory 50000000000".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
 		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
@@ -223,7 +223,7 @@ def dragen_methylation(sample, run, sample_parameters, work_directory, dragen_di
 			dragen_path = "/igo/work/igo/dragen_hash_tables/4.2/grcm39_methylated"
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
-		launch_dragen_methylation = "/opt/edico/bin/dragen --enable-methylation-calling true --methylation-protocol directional --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
+		launch_dragen_methylation = "/opt/edico/bin/dragen --enable-methylation-calling true --methylation-protocol directional --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
 		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)

From e9299aa04d5b5868d6075fd7fba884e7273d63f4 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sun, 21 Apr 2024 10:01:34 -0400
Subject: [PATCH 57/87] Update LaunchMetrics.py

bringing ID02 and ID03 back online
---
 scripts/LaunchMetrics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 86907f2..7789d20 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -21,7 +21,7 @@
 DEMUX_ONLY = ["SMARTSeq", "10X_Genomics"]
 
 # Organisms to have DRAGEN BAMS
-DRAGEN_RNA_GENOMES = ["GRCh38", "grcm39"]
+DRAGEN_RNA_GENOMES = ["GRCh38", "grcm39", "dm6"]
 # this list contains the headers of the columns.  we will access the data using these listings
 PICARD_VERSION = "2_23_2"
 PICARD_JAR = "/igo/home/igo/resources/picard2.23.2/picard.jar "
@@ -146,7 +146,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} --bin_memory 50000000000".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -181,7 +181,7 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From c7817240ebe84087daeb07f33c4d1671a9cb8cfb Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sun, 21 Apr 2024 11:06:12 -0400
Subject: [PATCH 58/87] updates for drosophila

changing entries in run param config script to point to correct location of drosophila genome.  taking out dm6 (drosophila) from the DRAGEN_RNA_GENOMES list
---
 scripts/LaunchMetrics.py    |  2 +-
 scripts/run_param_config.py | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 7789d20..014919a 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -21,7 +21,7 @@
 DEMUX_ONLY = ["SMARTSeq", "10X_Genomics"]
 
 # Organisms to have DRAGEN BAMS
-DRAGEN_RNA_GENOMES = ["GRCh38", "grcm39", "dm6"]
+DRAGEN_RNA_GENOMES = ["GRCh38", "grcm39"]
 # this list contains the headers of the columns.  we will access the data using these listings
 PICARD_VERSION = "2_23_2"
 PICARD_JAR = "/igo/home/igo/resources/picard2.23.2/picard.jar "
diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index ef2e78a..a8f4654 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -281,15 +281,15 @@ def get_ordered_dic(unordered_dic):
 		},
 		"dm6": {
 				DEFAULT: {
-						GENOME: "/igo/work/nabors/genomes/Drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa",
-						REFERENCE: "/igo/work/nabors/genomes/Drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa"
+						GENOME: "/igo/work/nabors/genomes/Drosophila_melanogaster/ENSEMBL/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa",
+						REFERENCE: "/igo/work/nabors/genomes/Drosophila_melanogaster/ENSEMBL/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa"
 				},
 			"RNA": {
-					GENOME: "/igo/work/nabors/genomes/Drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa",
-					REFERENCE: "/igo/work/nabors/genomes/Drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa",
-					REF_FLAT: "/igo/work/nabors/genomes/Drosophila_melanogaster/GTF/Drosophila_melanogaster.BDGP6.46.110.gtf.ref.flat",
-					RIBOSOMAL_INTERVALS: "/igo/work/nabors/genomes/Drosophila_melanogaster/GTF/Drosophila_melanogaster.BDGP6.46.110.gtf.bed.rRNA.intervals",
-					GTF: "/igo/work/nabors/genomes/Drosophila_melanogaster/GTF/Drosophila_melanogaster.BDGP6.46.110.gtf",
+					GENOME: "/igo/work/nabors/genomes/Drosophila_melanogaster/ENSEMBL/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa",
+					REFERENCE: "/igo/work/nabors/genomes/Drosophila_melanogaster/ENSEMBL/Drosophila_melanogaster.BDGP6.46.dna.toplevel.fa",
+					REF_FLAT: "/igo/work/nabors/genomes/Drosophila_melanogaster/ENSEMBL/GTF/Drosophila_melanogaster.BDGP6.46.110.gtf.refFlat",
+					RIBOSOMAL_INTERVALS: "/igo/work/nabors/genomes/Drosophila_melanogaster/ENSEMBL/GTF/Drosophila_melanogaster.BDGP6.46.110.gtf.bed.rRNA.intervals",
+					GTF: "/igo/work/nabors/genomes/Drosophila_melanogaster/ENSEMBL/GTF/Drosophila_melanogaster.BDGP6.46.110.gtf",
 					GTAG: "dm6"
 				}
 		},

From 4af41c4d277927524247a75ef2b9429bffd3d991 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sun, 21 Apr 2024 11:31:39 -0400
Subject: [PATCH 59/87] Update LaunchMetrics.py

took out MethylCaptureSeq from RUN_ON_DRAGEN table
---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 014919a..c84d0b2 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -16,7 +16,7 @@
 # Global Variable : we do not want to process these experiments in this script
 DO_NOT_PROCESS = ["DLP"]
 # These recipes will be evaluated using DRAGEN because of their larger size of fastqs
-RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "MouseWholeGenome", "HumanWholeGenome", "PombeWholeGenome", "ChIPSeq", "AmpliconSeq", "MethylCaptureSeq"]
+RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "MouseWholeGenome", "HumanWholeGenome", "PombeWholeGenome", "ChIPSeq", "AmpliconSeq"]
 # these projects willl only need demux stats
 DEMUX_ONLY = ["SMARTSeq", "10X_Genomics"]
 

From ee933011d536ebecc5c5bbce454594c8dccfcb12 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sun, 21 Apr 2024 12:34:09 -0400
Subject: [PATCH 60/87] Update LaunchMetrics.py

taking ID01 out of production.  right now it is at 98.5%.  will let the PE150 jobs launch on ID02 and ID03
---
 scripts/LaunchMetrics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index c84d0b2..9fd76f2 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -146,7 +146,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} --bin_memory 50000000000".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -181,7 +181,7 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		
@@ -224,7 +224,7 @@ def dragen_methylation(sample, run, sample_parameters, work_directory, dragen_di
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen_methylation = "/opt/edico/bin/dragen --enable-methylation-calling true --methylation-protocol directional --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From 38530976a6419f8fa93776bed5b9aa255ae766f3 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sun, 21 Apr 2024 15:33:53 -0400
Subject: [PATCH 61/87] Update LaunchMetrics.py

switch RNA to ID01 to use up some of the license of ID01
---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 9fd76f2..4007eda 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -146,7 +146,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} --bin_memory 50000000000".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		

From a7d2eb49a2fa9e8cfc06d99585d65adeb7461708 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sun, 21 Apr 2024 15:43:38 -0400
Subject: [PATCH 62/87] Update LaunchMetrics.py

switch to dragen servers ID02 and ID03 so stats won't fail because of license expired on ID01
---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 4007eda..9fd76f2 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -146,7 +146,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} --bin_memory 50000000000".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		

From 2c0ff8109832d4045f800c15ac47bd1b8735eed6 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 29 Apr 2024 17:31:30 -0400
Subject: [PATCH 63/87] Update run_param_config.py

update the genome for MissionBio-Heme to run large samples on DRAGEN
---
 scripts/run_param_config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index a8f4654..7fa0307 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -97,7 +97,7 @@ def get_ordered_dic(unordered_dic):
 				"RDM": "hg19",
 				"myTYPE_V1": "hg19",
 				"PanCancerV2": "hg19",
-				"MissionBio-Heme": "hg19",
+				"MissionBio-Heme": "GRCh38",
 				"WholeExome_v4": "hg19",
 				"AmpliSeq": "hg19",
 				"HemeBrainPACT_v1": "hg19"
@@ -575,8 +575,8 @@ def get_ordered_dic(unordered_dic):
 				MD: "yes"
 		},
 		"MissionBio-Heme": {
-				BAITS: "/igo/home/igo/resources/ilist/MissionBio-Heme/AML_BAITS.iList",
-				TARGETS: "/igo/home/igo/resources/ilist/MissionBio-Heme/AML_BAITS.iList",
+				BAITS: "/igo/work/nabors/bed_files/Mission_Bio/hg38/MissionBio-Heme_BAITS.iList",
+				TARGETS: "/igo/work/nabors/bed_files/Mission_Bio/hg38/MissionBio-Heme_TARGETS.iList",
 				MSKQ: "no",
 				MD: "yes"
 		},

From e1b3fbd336f6f945be7de609e6461cde26b500e7 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 9 May 2024 13:44:56 -0400
Subject: [PATCH 64/87] Update LaunchMetrics.py

putting id01 back into production
---
 scripts/LaunchMetrics.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 9fd76f2..c84d0b2 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -146,7 +146,7 @@ def dragen_rna_alignment_and_metrics(sample, run, sample_parameters, rna_directo
 		
 		
 		launch_dragen_rna = "/opt/edico/bin/dragen -f -r {} --fastq-list {} --fastq-list-sample-id {} -a {} --intermediate-results-dir /staging/temp --enable-map-align true --enable-sort true --enable-bam-indexing true --enable-map-align-output true --output-format BAM --enable-rna true --enable-duplicate-marking true --enable-rna-quantification true --output-file-prefix {} --output-directory {} --bin_memory 50000000000".format(rna_path, fastq_list, sample.sample_id, sample_parameters["GTF"], sample.sample_id, rna_directory)
-		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
+		bsub_launch_dragen_rna = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(rna_dragen_job_name_header, sample.sample_id, rna_directory, launch_dragen_rna)
 		print(bsub_launch_dragen_rna)
 		call(bsub_launch_dragen_rna, shell = True)
 		
@@ -181,7 +181,7 @@ def dragen(sample, run, sample_parameters, work_directory, dragen_directory, fas
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen = "/opt/edico/bin/dragen --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_job_name_header, sample.sample_id, dragen_directory, launch_dragen)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		
@@ -224,7 +224,7 @@ def dragen_methylation(sample, run, sample_parameters, work_directory, dragen_di
 			
 		metric_file_prefix = "{}___P{}___{}___{}".format(run, sample.project[8:], sample.sample_id, sample_parameters["GTAG"])
 		launch_dragen_methylation = "/opt/edico/bin/dragen --enable-methylation-calling true --methylation-protocol directional --ref-dir {} --fastq-list {} --fastq-list-sample-id {} --intermediate-results-dir /staging/temp --output-directory {} --output-file-prefix {} --enable-sort true --enable-duplicate-marking true --bin_memory 50000000000".format(dragen_path, fastq_list, sample.sample_id, dragen_directory, sample.sample_id)
-		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
+		bsub_launch_dragen = "bsub -J {0}{1} -o {0}{1}.out -cwd \"{2}\" -m \"id01 id02 id03\" -q dragen -n 48 -M 4 {3}".format(dragen_methylation_job_name_header, sample.sample_id, dragen_directory, launch_dragen_methylation)
 		print(bsub_launch_dragen)
 		call(bsub_launch_dragen, shell = True)
 		

From 37e925793148a10523500b3bf490cd5dd91c800c Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Fri, 10 May 2024 16:28:33 -0400
Subject: [PATCH 65/87] update recipe change

---
 deliver_pipeline_dag.py      |   1 +
 scripts/cellranger_config.py |  24 ++------
 scripts/deliver_pipeline.py  | 106 +++++++++++++----------------------
 stats_by_project_dag.py      |   4 +-
 4 files changed, 47 insertions(+), 88 deletions(-)

diff --git a/deliver_pipeline_dag.py b/deliver_pipeline_dag.py
index e3fe37b..dd1fde7 100644
--- a/deliver_pipeline_dag.py
+++ b/deliver_pipeline_dag.py
@@ -25,6 +25,7 @@
     def deliver(ds, **kwargs):
         project = kwargs["params"]["project"]
         pi = kwargs["params"]["pi"]
+        # recipe here is actually request name
         recipe = kwargs["params"]["recipe"]
         print("Delivering the pipeline output and/or .bams for {} {} {}".format(project, pi, recipe))
 
diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
index 235488f..e4e6105 100644
--- a/scripts/cellranger_config.py
+++ b/scripts/cellranger_config.py
@@ -18,20 +18,6 @@
             "Mouse": " --reference=/igo/work/genomes/10X_Genomics/VDJ/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0 "
         }
     },
-    "atac_count": {
-        "tool": " /igo/work/nabors/tools/cellranger-atac-2.1.0/cellranger-atac count ",
-        "genome": {
-            "Human": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-GRCh38-1.0.1 ",
-            "Mouse": " --reference=/igo/work/nabors/genomes/10X_Genomics/ATAC/refdata-cellranger-atac-mm10-1.1.0 "
-        }
-    },
-    "cnv": {
-        "tool": " /igo/work/nabors/tools/cellranger-dna-1.1.0/cellranger-dna cnv ",
-        "genome": {
-            "Human": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCh38-1.0.0 ",
-            "Mouse": " --reference=/igo/work/nabors/10X_Genomics/CNV/refdata-GRCm38-1.0.0 "
-        }
-    },
     "multi": {
         "tool": " /igo/work/nabors/tools/cellranger-8.0.0/cellranger multi "
     },
@@ -62,12 +48,10 @@
 ARC_OPTIONS = " --nopreflight --jobmode=lsf --mempercore=64 --disable-ui --maxjobs=200"
 
 # 10X recipe list for different pipelines
-COUNT_FLAVORS = ["10X_Genomics_GeneExpression-3", "10X_Genomics_GeneExpression-5"]
-VDJ_FLAVORS = ["10X_Genomics_VDJ"]
-ATAC_FLAVORS = ["10X_Genomics_ATAC"]
-CNV_FLAVORS = ["10X_Genomics_CNV"]
-ARC_FLAVORS = ["10X_Genomics_Multiome", "10X_Genomics_Multiome_ATAC", "10X_Genomics_Multiome_GeneExpression"]
-SPATIAL_FLAVORS = ["10X_Genomics_Visium"]
+COUNT_FLAVORS = ["SC_Chromium-GEX-3", "SC_Chromium-GEX-5"]
+VDJ_FLAVORS = ["SC_Chromium-TCR", "SC_Chromium-BCR"]
+ARC_FLAVORS = ["SC_Chromium-Multiome", "SC_Chromium-Multiome_ATAC", "SC_Chromium-Multiome_GEX"]
+SPATIAL_FLAVORS = ["ST_Visium"]
 
 # we do not want to PROCESS SAIL (15500) or SCRI (12437) projects
 SCRI = "12437"
diff --git a/scripts/deliver_pipeline.py b/scripts/deliver_pipeline.py
index 26e8af8..895dc63 100644
--- a/scripts/deliver_pipeline.py
+++ b/scripts/deliver_pipeline.py
@@ -27,70 +27,59 @@
 PICARD = "java -jar /igo/home/igo/resources/picard2.23.2/picard.jar "
 NGS_STATS_FASTQ_ENDPOINT = "http://igodb.mskcc.org:8080/ngs-stats/permissions/getRequestPermissions/"
 
-def deliver_pipeline_output(project, pi, recipe):
-    if not project or not pi or not recipe:
+def deliver_pipeline_output(project, pi, requestName):
+    if not project or not pi or not requestName:
         return "Project, pi and recipe are all required arguments."
     # change pi to all lowercase
     pi = pi.lower()
     delivery_folder = LAB_SHARE_DIR + "/" + pi + "/Project_" + project + "/pipeline"
 
-    if recipe.startswith("RNASeq"):
+    if requestName == "RNALibraryPrep":
         print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, recipe))
         bamdict = find_bams(project, STATS_DIR)
         bsub_commands =  write_bams_to_share(bamdict, delivery_folder)
         reconcile_bam_fastq_list(project, bamdict)
         return "Completed RNA bams delivery"
     
-    # if is missionbio recipe, find tapestri pipelie output and copy all sample folders
-    elif recipe == "MissionBio":
-        tapestri_path = "/igo/staging/stats/MissionBio/Project_" + project
-        if not os.path.exists(tapestri_path):
-            print("No tapestri result available")
-        else:
-            tapestri_delivery_folder = delivery_folder + "/Tapestri"
-            if not os.path.exists(tapestri_delivery_folder):
-                print("Creating pipeline delivery folder {}".format(tapestri_delivery_folder))
-                os.makedirs(tapestri_delivery_folder)
-            
-            # copy each sample folder to the delivery folder
-            tapestri_path = tapestri_path + "/"
-            sample_list = os.listdir(tapestri_path)
-            for sample in sample_list:
-                sample_folder = tapestri_path + sample
-                destination = tapestri_delivery_folder + "/" + sample
-                print("copy {}".format(sample_folder))
-                shutil.copytree(sample_folder, destination, symlinks=True)
-
-    # if recipe is CRISPRSeq or GeoMx, go to pipeline folder and find output, if exists the copy
-    # add cellranger multi output for featurebarcoding project here for now
-    elif recipe == "CRISPRSeq" or recipe == "GeoMx" or recipe == "GeoMX" or recipe == "10XGenomics_FeatureBarcoding":
-        pipeline_path = "/igo/staging/PIPELINE/Project_" + project
-        if not os.path.exists(pipeline_path):
-            print("No pipeline result available")
-        else:
-            if not os.path.exists(delivery_folder):
-                print("Creating pipeline delivery folder {}".format(delivery_folder))
-                os.makedirs(delivery_folder)
-            
-            # copy each sample folder to the delivery folder
-            pipeline_path = pipeline_path + "/"
-            sample_list = os.listdir(pipeline_path)
-            for sample in sample_list:
-                sample_path = pipeline_path + sample
-                destination = delivery_folder + "/" + sample
-                print("copy {}".format(sample_path))
-                if os.path.isdir(sample_path):
-                    shutil.copytree(sample_path, destination, symlinks=True)
-                else:
-                    cmd = "cp {} {}".format(sample_path, destination)
-                    print(cmd)
-                    call(cmd, shell=True)
-    
-    # if 10X recipe or SCRI project starting with 12437, copy cell ranger result to project folder
-    elif recipe.startswith("10XGenomics") or project.startswith("12437_"):
+    # TCR seq only need deliver manifest, those files located under viale lab drive
+    # example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
+    elif requestName == "TCRSeq":
+        pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
+        TCR_delivery_folder = delivery_folder + "/Manifest"
+        if not os.path.exists(TCR_delivery_folder):
+                print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
+                os.makedirs(TCR_delivery_folder)
+        
+        cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
+        print(cmd)
+        call(cmd, shell=True)
+
+    # For all other projects, check CELLRANGER folder first then PIPELINE folder
+    else:
         folder_list = scripts.deliver_cellranger.find_cellranger(project)
         if len(folder_list) == 0:
-            print("No cellranger result available")
+            # check PIPELINE folder 
+            pipeline_path = "/igo/staging/PIPELINE/Project_" + project
+            if not os.path.exists(pipeline_path):
+                print("No cellranger/pipeline result available")
+            else:
+                if not os.path.exists(delivery_folder):
+                    print("Creating pipeline delivery folder {}".format(delivery_folder))
+                    os.makedirs(delivery_folder)
+                
+                # copy each sample folder to the delivery folder
+                pipeline_path = pipeline_path + "/"
+                sample_list = os.listdir(pipeline_path)
+                for sample in sample_list:
+                    sample_path = pipeline_path + sample
+                    destination = delivery_folder + "/" + sample
+                    print("copy {}".format(sample_path))
+                    if os.path.isdir(sample_path):
+                        shutil.copytree(sample_path, destination, symlinks=True)
+                    else:
+                        cmd = "cp {} {}".format(sample_path, destination)
+                        print(cmd)
+                        call(cmd, shell=True)
         else:
             # create pipeline folder if not exists
             cellranger_delivery_folder = delivery_folder + "/cellranger"
@@ -105,21 +94,6 @@ def deliver_pipeline_output(project, pi, recipe):
                 print("copy {}".format(folder))
                 shutil.copytree(folder, sample_delivery_name, symlinks=True)
 
-    # TCR seq only need deliver manifest, those files located under viale lab drive
-    # example file: /pskis34/LIMS/TCRseqManifest/Project_13545_TCRseq_Manifest_Beta.csv
-    elif recipe == "TCRSeq-IGO":
-        pipeline_path_prefix = "/rtssdc/mohibullahlab/LIMS/TCRseqManifest/Project_" + project + "_TCRseq"
-        TCR_delivery_folder = delivery_folder + "/Manifest"
-        if not os.path.exists(TCR_delivery_folder):
-                print("Creating pipeline delivery folder {}".format(TCR_delivery_folder))
-                os.makedirs(TCR_delivery_folder)
-        
-        cmd = "cp {}* {}/".format(pipeline_path_prefix, TCR_delivery_folder)
-        print(cmd)
-        call(cmd, shell=True)
-
-    else:
-        print("Pipeline delivery is not needed for recipe {} and project {}".format(recipe, project))
     return "Completed pipeline delivery"
 
 def find_bams(project, stats_base_dir):
diff --git a/stats_by_project_dag.py b/stats_by_project_dag.py
index b99dc9e..ddb104d 100644
--- a/stats_by_project_dag.py
+++ b/stats_by_project_dag.py
@@ -57,9 +57,9 @@ def run_stats(ds, **kwargs):
                 print(cmd)
                 subprocess.run(cmd, shell=True)
 
-        elif "10X_" in recipe:
+        elif "SC_Chromium" in recipe:
             scripts.cellranger.launch_cellranger_by_project_location(project_directory, recipe, species)
-        elif "ONT" in recipe:
+        elif "Nanopore" in recipe:
             cmd = "bsub -J ont_stats_{} -n 16 -M 16 /igo/work/nabors/tools/venvpy3/bin/python /igo/work/igo/igo-demux/scripts/ont_stats.py {}".format(project_id, project_directory)
             print(cmd)
             subprocess.run(cmd, shell=True)

From dc946b440818b2b07f2d944e6bbb487ff7b3ca02 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Fri, 10 May 2024 16:38:19 -0400
Subject: [PATCH 66/87] update recipe

---
 SampleSheet.py   |  4 ++--
 demux_run_dag.py | 14 +++++---------
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/SampleSheet.py b/SampleSheet.py
index b41f43d..7907ef8 100644
--- a/SampleSheet.py
+++ b/SampleSheet.py
@@ -99,7 +99,7 @@ def split_sample_sheet(self):
          if sample sheet recipes have mixed DLP and other all DLP need to go on a separate sample sheet named "_DLP"
         """
         # if 10x DRAGEN demux add to header CreateFastqForIndexReads,1,,,,,,,
-        if any("10X_" in s for s in self.recipe_set):
+        if any("SC_Chromium" in s for s in self.recipe_set):
             print("Adding CreateFastqForIndexReads,1 to sample sheet header since 10X samples are present")
             self.df_ss_header.loc[len(self.df_ss_header.index)-1] = ["CreateFastqForIndexReads",1,"","","","","","",""]
             self.df_ss_header.loc[len(self.df_ss_header.index)] = ["[Data]","","","","","","","",""]
@@ -111,7 +111,7 @@ def split_sample_sheet(self):
         split_ss_list = [ss_copy, self]  
 
         was_split = False
-        if "DLP" in self.recipe_set and len(self.recipe_set) > 1:
+        if "SC_DLP" in self.recipe_set and len(self.recipe_set) > 1:
             print("Copying all DLP samples to a new sample sheet")
             # copy all DLP rows to a new sample sheet
             dlp_data = self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("DLP") == True].copy()
diff --git a/demux_run_dag.py b/demux_run_dag.py
index 12e3733..aa026bb 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -66,7 +66,7 @@ def demux(ds, **kwargs):
         
         # check if the sample sheet contains DLP project
         is_DLP = False
-        if "DLP" in sample_sheet.recipe_set:
+        if "SC_DLP" in sample_sheet.recipe_set:
             is_DLP = True
             dragen_demux = True
         
@@ -214,7 +214,7 @@ def stats(ds, **kwargs):
 
     def fingerprinting(ds, **kwargs):
         # read in sample sheet as arguments, filter out projects that need to run fingerprinting
-        recipe_list_for_fp = [".*IMPACT*", ".*Heme*", "IDT_Exome*", "WholeExomeSequencing", "Twist_Exome", "MSK-ACCESS*", "CMO-CH", "HumanWholeGenome"]
+        recipe_list_for_fp = ["PED-PEG", "WGS_Deep", "HC_IMPACT", "HC_IMPACT-Heme", "HC_ACCESS", "WES_Human", "HC_CMOCH"]
         # call fingerprinting_dag.py for each project
         samplesheet_path = kwargs["params"]["samplesheet"]
 
@@ -228,13 +228,9 @@ def fingerprinting(ds, **kwargs):
         project_list_to_run = []        
         for project, recipe in sample_sheet.project_dict.items():
             # fingerprinting only support human
-            if project_genome_dict[project] == "Human":
-                for recipe_list_item in recipe_list_for_fp:
-                    print(project, recipe)
-                    expr = re.compile(recipe_list_item)
-                    if expr.match(recipe):
-                        project_list_to_run.append(project)
-                        break
+            if project_genome_dict[project] == "Human" and recipe in recipe_list_for_fp:
+                project_list_to_run.append(project)
+                
         print("Projects need to run fp: {}".format(project_list_to_run))
         if len(project_list_to_run) == 0:
             return "No project need to run fingerprinting"

From c51746524be1f3c280b2eacb6755a29e6b2f5392 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 16 May 2024 10:10:28 -0400
Subject: [PATCH 67/87] Update dragen_csv_to_html.py

changing index and index2 to create a demux html file for a run that was demuxed using index2 only
---
 scripts/dragen_csv_to_html.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/dragen_csv_to_html.py b/scripts/dragen_csv_to_html.py
index 9c871b4..ee3c0d1 100644
--- a/scripts/dragen_csv_to_html.py
+++ b/scripts/dragen_csv_to_html.py
@@ -25,9 +25,9 @@ def build_lane_summary_html(demux_reports_dir, write_to_file):
     for i in range(1, lane_number + 1):
         df_name = "top_unknown_lane" + str(i)
         df_by_lanes[df_name] = top_unknown_barcodes_csv_covert.loc[top_unknown_barcodes_csv_covert["Lane"] == i]
-        if not df_by_lanes[df_name]["index2"].isnull().values.any():
-            df_by_lanes[df_name]["index"] = df_by_lanes[df_name]["index"].str.cat(df_by_lanes[df_name]["index2"], sep="-")
-        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index2", axis=1)
+        if not df_by_lanes[df_name]["index"].isnull().values.any():
+            df_by_lanes[df_name]["index2"] = df_by_lanes[df_name]["index2"].str.cat(df_by_lanes[df_name]["index"], sep="-")
+        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index", axis=1)
     # format two tables in the html with different column headers
     with open(write_to_file, 'w') as _file:
         _file.write("<h2>Lane Summary<h2>" + demux_stats_csv_convert.to_html(index = False, float_format =  '{:,.0f}'.format) + "\n<h2>Top Unknown Barcodes<h2>\n" + "<table>\n" )

From 3c7a34b647a2a4257a93c445e7dd86065e9187b0 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 16 May 2024 10:15:38 -0400
Subject: [PATCH 68/87] Update dragen_csv_to_html.py

changing code back to original for the index,index2 for creating the lane_barcode.html file
---
 scripts/dragen_csv_to_html.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/dragen_csv_to_html.py b/scripts/dragen_csv_to_html.py
index ee3c0d1..9c871b4 100644
--- a/scripts/dragen_csv_to_html.py
+++ b/scripts/dragen_csv_to_html.py
@@ -25,9 +25,9 @@ def build_lane_summary_html(demux_reports_dir, write_to_file):
     for i in range(1, lane_number + 1):
         df_name = "top_unknown_lane" + str(i)
         df_by_lanes[df_name] = top_unknown_barcodes_csv_covert.loc[top_unknown_barcodes_csv_covert["Lane"] == i]
-        if not df_by_lanes[df_name]["index"].isnull().values.any():
-            df_by_lanes[df_name]["index2"] = df_by_lanes[df_name]["index2"].str.cat(df_by_lanes[df_name]["index"], sep="-")
-        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index", axis=1)
+        if not df_by_lanes[df_name]["index2"].isnull().values.any():
+            df_by_lanes[df_name]["index"] = df_by_lanes[df_name]["index"].str.cat(df_by_lanes[df_name]["index2"], sep="-")
+        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index2", axis=1)
     # format two tables in the html with different column headers
     with open(write_to_file, 'w') as _file:
         _file.write("<h2>Lane Summary<h2>" + demux_stats_csv_convert.to_html(index = False, float_format =  '{:,.0f}'.format) + "\n<h2>Top Unknown Barcodes<h2>\n" + "<table>\n" )

From 3daa6d043dcbd3498f7b0376c087bf402e50ce23 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 20 May 2024 08:12:42 -0400
Subject: [PATCH 69/87] Update cellranger.py

---
 scripts/cellranger.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index 73b7737..0b7e266 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -55,12 +55,8 @@ def get_tag(recipe):
     tag = "Skip"
     if recipe in CONFIG.COUNT_FLAVORS:
         tag = "count"
-    if recipe in CONFIG.CNV_FLAVORS:
-        tag = "cnv"    
     if recipe in CONFIG.VDJ_FLAVORS:
         tag = "vdj"
-    if recipe in CONFIG.ATAC_FLAVORS:
-        tag = "atac_count"
     if recipe in CONFIG.ARC_FLAVORS:
         tag = "arc"
     if recipe in CONFIG.SPATIAL_FLAVORS:

From e9bb520b9ece7bbfe91e308c5a54ea5ca7af51d1 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 20 May 2024 08:19:07 -0400
Subject: [PATCH 70/87] update test code according to new recipe

---
 test/SampleSheet_DLP.csv | 38 +++++++++++++++++++-------------------
 test_scripts.py          |  2 +-
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/test/SampleSheet_DLP.csv b/test/SampleSheet_DLP.csv
index 987b100..a811a3f 100644
--- a/test/SampleSheet_DLP.csv
+++ b/test/SampleSheet_DLP.csv
@@ -1,20 +1,20 @@
-[Header],,,,,,,,
-IEMFileVersion,4,,,,,,,
-Date,11/30/2021,,,,,,,
-Workflow,GenerateFASTQ,,,,,,,
-Application,MICHELLE,,,,,,,
-Assay,,,,,,,,
-Description,,,,,,,,
-Chemistry,Default,,,,,,,
-,,,,,,,,
-[Reads],,,,,,,,
-151,,,,,,,,
-151,,,,,,,,
-,,,,,,,,
-[Settings],,,,,,,,
-BarcodeMismatchesIndex1,0,,,,,,,
-BarcodeMismatchesIndex2,0,,,,,,,
-[Data],,,,,,,,
-Lane,Sample_ID,Sample_Plate,Sample_Well,I7_Index_ID,index,index2,Sample_Project,Description
-1,DLPNegativeCONTROL_12345A_3_3_IGO_DLPNegativeCONTROL-2710,Mouse,DLP,DLPi7_03-i5_03,AAGGACAT,AACCCCGT,Project_12345,someone@mskcc.org
+[Header],,,,,,,,
+IEMFileVersion,4,,,,,,,
+Date,11/30/21,,,,,,,
+Workflow,GenerateFASTQ,,,,,,,
+Application,MICHELLE,,,,,,,
+Assay,,,,,,,,
+Description,,,,,,,,
+Chemistry,Default,,,,,,,
+,,,,,,,,
+[Reads],,,,,,,,
+151,,,,,,,,
+151,,,,,,,,
+,,,,,,,,
+[Settings],,,,,,,,
+BarcodeMismatchesIndex1,0,,,,,,,
+BarcodeMismatchesIndex2,0,,,,,,,
+[Data],,,,,,,,
+Lane,Sample_ID,Sample_Plate,Sample_Well,I7_Index_ID,index,index2,Sample_Project,Description
+1,DLPNegativeCONTROL_12345A_3_3_IGO_DLPNegativeCONTROL-2710,Mouse,SC_DLP,DLPi7_03-i5_03,AAGGACAT,AACCCCGT,Project_12345,someone@mskcc.org
 1,IM-1613_RU1697a_IGO_12437_AD_11,Mouse,10X_Genomics,SI-GA-G9,SI-GA-G9,SI-GA-G9,Project_12437_AD,peerd@mskcc.org
\ No newline at end of file
diff --git a/test_scripts.py b/test_scripts.py
index 8feba1f..1152ff1 100644
--- a/test_scripts.py
+++ b/test_scripts.py
@@ -30,7 +30,7 @@ def testCellranger_generate_cellranger_cmd():
 
 def testCellranger_get_tag():
     assert(cellranger.get_tag("10X_genomic") == "Skip")
-    assert(cellranger.get_tag("10X_Genomics_GeneExpression-3") == "count")
+    assert(cellranger.get_tag("SC_Chromium-GEX-3") == "count")
 
 def testCellranger_get_sequencer_runID():
     fastq_path = "/igo/staging/FASTQ/DIANA_0453_AHFKJ5DRXY/Project_06265_AG/Sample_06265_8869_1_IGO_06265_AG_3"

From 2a3904407adf39e3ee458f350d016879d0906619 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Mon, 20 May 2024 10:28:25 -0400
Subject: [PATCH 71/87] Update cellranger_multi.py

---
 scripts/cellranger_multi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/cellranger_multi.py b/scripts/cellranger_multi.py
index dae2b40..dd79db2 100644
--- a/scripts/cellranger_multi.py
+++ b/scripts/cellranger_multi.py
@@ -418,12 +418,12 @@ def gather_sample_set_info(sample_name):
         for key, value in sample.items():
             if value[0].startswith(ilab_request) and key.endswith(sample_number):
                 value[2] = value[2].split(",")
-                if "10X_Genomics_FeatureBarcoding" in value[2][0]:
+                if "SC_Chromium-FB-5" in value[2][0]:
                     if "Feature Barcoding" in fb_type:
                         sample_set["fb"] = "_IGO_".join([value[1], key])
                     if "Cell Hashing" in fb_type:
                         sample_set["ch"] = "_IGO_".join([value[1], key])
-                if "10X_Genomics_VDJ" in value[2][0]:
+                if "SC_Chromium-BCR" in value[2][0] or "SC_Chromium-TCR" in value[2][0]:
                     sample_set["vdj"] = "_IGO_".join([value[1], key])
     # TODO add vdj type to the whole pipeline
     return sample_set

From d15734f48ecdc43c9f63b582e7b035066877acf5 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 20 May 2024 13:12:42 -0400
Subject: [PATCH 72/87] Update dragen_csv_to_html.py

temporary change to create html file from a run demuxed with index2 only
---
 scripts/dragen_csv_to_html.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/dragen_csv_to_html.py b/scripts/dragen_csv_to_html.py
index 9c871b4..ee3c0d1 100644
--- a/scripts/dragen_csv_to_html.py
+++ b/scripts/dragen_csv_to_html.py
@@ -25,9 +25,9 @@ def build_lane_summary_html(demux_reports_dir, write_to_file):
     for i in range(1, lane_number + 1):
         df_name = "top_unknown_lane" + str(i)
         df_by_lanes[df_name] = top_unknown_barcodes_csv_covert.loc[top_unknown_barcodes_csv_covert["Lane"] == i]
-        if not df_by_lanes[df_name]["index2"].isnull().values.any():
-            df_by_lanes[df_name]["index"] = df_by_lanes[df_name]["index"].str.cat(df_by_lanes[df_name]["index2"], sep="-")
-        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index2", axis=1)
+        if not df_by_lanes[df_name]["index"].isnull().values.any():
+            df_by_lanes[df_name]["index2"] = df_by_lanes[df_name]["index2"].str.cat(df_by_lanes[df_name]["index"], sep="-")
+        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index", axis=1)
     # format two tables in the html with different column headers
     with open(write_to_file, 'w') as _file:
         _file.write("<h2>Lane Summary<h2>" + demux_stats_csv_convert.to_html(index = False, float_format =  '{:,.0f}'.format) + "\n<h2>Top Unknown Barcodes<h2>\n" + "<table>\n" )

From ac2024c4bb71928b55fb22afa12e3c25c9c69937 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 20 May 2024 13:17:01 -0400
Subject: [PATCH 73/87] Update dragen_csv_to_html.py

change code back
---
 scripts/dragen_csv_to_html.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/dragen_csv_to_html.py b/scripts/dragen_csv_to_html.py
index ee3c0d1..9c871b4 100644
--- a/scripts/dragen_csv_to_html.py
+++ b/scripts/dragen_csv_to_html.py
@@ -25,9 +25,9 @@ def build_lane_summary_html(demux_reports_dir, write_to_file):
     for i in range(1, lane_number + 1):
         df_name = "top_unknown_lane" + str(i)
         df_by_lanes[df_name] = top_unknown_barcodes_csv_covert.loc[top_unknown_barcodes_csv_covert["Lane"] == i]
-        if not df_by_lanes[df_name]["index"].isnull().values.any():
-            df_by_lanes[df_name]["index2"] = df_by_lanes[df_name]["index2"].str.cat(df_by_lanes[df_name]["index"], sep="-")
-        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index", axis=1)
+        if not df_by_lanes[df_name]["index2"].isnull().values.any():
+            df_by_lanes[df_name]["index"] = df_by_lanes[df_name]["index"].str.cat(df_by_lanes[df_name]["index2"], sep="-")
+        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index2", axis=1)
     # format two tables in the html with different column headers
     with open(write_to_file, 'w') as _file:
         _file.write("<h2>Lane Summary<h2>" + demux_stats_csv_convert.to_html(index = False, float_format =  '{:,.0f}'.format) + "\n<h2>Top Unknown Barcodes<h2>\n" + "<table>\n" )

From 0114d986ac2afda070a81b906c31437f582a5ee1 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 20 May 2024 16:03:09 -0400
Subject: [PATCH 74/87] Update dragen_csv_to_html.py

change again for latest demux
---
 scripts/dragen_csv_to_html.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/dragen_csv_to_html.py b/scripts/dragen_csv_to_html.py
index 9c871b4..ee3c0d1 100644
--- a/scripts/dragen_csv_to_html.py
+++ b/scripts/dragen_csv_to_html.py
@@ -25,9 +25,9 @@ def build_lane_summary_html(demux_reports_dir, write_to_file):
     for i in range(1, lane_number + 1):
         df_name = "top_unknown_lane" + str(i)
         df_by_lanes[df_name] = top_unknown_barcodes_csv_covert.loc[top_unknown_barcodes_csv_covert["Lane"] == i]
-        if not df_by_lanes[df_name]["index2"].isnull().values.any():
-            df_by_lanes[df_name]["index"] = df_by_lanes[df_name]["index"].str.cat(df_by_lanes[df_name]["index2"], sep="-")
-        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index2", axis=1)
+        if not df_by_lanes[df_name]["index"].isnull().values.any():
+            df_by_lanes[df_name]["index2"] = df_by_lanes[df_name]["index2"].str.cat(df_by_lanes[df_name]["index"], sep="-")
+        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index", axis=1)
     # format two tables in the html with different column headers
     with open(write_to_file, 'w') as _file:
         _file.write("<h2>Lane Summary<h2>" + demux_stats_csv_convert.to_html(index = False, float_format =  '{:,.0f}'.format) + "\n<h2>Top Unknown Barcodes<h2>\n" + "<table>\n" )

From d4c5e5b6e7beca53b53c708d809ca1376cb7a3d0 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 20 May 2024 16:10:41 -0400
Subject: [PATCH 75/87] Update dragen_csv_to_html.py

change code back to original
---
 scripts/dragen_csv_to_html.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/dragen_csv_to_html.py b/scripts/dragen_csv_to_html.py
index ee3c0d1..9c871b4 100644
--- a/scripts/dragen_csv_to_html.py
+++ b/scripts/dragen_csv_to_html.py
@@ -25,9 +25,9 @@ def build_lane_summary_html(demux_reports_dir, write_to_file):
     for i in range(1, lane_number + 1):
         df_name = "top_unknown_lane" + str(i)
         df_by_lanes[df_name] = top_unknown_barcodes_csv_covert.loc[top_unknown_barcodes_csv_covert["Lane"] == i]
-        if not df_by_lanes[df_name]["index"].isnull().values.any():
-            df_by_lanes[df_name]["index2"] = df_by_lanes[df_name]["index2"].str.cat(df_by_lanes[df_name]["index"], sep="-")
-        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index", axis=1)
+        if not df_by_lanes[df_name]["index2"].isnull().values.any():
+            df_by_lanes[df_name]["index"] = df_by_lanes[df_name]["index"].str.cat(df_by_lanes[df_name]["index2"], sep="-")
+        df_by_lanes[df_name] = df_by_lanes[df_name].drop("index2", axis=1)
     # format two tables in the html with different column headers
     with open(write_to_file, 'w') as _file:
         _file.write("<h2>Lane Summary<h2>" + demux_stats_csv_convert.to_html(index = False, float_format =  '{:,.0f}'.format) + "\n<h2>Top Unknown Barcodes<h2>\n" + "<table>\n" )

From 49420eb70865a2d96049f4a0df82afa8465d77af Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Tue, 21 May 2024 16:23:50 -0400
Subject: [PATCH 76/87] new recipe names

Updating LaunchMetrics and run_param_config to be able to handle new recipe names from sample sheet
---
 scripts/LaunchMetrics.py    |  6 +--
 scripts/run_param_config.py | 83 ++++++++++++++++++++++---------------
 2 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index c84d0b2..94ca747 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -16,9 +16,9 @@
 # Global Variable : we do not want to process these experiments in this script
 DO_NOT_PROCESS = ["DLP"]
 # These recipes will be evaluated using DRAGEN because of their larger size of fastqs
-RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "MouseWholeGenome", "HumanWholeGenome", "PombeWholeGenome", "ChIPSeq", "AmpliconSeq"]
+RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "WGS_Deep", "ChIP", "CUT&RUN","Amplicon"]
 # these projects willl only need demux stats
-DEMUX_ONLY = ["SMARTSeq", "10X_Genomics"]
+DEMUX_ONLY = ["SMARTSeq", "Chromium", "10X_Genomics"]
 
 # Organisms to have DRAGEN BAMS
 DRAGEN_RNA_GENOMES = ["GRCh38", "grcm39"]
@@ -85,7 +85,7 @@ def launch_metrics(self, all_samples, run, project_directory):
 				self.dragen(sample, run, sample_parameters, work_directory, dragen_directory, fastq_list)
 				continue
 			# check for methylated samples
-			if ((sample.recipe == "MethylCaptureSeq") or (sample.recipe == "WholeGenomeBisulfiteSequencing")):
+			if ("Methyl" in sample.recipe):
 				pathlib.Path(dragen_directory).mkdir(parents = True, exist_ok = True)
 				self.dragen_methylation(sample, run, sample_parameters, work_directory, dragen_directory, fastq_list)
 				continue
diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index 7fa0307..a35ab98 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -59,25 +59,25 @@ def get_ordered_dic(unordered_dic):
 		Returns:
 			type, OrderedDict: Ordered dictionary by key-length
 		"""
-		return OrderedDict(sorted(unordered_dic.items(), key=lambda t: -len(t[0])))
+		return OrderedDict(sorted(unordered_dic.items(), key = lambda t: -len(t[0])))
 
 """ Mapping of recipes to their type, default should be DNA """
 recipe_type_mapping_UNORDERED = {
-		"MouseWholeGenome": { TYPE: "WGS" },
-		"PigWholeGenome": { TYPE: "WGS" },
-		"PombeWholeGenome": { TYPE: "WGS" },
-		"ShallowWGS": { TYPE: "WGS" },
-		"10X_Genomics_WGS": { TYPE: "WGS" },
-		"WholeGenomeSequencing": { TYPE: "WGS" },
-		"HumanWholeGenome": { TYPE: "WGS" },
+		# "MouseWholeGenome": { TYPE: "WGS" },
+		# "PigWholeGenome": { TYPE: "WGS" },
+		# "PombeWholeGenome": { TYPE: "WGS" },
+		"WGS_Shallow": { TYPE: "WGS" },
+		# "10X_Genomics_WGS": { TYPE: "WGS" },
+		"WGS_Metagenomic": { TYPE: "WGS" },
+		"WGS_Deep": { TYPE: "WGS" },
 		".*RNA.*": { TYPE: "RNA" },
-		".*96Well_SmartSeq2": { TYPE: "RNA" },
+		# ".*96Well_SmartSeq2": { TYPE: "RNA" },
 		".*SMARTer.*": { TYPE: "RNA" },
-		"FusionDiscoverySeq": { TYPE: "RNA" },
+		# "FusionDiscoverySeq": { TYPE: "RNA" },
 		".*Ribo.*": { TYPE: "RNA" },
-		"SMART-Seq": { TYPE: "RNA" },
+		# "SMART-Seq": { TYPE: "RNA" },
 		"SMARTSeq": { TYPE: "RNA" },
-		".*CDH1_RNA.*": { TYPE: "CAPTURE" },
+		# ".*CDH1_RNA.*": { TYPE: "CAPTURE" },
 		# FOR NEW ENTRIES
 		# "{regex}": { TYPE: type }
 		".*": { TYPE: "DNA" }      # DEFAULT
@@ -97,16 +97,16 @@ def get_ordered_dic(unordered_dic):
 				"RDM": "hg19",
 				"myTYPE_V1": "hg19",
 				"PanCancerV2": "hg19",
-				"MissionBio-Heme": "GRCh38",
+				"User_MissionBio": "GRCh38",
 				"WholeExome_v4": "hg19",
 				"AmpliSeq": "hg19",
 				"HemeBrainPACT_v1": "hg19"
 		},
 		"Mouse": {
 				"M-IMPACT_v1": "mm10",
-				"M-IMPACT_v2": "mm10",
+				"HC_IMPACT-Mouse": "mm10",
 				"Twist_mWES": "mm10",
-				"10X_Genomics_Multiome": "mm10"
+				"SC_Chromium-Multiome": "mm10"
 		}
 }
 """ Mapping of species to their genome-type """
@@ -432,7 +432,8 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "yes",
 				MD: "yes"
 		},
-		"IMPACT505": {
+		"HC_IMPACT": {
+				# IMPACT505
 				# NOTE: interval list file name "IMPACT468_BAITS" is stored in LIMS and passed to pipelines, change file name with caution
 				BAITS: "/igo/home/igo/resources/ilist/hg38/IMPACT505/IMPACT505_BAITS.baits",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/IMPACT505/IMPACT505_TARGETS.targets",
@@ -445,13 +446,15 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "yes",
 				MD: "yes"
 		},
-		"IMPACT-Heme": {
+		"HC_IMPACT-Heme": {
+				# IMPACT-Heme
 				BAITS: "/igo/home/igo/resources/ilist/hg38/IMPACT-Heme_v2/IMPACT-Heme_v2_BAITS.iList",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/IMPACT-Heme_v2/IMPACT-Heme_v2_TARGETS.iList",
 				MSKQ: "yes",
 				MD: "yes"
 		},
-		"IMPACT_Heme_v2": {
+		"HC_IMPACT-Heme": {
+				# IMPACT_Heme_v2
 				BAITS: "/igo/home/igo/resources/ilist/hg38/IMPACT-Heme_v2/IMPACT-Heme_v2_BAITS.iList",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/IMPACT-Heme_v2/IMPACT-Heme_v2_TARGETS.iList",
 				MSKQ: "yes",
@@ -463,7 +466,8 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "yes",
 				MD: "yes"
 		},
-		"M-IMPACT_v2": {
+		"HC_IMPACT-Mouse": {
+				# M-IMPACT_v2
 				BAITS: "/home/igo/resources/BED-Targets/IMPACT/MM_IMPACT/M-IMPACT_v2.baits",
 				TARGETS: "/home/igo/resources/BED-Targets/IMPACT/MM_IMPACT/M-IMPACT_v2.targets",
 				MSKQ: "yes",
@@ -501,10 +505,11 @@ def get_ordered_dic(unordered_dic):
 				MD: "yes"
 		},
 		"IDT_Exome_v2_FP_Viral_Probes": {
-			BAITS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_BAITS.baits",
-			TARGETS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_TARGETS.targets",
-			MSKQ: "no",
-			MD: "yes"
+				# IDT_Exome_v2_FP_Viral_Probes or WES_Human
+				BAITS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_BAITS.baits",
+				TARGETS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_TARGETS.targets",
+				MSKQ: "no",
+				MD: "yes"
 		},
 		"IDT_Exome_v1": {
 				BAITS: "/home/igo/resources/BED-Targets/xgen-exome-research-panel-BAITS.iList",
@@ -548,14 +553,16 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "yes",
 				MD: "yes"
 		},
-		"MSK-ACCESS_v1": {
+		"HC_ACCESS": {
+				# MSK-ACCESS_v1
 				BAITS: "/igo/home/igo/resources/ilist/hg38/MSK-ACCESS-v1/MSK-ACCESS-v1_0-probesAllwFP.baits",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/MSK-ACCESS-v1/MSK-ACCESS-v1_0-probesAllwFP.targets",
 				MSKQ: "no",
 				MD: "yes",
 				HAPLOTYPE_MAP: "/home/igo/fingerprint_maps/map_files/hg38_no_chr_ACCESS_unordered.map"
 		},
-		"MSK-ACCESS_v2": {
+		"HC_ACCESS": {
+				# MSK-ACCESS_v2
 				BAITS: "/igo/home/igo/resources/ilist/hg38/MSK-ACCESS-v2/MSK-ACCESS-v2_0-probesAllwFP.baits",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/MSK-ACCESS-v2/MSK-ACCESS-v2_0-probesAllwFP.targets",
 				MSKQ: "no",
@@ -574,7 +581,8 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"MissionBio-Heme": {
+		"User_MissionBio": {
+				# MissionBio-Heme
 				BAITS: "/igo/work/nabors/bed_files/Mission_Bio/hg38/MissionBio-Heme_BAITS.iList",
 				TARGETS: "/igo/work/nabors/bed_files/Mission_Bio/hg38/MissionBio-Heme_TARGETS.iList",
 				MSKQ: "no",
@@ -648,25 +656,29 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"MethylCaptureSeq": {
+		"Methyl_Capture": {
+				# MethylCaptureSeq
 				BAITS: "/igo/home/igo/resources/ilist/hg38/MethylCaptureSeq/truseq-methyl-capture-epic-manifest-file-hg38.baits.ilist",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/MethylCaptureSeq/truseq-methyl-capture-epic-manifest-file-hg38.targets.ilist",
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"MSK-CH": {
+		"HC_CMOCH": {
+				# MSK-CH
 				BAITS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.baits",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.targets",
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"CMO-CH": {
+		"HC_CMOCH": {
+				# CMO-CH
 				BAITS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.baits",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/CMO-CH/CMO-CH.hg38.targets",
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"HumanWholeGenome": {
+		"WGS_Deep": {
+				# HumanWholeGenome
 				MSKQ: "no",
 				MD: "yes",
 				HAPLOTYPE_MAP: "", # TODO - Add this
@@ -674,7 +686,8 @@ def get_ordered_dic(unordered_dic):
 				REFERENCE: "/igo/work/genomes/H.sapiens/GRCh38.p13/ncbi-genomes-2021-09-23/GCF_000001405.39_GRCh38.p13_genomic.fna",
 				DGN_REFERENCE: "/staging/ref/hg38_alt_masked_graph_v2+cnv+graph+rna-8-1644018559"
 		},
-		"MouseWholeGenome": {
+		"WGS_Deep": {
+				# MouseWholeGenome
 				MSKQ: "no",
 				MD: "yes"
 				# TODO
@@ -692,7 +705,7 @@ def get_ordered_dic(unordered_dic):
 						# TODO
 						# sh $DIR/../PicardScripts/LaunchPipelines.sh $RUNTYPE --input /igo/work/FASTQ/$RUNNAME/$PROJECT/ --genome $GENOME --type WGS --md $MARKDUPLICATES --mskq $MSKQ
 		},
-		"ShallowWGS": {
+		"WGS_Shallow": {
 				MSKQ: "no",
 				MD: "yes"
 				# TODO
@@ -710,13 +723,15 @@ def get_ordered_dic(unordered_dic):
 				# TODO
 				# sh $DIR/../PicardScripts/LaunchPipelines.sh $RUNTYPE --input /igo/work/FASTQ/$RUNNAME/$PROJECT/ --genome $GENOME --md $MARKDUPLICATES --mskq $MSKQ
 		},
-		"AmpliconSeq": {
+		"DNA_Amplicon": {
+				# AmpliconSeq
 				MSKQ: "no",
 				MD: "yes"
 				# TODO
 				# sh $DIR/../PicardScripts/LaunchPipelines.sh $RUNTYPE --input /igo/work/FASTQ/$RUNNAME/$PROJECT/ --genome $GENOME --md $MARKDUPLICATES --mskq $MSKQ
 		},
-		"CRISPRSeq": {
+		"DNA_CRISPR": {
+				# CRISPRSeq
 				MSKQ: "no",
 				MD: "yes"
 				# TODO

From fcfe5d11fa60a488dfcb6b410b49199e8141826c Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Wed, 22 May 2024 15:01:23 -0400
Subject: [PATCH 77/87] Update LaunchMetrics.py

Added Visium to the list of recipes that only need demux stats
---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index 94ca747..a200b4b 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -18,7 +18,7 @@
 # These recipes will be evaluated using DRAGEN because of their larger size of fastqs
 RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "WGS_Deep", "ChIP", "CUT&RUN","Amplicon"]
 # these projects willl only need demux stats
-DEMUX_ONLY = ["SMARTSeq", "Chromium", "10X_Genomics"]
+DEMUX_ONLY = ["SMARTSeq", "Chromium", "10X_Genomics", "Visium"]
 
 # Organisms to have DRAGEN BAMS
 DRAGEN_RNA_GENOMES = ["GRCh38", "grcm39"]

From 37a96244d102f69d8e4cca3ab44ecd6357115bae Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Wed, 29 May 2024 08:43:34 -0400
Subject: [PATCH 78/87] Update deliver_pipeline.py

---
 scripts/deliver_pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/deliver_pipeline.py b/scripts/deliver_pipeline.py
index 895dc63..4e0213f 100644
--- a/scripts/deliver_pipeline.py
+++ b/scripts/deliver_pipeline.py
@@ -35,7 +35,7 @@ def deliver_pipeline_output(project, pi, requestName):
     delivery_folder = LAB_SHARE_DIR + "/" + pi + "/Project_" + project + "/pipeline"
 
     if requestName == "RNALibraryPrep":
-        print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, recipe))
+        print("Delivering all RNASeq .bams for {} {} {}".format(project, pi, requestName))
         bamdict = find_bams(project, STATS_DIR)
         bsub_commands =  write_bams_to_share(bamdict, delivery_folder)
         reconcile_bam_fastq_list(project, bamdict)

From a964db0c5bd39ce881fc75393b5b9942a439535c Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sat, 1 Jun 2024 10:36:53 -0400
Subject: [PATCH 79/87] Update run_param_config.py

needed to update the recipe for Whole Exome Sequencing for Human and Mouse
---
 scripts/run_param_config.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index a35ab98..5bf9564 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -473,7 +473,8 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "yes",
 				MD: "yes"
 		},
-		"WholeExomeSequencing": {
+		"WES_Human": {
+				# WholeExomeSequencing
 				BAITS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_BAITS.baits",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_TARGETS.targets",
 				MSKQ: "no",
@@ -504,7 +505,7 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"IDT_Exome_v2_FP_Viral_Probes": {
+		"WES_Human": {
 				# IDT_Exome_v2_FP_Viral_Probes or WES_Human
 				BAITS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_BAITS.baits",
 				TARGETS: "/igo/home/igo/resources/ilist/hg38/IDT_Exome_v2_FP/IDT_Exome_v2_FP_TARGETS.targets",
@@ -612,7 +613,8 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"Twist_mWES": {
+		"WES_Mouse": {
+			# Twist_mWES
 			BAITS: "/home/igo/resources/ilist/Twist_mWES/Twist_mWES_BAITS.IntervalList",
 			TARGETS: "/home/igo/resources/ilist/Twist_mWES/Twist_mWES_TARGETS.IntervalList"
 		},

From ef568930f5ce256ce45b36dc4de4ae1f5453aebf Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Sun, 2 Jun 2024 12:49:30 -0400
Subject: [PATCH 80/87] Update run_param_config.py

changing WES_Mouse to point to the Agilent mouse bait set
---
 scripts/run_param_config.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index 5bf9564..5cdc458 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -105,6 +105,7 @@ def get_ordered_dic(unordered_dic):
 		"Mouse": {
 				"M-IMPACT_v1": "mm10",
 				"HC_IMPACT-Mouse": "mm10",
+				"WES_Mouse": "mm10",
 				"Twist_mWES": "mm10",
 				"SC_Chromium-Multiome": "mm10"
 		}
@@ -493,7 +494,8 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"Agilent_MouseAllExonV1": {
+		"WES_Mouse": {
+				# Agilent_MouseAllExonV1
 				BAITS: "/home/igo/resources/BED-Targets/Agilent_MouseAllExonV1_mm10_v1_baits.ilist",
 				TARGETS: "/home/igo/resources/BED-Targets/Agilent_MouseAllExonV1_mm10_v1_targets.ilist",
 				MSKQ: "no",
@@ -613,10 +615,12 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"WES_Mouse": {
-			# Twist_mWES
+		"Twist_mWES": {
+			# WES_Mouse ??
 			BAITS: "/home/igo/resources/ilist/Twist_mWES/Twist_mWES_BAITS.IntervalList",
-			TARGETS: "/home/igo/resources/ilist/Twist_mWES/Twist_mWES_TARGETS.IntervalList"
+			TARGETS: "/home/igo/resources/ilist/Twist_mWES/Twist_mWES_TARGETS.IntervalList",
+			MSKQ: "no",
+			MD: "yes"
 		},
 		"Twist_Kentsis_spikeinWES_RK_V3": {
 			BAITS: "/home/igo/resources/ilist/Twist_Kentsis_spikeinWES_RK_V3/Twist_Kentsis_spikeinWES_RK_V3_BAITS.intervalList",

From 6a5be030baefa83964c1f1cb8a12af579c1164e8 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 3 Jun 2024 13:19:30 -0400
Subject: [PATCH 81/87] Update LaunchMetrics.py

changing DLP recipe name to its new name (SC_DLP) so that DLP samples are still skipped
---
 scripts/LaunchMetrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/LaunchMetrics.py b/scripts/LaunchMetrics.py
index a200b4b..6f3c9f7 100644
--- a/scripts/LaunchMetrics.py
+++ b/scripts/LaunchMetrics.py
@@ -14,7 +14,7 @@
 
 
 # Global Variable : we do not want to process these experiments in this script
-DO_NOT_PROCESS = ["DLP"]
+DO_NOT_PROCESS = ["SC_DLP"]
 # These recipes will be evaluated using DRAGEN because of their larger size of fastqs
 RUN_ON_DRAGEN = ["MissionBio", "SingleCellCNV", "WGS_Deep", "ChIP", "CUT&RUN","Amplicon"]
 # these projects willl only need demux stats

From 8407306612f2473bb12758e3034298391ded11d9 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 3 Jun 2024 13:52:23 -0400
Subject: [PATCH 82/87] Update demux_run_dag.py

updated recipes from DLP to SC_DLP
---
 demux_run_dag.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/demux_run_dag.py b/demux_run_dag.py
index aa026bb..c69ccd4 100644
--- a/demux_run_dag.py
+++ b/demux_run_dag.py
@@ -107,7 +107,7 @@ def demux(ds, **kwargs):
     def get_dlp_chip(samplesheet, project):
         samplesheet.df_ss_data.reset_index()
         for index, row in samplesheet.df_ss_data.iterrows():
-            if row['Sample_Well'] == 'DLP' and project == row['Sample_Project']:
+            if row['Sample_Well'] == 'SC_DLP' and project == row['Sample_Project']:
                 # return chip from 071PP_DLP_UNSORTED_128624A_13_12_IGO_09443_CU_1_1_121
                 sample = row['Sample_ID']
                 return get_dlp_chip_from_sample_name(sample)
@@ -135,7 +135,7 @@ def stats(ds, **kwargs):
         if "REFERENCE" in samplesheet_path:
             return "No stats for reference "  + samplesheet_path
 
-        if "DLP" in sample_sheet.recipe_set:
+        if "SC_DLP" in sample_sheet.recipe_set:
             scripts.get_total_reads_from_demux.run_DLP(sample_sheet, sequencer_and_run)
             scripts.upload_stats.upload_stats(sequencer_and_run)
             

From 8dcb8c2b84d58369b7236175e420b2bd51a53de1 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 3 Jun 2024 14:14:40 -0400
Subject: [PATCH 83/87] Update run_param_config.py

let recipes with WES_Mouse use the Twist_mWES bait set
---
 scripts/run_param_config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index 5cdc458..61f5609 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -494,7 +494,7 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"WES_Mouse": {
+		"Agilent_MouseAllExonV1": {
 				# Agilent_MouseAllExonV1
 				BAITS: "/home/igo/resources/BED-Targets/Agilent_MouseAllExonV1_mm10_v1_baits.ilist",
 				TARGETS: "/home/igo/resources/BED-Targets/Agilent_MouseAllExonV1_mm10_v1_targets.ilist",
@@ -615,8 +615,8 @@ def get_ordered_dic(unordered_dic):
 				MSKQ: "no",
 				MD: "yes"
 		},
-		"Twist_mWES": {
-			# WES_Mouse ??
+		"WES_Mouse": {
+			# WES_Mouse recipes use the Twist_mWES bait set
 			BAITS: "/home/igo/resources/ilist/Twist_mWES/Twist_mWES_BAITS.IntervalList",
 			TARGETS: "/home/igo/resources/ilist/Twist_mWES/Twist_mWES_TARGETS.IntervalList",
 			MSKQ: "no",

From 6b08ed7c4ee28af3dd1de5b3c37f83578a3c7b65 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 10 Jun 2024 09:44:44 -0400
Subject: [PATCH 84/87] Update SampleSheet.py

updating this script to recognize new SC_DLP name so the sample sheet can split correctly
---
 SampleSheet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/SampleSheet.py b/SampleSheet.py
index 7907ef8..266d9f3 100644
--- a/SampleSheet.py
+++ b/SampleSheet.py
@@ -114,9 +114,9 @@ def split_sample_sheet(self):
         if "SC_DLP" in self.recipe_set and len(self.recipe_set) > 1:
             print("Copying all DLP samples to a new sample sheet")
             # copy all DLP rows to a new sample sheet
-            dlp_data = self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("DLP") == True].copy()
+            dlp_data = self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("SC_DLP") == True].copy()
             # and remove DLP samples from the main sample sheet
-            self.df_ss_data= self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("DLP") == False].copy()
+            self.df_ss_data= self.df_ss_data[self.df_ss_data["Sample_Well"].str.match("SC_DLP") == False].copy()
             # rename DLP sample sheet w/"_DLP.csv"
             dlp_path = os.path.splitext(self.path)[0]+'_DLP.csv'
             header_copy = self.df_ss_header.copy(deep=True)

From 05c46ae5a9a02e827003c23ad62d2b1f7364bdc5 Mon Sep 17 00:00:00 2001
From: luc <cl3262@nyu.edu>
Date: Tue, 11 Jun 2024 11:56:24 -0400
Subject: [PATCH 85/87] add HD image info to visium pipeline

---
 scripts/cellranger.py         | 37 ++++++++++++++++++++---------------
 scripts/cellranger_spatial.py | 18 +++++++++++++++--
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/scripts/cellranger.py b/scripts/cellranger.py
index 0b7e266..18816c2 100644
--- a/scripts/cellranger.py
+++ b/scripts/cellranger.py
@@ -6,6 +6,7 @@
 import json
 import subprocess
 import os.path
+import shutil
 import scripts.get_sequencing_read_data
 import scripts.cellranger_spatial
 import scripts.cellranger_config as CONFIG
@@ -144,18 +145,11 @@ def lanuch_by_project(sequencer_and_run, project, sample_id_list, sample_genome_
     sample_fastqfile_dict = find_fastq_file(sample_id_list)
     send_json = {}
     send_json["samples"] = []
-    # CREATE RUN FOLDER AND PROJECT FOLDER IF NOT ALREADY THERE
-    os.chdir(CONFIG.STATS_AREA)
-    runs = next(os.walk("."))[1]
-    if sequencer_and_run not in runs:
-        os.mkdir(sequencer_and_run, CONFIG.ACCESS)
-                
-    stats_and_run = CONFIG.STATS_AREA + sequencer_and_run
-    os.chdir(stats_and_run)
-    projects = next(os.walk("."))[1]
-    if project not in projects:
-        os.mkdir(project, CONFIG.ACCESS)
-    work_area = stats_and_run + "/" + project + "/" 
+    # CREATE RUN FOLDER AND PROJECT FOLDER IF NOT ALREADY THERE    
+    work_area = CONFIG.STATS_AREA + sequencer_and_run + "/" + project + "/" 
+    if not os.path.exists(work_area):
+        os.makedirs(work_area, CONFIG.ACCESS)
+
     # GO TO project ID LOCATION to start cellranger command
     os.chdir(work_area)
 
@@ -190,15 +184,26 @@ def lanuch_by_project(sequencer_and_run, project, sample_id_list, sample_genome_
                     cmd = "{}--id=Sample_{}{}".format(tool, sample, transcriptome) + "--fastqs=" + ",".join(sample_fastqfile_dict[sample]) + " --cytaimage={} --slide={} --area={}".format(sample_info.tiff_image, sample_info.chip_id, sample_info.chip_position)
                     if sample_genome_dict[sample] == "Human":
                         probe = CONFIG.config_dict[tag]["probe"]["Human_CytAssist"]
-                        cmd = cmd + " --probe-set={}".format(probe)
                     elif sample_genome_dict[sample] == "Mouse":
-                        probe = CONFIG.config_dict[tag]["probe"][sample_genome_dict[sample]]
-                        cmd = cmd + " --probe-set={}".format(probe)
+                        if sample_info.slide.startswith("H1"):
+                            probe = CONFIG.config_dict[tag]["probe"]["Mouse_HD"]
+                        else:
+                            probe = CONFIG.config_dict[tag]["probe"]["Mouse"]
+                    cmd = cmd + " --probe-set={}".format(probe)
                         
                 elif sample_info.preservation == "FFPE":
                     probe = CONFIG.config_dict[tag]["probe"][sample_genome_dict[sample]]
                     cmd = cmd + " --probe-set={}".format(probe)
                 
+                # Even though the HE image is required internally, the pipeline doesn't need it. Add it only if it exists
+                if sample_info.HE_tiff_image != "EMPTY":
+                    cmd = cmd + " --image={}".format(sample_info.HE_tiff_image)
+                    # copy the microscope image into a sub folder here for delivery
+                    HE_folder_loc = work_area + "Microscope/"
+                    if not os.path.exists(HE_folder_loc):
+                        os.makedirs(HE_folder_loc)
+                    shutil.copy(sample_info.HE_tiff_image , HE_folder_loc)
+                
                 # if there is manual alignment json file availabe, add that to the cmd
                 if sample_info.json != "EMPTY":
                     cmd = cmd + " --loupe-alignment={}".format(sample_info.json)
@@ -206,7 +211,7 @@ def lanuch_by_project(sequencer_and_run, project, sample_id_list, sample_genome_
                 bsub_cmd = "bsub -J {}_{}_{}_SPATIAL -o {}_SPATIAL.out{}{}".format(sequencer_and_run, project, sample, sample, cmd, CONFIG.OPTIONS)
                 print(bsub_cmd)
                 subprocess.run(bsub_cmd, shell=True)
-        
+
         elif tag != "Skip":
             cmd = generate_cellranger_cmd(sample, tag, sample_genome_dict[sample], sample_fastqfile_dict[sample], sequencer_and_run)
             print(cmd)
diff --git a/scripts/cellranger_spatial.py b/scripts/cellranger_spatial.py
index f28e9de..01d905e 100644
--- a/scripts/cellranger_spatial.py
+++ b/scripts/cellranger_spatial.py
@@ -1,4 +1,3 @@
-import pandas as pd
 import os
 import json
 import os.path
@@ -16,6 +15,7 @@ def __init__(self, sample, project_id):
         self.preservation = "EMPTY"
         self.tiff_image = "EMPTY"
         self.json = "EMPTY"
+        self.HE_tiff_image = "EMPTY"
         self.get_info_from_LIMS()
         self.copy_tiff(project_id)
         self.copy_json(project_id)
@@ -33,9 +33,14 @@ def copy_tiff(self, project_id):
         source_loc_dir = CONFIG.original_tiff_images_directory + project_id
         destination_loc = CONFIG.tiff_images_directory + project_id
         destination_file = destination_loc + "/" + self.sample_name + ".tif"
+        destination_HE_loc = destination_loc + "/Microscope"
+        destination_HE_file = destination_HE_loc + "/HE_" + self.sample_name + ".tif"       
         # create TIFF_images director if not exists
         if not os.path.exists(destination_loc):
             os.makedirs(destination_loc)
+        # create the microscope image directory if it does not exist
+        if not os.path.exists(destination_HE_loc):
+            os.makedirs(destination_HE_loc)
 
         # copy image file per sample
         original_tiff_image = source_loc_dir + "/" + self.sample_name + ".tif"
@@ -45,7 +50,16 @@ def copy_tiff(self, project_id):
             print("copy {} to {}".format(original_tiff_image, destination_file))
         else:
             print("tif file is not in proper format for sample {}, please check".format(self.IGO_ID))
-            
+
+        # copy HE file per sample if exists
+        original_HE_tiff_image = source_loc_dir + "/Microscope/HE_" + self.sample_name + ".tif"
+        if os.path.isfile(original_HE_tiff_image):
+            shutil.copy(original_HE_tiff_image, destination_HE_file)
+            self.HE_tiff_image = destination_HE_file
+            print("copy {} to {}".format(original_HE_tiff_image, destination_HE_file))
+        else:
+            print("HE tif file does not exist for sample {}, please check".format(self.IGO_ID))
+    
     # copy json file if exists
     def copy_json(self, project_id):
         # project_id format as Project_12345

From b4519accc8986e4d8d174535c6596c0fb6365d6f Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Mon, 17 Jun 2024 09:30:48 -0400
Subject: [PATCH 86/87] Update run_param_config.py

to run WGS metrics on User_WGS recipe
---
 scripts/run_param_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/run_param_config.py b/scripts/run_param_config.py
index 61f5609..5085ff6 100644
--- a/scripts/run_param_config.py
+++ b/scripts/run_param_config.py
@@ -70,6 +70,7 @@ def get_ordered_dic(unordered_dic):
 		# "10X_Genomics_WGS": { TYPE: "WGS" },
 		"WGS_Metagenomic": { TYPE: "WGS" },
 		"WGS_Deep": { TYPE: "WGS" },
+		"User_WGS": { TYPE: "WGS" },
 		".*RNA.*": { TYPE: "RNA" },
 		# ".*96Well_SmartSeq2": { TYPE: "RNA" },
 		".*SMARTer.*": { TYPE: "RNA" },

From e512fe7c76f7c7bbe824bb9731b354ad978f87e2 Mon Sep 17 00:00:00 2001
From: darrelln32 <darrelln399@gmail.com>
Date: Thu, 20 Jun 2024 10:27:37 -0400
Subject: [PATCH 87/87] Update cellranger_config.py

changing names for ARC/Multiome to match recipe names on sample sheet
---
 scripts/cellranger_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/cellranger_config.py b/scripts/cellranger_config.py
index e4e6105..c41c53e 100644
--- a/scripts/cellranger_config.py
+++ b/scripts/cellranger_config.py
@@ -50,7 +50,7 @@
 # 10X recipe list for different pipelines
 COUNT_FLAVORS = ["SC_Chromium-GEX-3", "SC_Chromium-GEX-5"]
 VDJ_FLAVORS = ["SC_Chromium-TCR", "SC_Chromium-BCR"]
-ARC_FLAVORS = ["SC_Chromium-Multiome", "SC_Chromium-Multiome_ATAC", "SC_Chromium-Multiome_GEX"]
+ARC_FLAVORS = ["SC_Chromium-Multiome", "SC_Chromium-Multiome-ATAC", "SC_Chromium-Multiome-GEX"]
 SPATIAL_FLAVORS = ["ST_Visium"]
 
 # we do not want to PROCESS SAIL (15500) or SCRI (12437) projects