Merge pull request #41 from moka-guys/develop
Develop (#41)

Co-Authored-By: rebeccahaines1 <[email protected]>
Co-Authored-By: Graeme <[email protected]>
rebeccahaines1 and Graeme-Smith authored Jun 20, 2024
2 parents d28f8ba + b5631dc commit 88fab90
Showing 8 changed files with 250 additions and 215 deletions.
10 changes: 4 additions & 6 deletions Dockerfile
@@ -3,11 +3,9 @@ FROM python:3.10.6
 LABEL author="Rachel Duffin" \
     maintainer="[email protected]"

+RUN mkdir -p /duty_csv /outputs
+ADD ./requirements.txt /duty_csv/
-RUN mkdir /duty_csv/
+COPY . /duty_csv/
-RUN mkdir -p /outputs/
 RUN pip3 install -r /duty_csv/requirements.txt
-ADD ./*.py /duty_csv/
-ADD /.git /duty_csv/
-ADD ./templates/ /duty_csv/templates/
-WORKDIR /outputs
+WORKDIR /outputs/
 ENTRYPOINT [ "python3","/duty_csv/duty_csv.py" ]
9 changes: 3 additions & 6 deletions Makefile
@@ -10,16 +10,13 @@ IMG := $(REGISTRY)/$(APP)
 IMG_VERSIONED := $(IMG):$(BUILD)
 IMG_LATEST := $(IMG):latest

-.PHONY: push build version cleanbuild
+.PHONY: push build

 push: build
 	docker push $(IMG_VERSIONED)
 	docker push $(IMG_LATEST)

-build: version
-	docker buildx build --platform linux/amd64 -t $(IMG_VERSIONED) . || docker build -t $(IMG_VERSIONED) .
+build:
+	docker buildx build --platform linux/amd64 --no-cache -t $(IMG_VERSIONED) . || docker build --no-cache -t $(IMG_VERSIONED) .
 	docker tag $(IMG_VERSIONED) $(IMG_LATEST)
 	docker save $(IMG_VERSIONED) | gzip > $(DIR)/$(REGISTRY)-$(APP):$(BUILD).tar.gz
-
-cleanbuild:
-	docker buildx build --platform linux/amd64 --no-cache -t $(IMG_VERSIONED) .
16 changes: 13 additions & 3 deletions README.md
@@ -2,7 +2,15 @@

 This repository processes DNAnexus runfolders, identifying those requiring download to the GSTT network.

-The script supports all runtypes. For those runtypes that have downstream outputs requiring download onto the GSTT network, it will generate a CSV file containing URLs for the files requiring download, and attach the CSV file to an email containing instructions on how to download the files to the GSTT network. For those runtypes with no downstream outputs, an email will still be sent but no CSV file will be attached. The email is sent to the bioinformatics shared inbox. Run types are defined in the configuration file.
+The script supports all runtypes. For those runtypes that have downstream outputs requiring download onto the GSTT network, it will generate a CSV file and a text file containing URLs for the files requiring download, and attach both to an email containing instructions on how to download the files to the GSTT network. For those runtypes with no downstream outputs, an email will still be sent but no CSV or text file will be attached. The email is sent to the bioinformatics shared inbox. Run types are defined in the configuration file.
+
+Currently supports the following run types:
+* ArcherDX
+* OncoDEEP
+* TSO500
+* WES
+* SNP
+* Custom Panels

 ## Running the script

@@ -42,6 +50,7 @@ export DX_API_TOKEN=$DNANEXUS_AUTH_TOKEN
 The script can then be run as follows:

 ```bash
+export DX_API_TOKEN=$TOKEN
 python3 duty_csv.py [-h] -P PROJECT_NAME -I PROJECT_ID -EU EMAIL_USER -PW EMAIL_PW -TP TSO_PANNUMBERS
     [TSO_PANNUMBERS ...] -SP STG_PANNUMBERS [STG_PANNUMBERS ...] -CP CP_CAPTURE_PANNOS
     [CP_CAPTURE_PANNOS ...] [-T]
@@ -57,8 +66,9 @@ It is important that any changes to this script are fully tested for integration

 ## Outputs

-The script has 3 file outputs:
+The script has 4 file outputs:
 * CSV file - contains information required by the [process_duty_csv](https://github.com/moka-guys/Automate_Duty_Process_CSV) script to download the required files output by the pipeline from DNAnexus to the required locations on the GSTT network
+* TXT file - contains commands that can be run in PowerShell to download the files via Chrome
 * HTML file - the HTML used as the email message contents
 * Log file - contains all log messages from running the script

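As an illustration of the CSV output described above, here is a minimal sketch of writing a CSV of download URLs. The column names and rows are invented for this example and are not the script's actual format:

```python
import csv
import io

def write_url_csv(rows):
    """Write (file_name, url, destination) rows to CSV text.

    Hypothetical sketch only: the real duty_csv script defines its own
    column layout; these headers are invented for illustration.
    """
    buf = io.StringIO()
    writer = csv.writer(buf)
    writer.writerow(["file_name", "url", "destination"])  # assumed columns
    writer.writerows(rows)
    return buf.getvalue()
```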
@@ -73,7 +83,7 @@ sudo make build

 The docker image can be run as follows, making sure to supply the DNAnexus authentication token as an environment variable:

 ```bash
-sudo docker run -e DX_API_TOKEN=$DNANEXUS_AUTH_TOKEN -v $PATH_TO_OUTPUTS:/outputs seglh/duty_csv:$TAG [-h] -P PROJECT_NAME -I PROJECT_ID -EU EMAIL_USER -PW EMAIL_PW -TP TSO_PANNUMBERS -SP STG_PANNUMBERS -CP CP_CAPTURE_PANNOS
+sudo docker run --rm -e DX_API_TOKEN=$DNANEXUS_AUTH_TOKEN -v $PATH_TO_OUTPUTS:/outputs seglh/duty_csv:$TAG [-h] -P PROJECT_NAME -I PROJECT_ID -EU EMAIL_USER -PW EMAIL_PW -TP TSO_PANNUMBERS -SP STG_PANNUMBERS -CP CP_CAPTURE_PANNOS
 ```

 The current and all previous versions of the tool are stored as dockerised versions in the 001_ToolsReferenceData project as .tar.gz files.
166 changes: 78 additions & 88 deletions config.py
@@ -31,11 +31,10 @@
 RUNTYPE_IDENTIFIERS = {
     "WES": {"present": ["WES", "NGS"], "absent": []},
     "CustomPanels": {"present": ["NGS"], "absent": ["WES"]},
-    "LRPCR": {"present": ["LRPCR"], "absent": []},
     "SNP": {"present": ["SNP"], "absent": []},
     "TSO500": {"present": ["TSO"], "absent": []},
     "ArcherDX": {"present": ["ADX"], "absent": []},
-    "SWIFT": {"present": ["ONC"], "absent": []},
+    "OncoDEEP": {"present": ["OKD"], "absent": []},
 }

 PER_RUNTYPE_DOWNLOADS = {
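The present/absent rules in RUNTYPE_IDENTIFIERS can be read as: a runtype matches when every "present" substring appears in the project name and no "absent" substring does. A minimal sketch of that matching logic (classify_runtype is a hypothetical helper, not the script's actual function, and the example names are invented):

```python
# Sketch of how present/absent substring identifiers could classify a
# DNAnexus project name. Illustration only; not the script's actual code.
RUNTYPE_IDENTIFIERS = {
    "WES": {"present": ["WES", "NGS"], "absent": []},
    "CustomPanels": {"present": ["NGS"], "absent": ["WES"]},
    "TSO500": {"present": ["TSO"], "absent": []},
}

def classify_runtype(project_name):
    """Return the first runtype whose 'present' substrings all appear in
    the project name and whose 'absent' substrings do not, else None."""
    for runtype, rules in RUNTYPE_IDENTIFIERS.items():
        if all(s in project_name for s in rules["present"]) and not any(
            s in project_name for s in rules["absent"]
        ):
            return runtype
    return None
```

Note that dict order matters here: "WES" is checked before "CustomPanels", so a name containing both "NGS" and "WES" classifies as WES.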
@@ -45,31 +44,28 @@
             "regex": r"\S+.chanjo_txt$",
         }
     },
-    **dict.fromkeys(
-        ["CustomPanels", "LRPCR"],
-        {
-            "exon_level_coverage": {
-                "folder": "/coverage",
-                "regex": r"\S+.exon_level.txt$",
-            },
-            "rpkm": {
-                "folder": "/conifer_output",
-                "regex": r"combined_bed_summary\S+",
-            },
-            "fh_prs": {
-                "folder": "/PRS_output",
-                "regex": r"\S+.txt$",
-            },
-            "polyedge": {
-                "folder": "/polyedge",
-                "regex": r"\S+_polyedge.pdf$",
-            },
-            "exomedepth": {
-                "folder": "/exomedepth_output",
-                "regex": r"\S+_output.pdf$",
-            },
-        },
-    ),
+    "CustomPanels": {
+        "exon_level_coverage": {
+            "folder": "/coverage",
+            "regex": r"\S+.exon_level.txt$",
+        },
+        "rpkm": {
+            "folder": "/conifer_output",
+            "regex": r"combined_bed_summary\S+",
+        },
+        "fh_prs": {
+            "folder": "/PRS_output",
+            "regex": r"\S+.txt$",
+        },
+        "polyedge": {
+            "folder": "/polyedge",
+            "regex": r"\S+_polyedge.pdf$",
+        },
+        "exomedepth": {
+            "folder": "/exomedepth_output",
+            "regex": r"\S+_output.pdf$",
+        },
+    },
     "SNP": {
         "vcf": {
             "folder": "/output",
@@ -94,7 +90,7 @@
             "regex": r"^MetricsOutput\S*.tsv$",
         },
     },
-    **dict.fromkeys(["ArcherDX", "SWIFT"], False),
+    **dict.fromkeys(["ArcherDX", "OncoDEEP"], False),
 }

 P_BIOINF_TESTING = "P:/Bioinformatics/testing/process_duty_csv"
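Each PER_RUNTYPE_DOWNLOADS entry pairs a DNAnexus output folder with a filename regex. The following sketch shows how such folder/regex rules could select files for download; files_to_download and the example file paths are hypothetical, not part of the script:

```python
import re

# Illustration of selecting files with the folder/regex pairs above.
# The categories are copied from the config; the file index is invented.
DOWNLOADS = {
    "exon_level_coverage": {"folder": "/coverage", "regex": r"\S+.exon_level.txt$"},
    "polyedge": {"folder": "/polyedge", "regex": r"\S+_polyedge.pdf$"},
}

def files_to_download(file_index):
    """file_index maps DNAnexus folder paths to lists of file names;
    return (category, name) pairs whose names match that category's regex."""
    matches = []
    for category, rule in DOWNLOADS.items():
        for name in file_index.get(rule["folder"], []):
            if re.search(rule["regex"], name):
                matches.append((category, name))
    return matches
```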
@@ -108,36 +104,33 @@
             "subdir": None,
         }
     },
-    **dict.fromkeys(
-        ["CustomPanels", "LRPCR"],
-        {
-            "exon_level_coverage": {
-                "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
-                "StG": f"{P_BIOINF_TESTING}/StG/%s/",
-                "subdir": r"coverage/",
-            },
-            "rpkm": {
-                "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
-                "StG": f"{P_BIOINF_TESTING}/StG/%s/",
-                "subdir": r"RPKM/",
-            },
-            "fh_prs": {
-                "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
-                "StG": f"{P_BIOINF_TESTING}/StG/%s/",
-                "subdir": r"FH_PRS/",
-            },
-            "polyedge": {
-                "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
-                "StG": f"{P_BIOINF_TESTING}/StG/%s/",
-                "subdir": r"polyedge/",
-            },
-            "exomedepth": {
-                "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
-                "StG": f"{P_BIOINF_TESTING}/StG/%s/",
-                "subdir": r"exomedepth/",
-            },
-        },
-    ),
+    "CustomPanels": {
+        "exon_level_coverage": {
+            "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
+            "StG": f"{P_BIOINF_TESTING}/StG/%s/",
+            "subdir": r"coverage/",
+        },
+        "rpkm": {
+            "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
+            "StG": f"{P_BIOINF_TESTING}/StG/%s/",
+            "subdir": r"RPKM/",
+        },
+        "fh_prs": {
+            "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
+            "StG": f"{P_BIOINF_TESTING}/StG/%s/",
+            "subdir": r"FH_PRS/",
+        },
+        "polyedge": {
+            "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
+            "StG": f"{P_BIOINF_TESTING}/StG/%s/",
+            "subdir": r"polyedge/",
+        },
+        "exomedepth": {
+            "Via": f"{P_BIOINF_TESTING}/CustomPanels/%s%s/",
+            "StG": f"{P_BIOINF_TESTING}/StG/%s/",
+            "subdir": r"exomedepth/",
+        },
+    },
     "SNP": {
         "vcf": {
             "Via": f"{P_BIOINF_TESTING}/SNP/VCFs_Andrew/",
@@ -177,36 +170,33 @@
             "subdir": None,
         },
     },
-    **dict.fromkeys(
-        ["CustomPanels", "LRPCR"],
-        {
-            "exon_level_coverage": {
-                "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
-                "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
-                "subdir": r"coverage/",
-            },
-            "rpkm": {
-                "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
-                "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
-                "subdir": r"RPKM/",
-            },
-            "fh_prs": {
-                "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
-                "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
-                "subdir": r"FH_PRS/",
-            },
-            "polyedge": {
-                "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
-                "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
-                "subdir": r"polyedge/",
-            },
-            "exomedepth": {
-                "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
-                "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
-                "subdir": r"exomedepth/",
-            },
-        },
-    ),
+    "CustomPanels": {
+        "exon_level_coverage": {
+            "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
+            "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
+            "subdir": r"coverage/",
+        },
+        "rpkm": {
+            "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
+            "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
+            "subdir": r"RPKM/",
+        },
+        "fh_prs": {
+            "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
+            "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
+            "subdir": r"FH_PRS/",
+        },
+        "polyedge": {
+            "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
+            "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
+            "subdir": r"polyedge/",
+        },
+        "exomedepth": {
+            "Via": "P:/DNA LAB/Current/NGS worksheets/%s%s/",
+            "StG": "P:/DNA LAB/StG SFTP/StG SFTP outgoing/%s/",
+            "subdir": r"exomedepth/",
+        },
+    },
     "SNP": {
         "vcf": {
             "Via": "P:/Bioinformatics/VCFs_Andrew/",
@@ -235,7 +225,7 @@
             "StG": False,
             "subdir": r"metrics/",
         },
-        **dict.fromkeys(["ArcherDX", "SWIFT"], False),
+        **dict.fromkeys(["ArcherDX", "OncoDEEP"], False),
     },
 },
 }