From 1dc96898139a0eb98edeced13ded474db7afe33e Mon Sep 17 00:00:00 2001 From: rnmitchell Date: Fri, 13 Sep 2024 06:55:17 -0400 Subject: [PATCH] initial commit --- lusSTR/cli/__init__.py | 12 +++--- lusSTR/cli/config.py | 89 ++++++++++++++++++++++++--------------- lusSTR/cli/snps.py | 14 +++--- lusSTR/cli/strs.py | 16 +++---- lusSTR/wrappers/filter.py | 4 ++ 5 files changed, 77 insertions(+), 58 deletions(-) diff --git a/lusSTR/cli/__init__.py b/lusSTR/cli/__init__.py index e3a20d5b..c2658b0d 100644 --- a/lusSTR/cli/__init__.py +++ b/lusSTR/cli/__init__.py @@ -8,20 +8,16 @@ from lusSTR.cli import snps from lusSTR.cli import gui -mains = { - "config": config.main, - "strs": strs.main, - "snps": snps.main, - "gui": gui.main -} +mains = {"config": config.main, "strs": strs.main, "snps": snps.main, "gui": gui.main} subparser_funcs = { "config": config.subparser, "strs": strs.subparser, "snps": snps.subparser, - "gui": gui.subparser + "gui": gui.subparser, } + def main(args=None): if args is None: args = get_parser().parse_args() @@ -36,6 +32,7 @@ def main(args=None): result = mainmethod(args) return result + def get_parser(): parser = argparse.ArgumentParser() parser.add_argument( @@ -47,5 +44,6 @@ def get_parser(): func(subparsers) return parser + if __name__ == "__main__": main() diff --git a/lusSTR/cli/config.py b/lusSTR/cli/config.py index fa81c160..7c78abba 100644 --- a/lusSTR/cli/config.py +++ b/lusSTR/cli/config.py @@ -101,83 +101,102 @@ def edit_str_config(config, args): def subparser(subparsers): p = subparsers.add_parser("config", description="Create config file for running STR pipeline") p.add_argument( - "-w", "--workdir", metavar="W", default=".", - help="directory to add config file; default is current working directory") + "-w", + "--workdir", + metavar="W", + default=".", + help="directory to add config file; default is current working directory", + ) p.add_argument( - "-a", "--analysis-software", choices=["uas", "straitrazor", "genemarker"], default="uas", - dest="asoftware", help="Analysis software program used prior to lusSTR. Choices are uas, " - "straitrazor or genemarker. Default is uas." + "-a", + "--analysis-software", + choices=["uas", "straitrazor", "genemarker"], + default="uas", + dest="asoftware", + help="Analysis software program used prior to lusSTR. Choices are uas, " + "straitrazor or genemarker. Default is uas.", ) p.add_argument("--input", help="Input file or directory") p.add_argument("--out", "-o", help="Output file/directory name") p.add_argument( - "--powerseq", action="store_true", - help="Use to indicate sequences were created using the PowerSeq Kit." + "--powerseq", + action="store_true", + help="Use to indicate sequences were created using the PowerSeq Kit.", ) p.add_argument( - "--sex", action="store_true", + "--sex", + action="store_true", help="Use if including the X and Y STR markers. Separate reports for these markers " "will be created.", ) p.add_argument( - "--nocombine", action="store_true", + "--nocombine", + action="store_true", help="Do not combine read counts for duplicate sequences within the UAS region " "during the 'convert' step. By default, read counts are combined for sequences " "not run through the UAS.", ) p.add_argument( - "--reference", action="store_true", - help="Use for creating Reference profiles for STR workflow" + "--reference", + action="store_true", + help="Use for creating Reference profiles for STR workflow", ) p.add_argument( - "--software", choices=["efm", "mpsproto", "strmix"], default="strmix", + "--software", + choices=["efm", "mpsproto", "strmix"], + default="strmix", help="Specify the probabilistic genotyping software package of choice. The final output" - " files will be in the correct format for direct use. Default is strmix." + " files will be in the correct format for direct use. Default is strmix.", ) p.add_argument( - "--str-type", choices=["ce", "ngs", "lusplus"], default="ngs", - dest="datatype", help="Data type for STRs. Options are: CE allele ('ce'), sequence " + "--str-type", + choices=["ce", "ngs", "lusplus"], + default="ngs", + dest="datatype", + help="Data type for STRs. Options are: CE allele ('ce'), sequence " "or bracketed sequence form('ngs'), or LUS+ allele ('lusplus'). Default is 'ngs'.", ) p.add_argument( - "--noinfo", action="store_true", - help="Use to not create the Sequence Information File in the 'filter' step" + "--noinfo", + action="store_true", + help="Use to not create the Sequence Information File in the 'filter' step", ) p.add_argument( - "--separate", action="store_true", + "--separate", + action="store_true", help="Use to separate EFM profiles in the 'filter' step. If specifying for SNPs, " - "each sample will also be separated into 10 different bins for mixture deconvolution." + "each sample will also be separated into 10 different bins for mixture deconvolution.", ) p.add_argument( - "--nofiltering", action="store_true", + "--nofiltering", + action="store_true", help="For STRs, use to perform no filtering during the 'filter' step. For SNPs, " - "only alleles specified as 'Typed' by the UAS will be included at the 'format' step." + "only alleles specified as 'Typed' by the UAS will be included at the 'format' step.", ) p.add_argument( - "--snps", action="store_true", - help="Use to create a config file for the SNP workflow" + "--snps", action="store_true", help="Use to create a config file for the SNP workflow" ) p.add_argument( - "--snp-type", default="all", dest="snptype", + "--snp-type", + default="all", + dest="snptype", help="Specify the type of SNPs to include in the final report. 'p' will include only the " "Phenotype SNPs; 'a' will include only the Ancestry SNPs; 'i' will include only the " "Identity SNPs; and 'all' will include all SNPs. More than one type can be specified (e.g. " - " 'p, a'). Default is all." + " 'p, a'). Default is all.", ) p.add_argument( - "--kintelligence", action="store_true", - help="Use if processing Kintelligence SNPs within a Kintellience Report(s)" + "--kintelligence", + action="store_true", + help="Use if processing Kintelligence SNPs within a Kintellience Report(s)", ) p.add_argument( - "--snp-reference", dest="ref", - help="Specify any references for SNP data for use in EFM." + "--snp-reference", dest="ref", help="Specify any references for SNP data for use in EFM." ) p.add_argument( - "--strand", choices=["uas", "forward"], + "--strand", + choices=["uas", "forward"], help="Specify the strand orientation for the final output files. UAS orientation is " - "default for STRs; forward strand is default for SNPs." - ) - p.add_argument( - "--custom", action="store_true", - help="Specifying custom sequence ranges." + "default for STRs; forward strand is default for SNPs.", ) + p.add_argument("--custom", action="store_true", help="Specifying custom sequence ranges.") diff --git a/lusSTR/cli/snps.py b/lusSTR/cli/snps.py index 7304faa0..edb3a2aa 100644 --- a/lusSTR/cli/snps.py +++ b/lusSTR/cli/snps.py @@ -22,15 +22,15 @@ def main(args): lusSTR.snakefile(workflow="snps"), targets=[pretarget], workdir=workdir, verbose=True ) if result is not True: - raise SystemError('Snakemake failed') + raise SystemError("Snakemake failed") + def subparser(subparsers): - p = subparsers.add_parser( - "snps", description="Running the SNP pipeline" - ) + p = subparsers.add_parser("snps", description="Running the SNP pipeline") p.add_argument( - "target", choices=["format", "all"], + "target", + choices=["format", "all"], help="Steps to run. Specifying 'format' will run only 'format'. Specifying " - "'all' will run all steps of the SNP workflow ('format' and 'convert')." + "'all' will run all steps of the SNP workflow ('format' and 'convert').", ) - p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") \ No newline at end of file + p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") diff --git a/lusSTR/cli/strs.py b/lusSTR/cli/strs.py index d1f1510e..d5fbaa68 100644 --- a/lusSTR/cli/strs.py +++ b/lusSTR/cli/strs.py @@ -18,20 +18,18 @@ def main(args): pretarget = args.target if args.target != "all" else "filter" workdir = args.workdir - result = snakemake( - lusSTR.snakefile(workflow="strs"), targets=[pretarget], workdir=workdir - ) + result = snakemake(lusSTR.snakefile(workflow="strs"), targets=[pretarget], workdir=workdir) if result is not True: - raise SystemError('Snakemake failed') + raise SystemError("Snakemake failed") + def subparser(subparsers): - p = subparsers.add_parser( - "strs", description="Running the STR pipeline" - ) + p = subparsers.add_parser("strs", description="Running the STR pipeline") p.add_argument( - "target", choices=["format", "convert", "all"], + "target", + choices=["format", "convert", "all"], help="Steps to run. Specifying 'format' will run only 'format'. Specifying " "'convert' will run both 'format' and 'convert'. Specifying 'all' will run " - "all steps of the STR workflow ('format', 'convert' and 'filter')." + "all steps of the STR workflow ('format', 'convert' and 'filter').", ) p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py index 9b2342b2..1d5f3f5c 100644 --- a/lusSTR/wrappers/filter.py +++ b/lusSTR/wrappers/filter.py @@ -19,11 +19,13 @@ import math from matplotlib.backends.backend_pdf import PdfPages import matplotlib.pyplot as plt +import mpld3 import numpy as np import os import pandas as pd from pathlib import Path import re +import streamlit.components.v1 as components import sys @@ -425,6 +427,8 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True): else: title = "Marker Plots for All Alleles With Custom Y-Axis Scale" plt.text(0.4, 0.95, title, transform=fig.transFigure, size=24) + fig_html = mpld3.fig_to_html(fig) + components.html(fig_html, height=600) def get_at(df, locus):