diff --git a/README.md b/README.md index 088102f4..9a36728a 100755 --- a/README.md +++ b/README.md @@ -52,7 +52,10 @@ Once lusSTR has been installed, the GUI can be started with the command: ``` lusstr gui ``` -All lusSTR settings for either the STR pipeline or the SNP pipeline can be specified after selecting the desired pipeline tab. +All lusSTR settings for either the STR pipeline or the SNP pipeline can be specified after selecting the desired pipeline tab. + +The GUI provides an option to view marker plots interactively and manually edit the sequence type after lusSTR is complete. +There is also an option to upload previously run data to view. See the last STR section (```View and Edit lusSTR data```) for more information. ____ ## *Command line interface* @@ -210,8 +213,24 @@ When using STRmix data, the data type can be specified using the ```data-type``` Finally, output files are created for direct use in EuroForMix (EFM), MPSproto or STRmix. If EFM or MPSproto is specified, a single file is created containing all samples in the input file (however, separate output files for each sample can be created with the ```separate``` setting specified in the config file). If STRmix is specified, a directory containing files for each individual sample is created. The ```profile-type``` setting allows for the creation of either a ```reference``` or ```evidence``` profile. Both EuroForMix/MPSproto and STRmix require different formatting depending on the type of sample. +--- + +### View and Edit lusSTR data + +The ```See Individual Marker Plots & Data``` button at the bottom of the lusSTR STR page can be run either directly after a lusSTR run (e.g. the settings have already been specified) or the user can upload previously-run lusSTR data by uploading the lusSTR output folder using the ```Select an Output Folder``` button. If using previously run data, the configuration settings within the ```config.yaml``` file in that folder will automatically be loaded and used. 
If other settings are desired, please re-run lusSTR using the new settings. + +Once the ```See Individual Marker Plots & Data``` button has been pressed, the sample of interest can be selected from the dropdown menu (if multiple samples are present). The first screen contains the marker plots for all autosomal STRs. Individual markers can be selected from the dropdown menu. Markers with potential issues are flagged with a caution symbol. + +On the individual marker pages, the ```allele_type``` column can be changed (no other column in the data table can be edited). A dropdown menu will appear to change the assigned allele type for a specific sequence. If a type is changed, the page will automatically refresh and the marker plot will be updated with the edited changes. The user may edit multiple markers and samples. + +Once all desired changes are made, selecting the "Save Edits" button will create the following files containing the changes: + 1. New final lusSTR output files (either ```STRmix``` or ```EFM``` files as generated previously with lusSTR) + 2. New allele information file + 3. New PDFs of the marker plots +The new files are automatically generated into a folder labeled as: ```edited_date_time``` (e.g. ```edited_10042024_16_29_50```) ___ + ## SNP Data Processing lusSTR is able to process SNPs derived from the ForenSeq Signature Prep assay and the ForenSeq Kintelligence assay. SNPs from the ForenSeq Signature Prep assay could be analyzed using either the Verogen UAS or STRait Razor. SNPs from the ForenSeq Kintelligence assay must first be analyzed using the UAS. 
diff --git a/lusSTR/cli/__init__.py b/lusSTR/cli/__init__.py index e3a20d5b..c2658b0d 100644 --- a/lusSTR/cli/__init__.py +++ b/lusSTR/cli/__init__.py @@ -8,20 +8,16 @@ from lusSTR.cli import snps from lusSTR.cli import gui -mains = { - "config": config.main, - "strs": strs.main, - "snps": snps.main, - "gui": gui.main -} +mains = {"config": config.main, "strs": strs.main, "snps": snps.main, "gui": gui.main} subparser_funcs = { "config": config.subparser, "strs": strs.subparser, "snps": snps.subparser, - "gui": gui.subparser + "gui": gui.subparser, } + def main(args=None): if args is None: args = get_parser().parse_args() @@ -36,6 +32,7 @@ def main(args=None): result = mainmethod(args) return result + def get_parser(): parser = argparse.ArgumentParser() parser.add_argument( @@ -47,5 +44,6 @@ def get_parser(): func(subparsers) return parser + if __name__ == "__main__": main() diff --git a/lusSTR/cli/config.py b/lusSTR/cli/config.py index fa81c160..7c78abba 100644 --- a/lusSTR/cli/config.py +++ b/lusSTR/cli/config.py @@ -101,83 +101,102 @@ def edit_str_config(config, args): def subparser(subparsers): p = subparsers.add_parser("config", description="Create config file for running STR pipeline") p.add_argument( - "-w", "--workdir", metavar="W", default=".", - help="directory to add config file; default is current working directory") + "-w", + "--workdir", + metavar="W", + default=".", + help="directory to add config file; default is current working directory", + ) p.add_argument( - "-a", "--analysis-software", choices=["uas", "straitrazor", "genemarker"], default="uas", - dest="asoftware", help="Analysis software program used prior to lusSTR. Choices are uas, " - "straitrazor or genemarker. Default is uas." + "-a", + "--analysis-software", + choices=["uas", "straitrazor", "genemarker"], + default="uas", + dest="asoftware", + help="Analysis software program used prior to lusSTR. Choices are uas, " + "straitrazor or genemarker. 
Default is uas.", ) p.add_argument("--input", help="Input file or directory") p.add_argument("--out", "-o", help="Output file/directory name") p.add_argument( - "--powerseq", action="store_true", - help="Use to indicate sequences were created using the PowerSeq Kit." + "--powerseq", + action="store_true", + help="Use to indicate sequences were created using the PowerSeq Kit.", ) p.add_argument( - "--sex", action="store_true", + "--sex", + action="store_true", help="Use if including the X and Y STR markers. Separate reports for these markers " "will be created.", ) p.add_argument( - "--nocombine", action="store_true", + "--nocombine", + action="store_true", help="Do not combine read counts for duplicate sequences within the UAS region " "during the 'convert' step. By default, read counts are combined for sequences " "not run through the UAS.", ) p.add_argument( - "--reference", action="store_true", - help="Use for creating Reference profiles for STR workflow" + "--reference", + action="store_true", + help="Use for creating Reference profiles for STR workflow", ) p.add_argument( - "--software", choices=["efm", "mpsproto", "strmix"], default="strmix", + "--software", + choices=["efm", "mpsproto", "strmix"], + default="strmix", help="Specify the probabilistic genotyping software package of choice. The final output" - " files will be in the correct format for direct use. Default is strmix." + " files will be in the correct format for direct use. Default is strmix.", ) p.add_argument( - "--str-type", choices=["ce", "ngs", "lusplus"], default="ngs", - dest="datatype", help="Data type for STRs. Options are: CE allele ('ce'), sequence " + "--str-type", + choices=["ce", "ngs", "lusplus"], + default="ngs", + dest="datatype", + help="Data type for STRs. Options are: CE allele ('ce'), sequence " "or bracketed sequence form('ngs'), or LUS+ allele ('lusplus'). 
Default is 'ngs'.", ) p.add_argument( - "--noinfo", action="store_true", - help="Use to not create the Sequence Information File in the 'filter' step" + "--noinfo", + action="store_true", + help="Use to not create the Sequence Information File in the 'filter' step", ) p.add_argument( - "--separate", action="store_true", + "--separate", + action="store_true", help="Use to separate EFM profiles in the 'filter' step. If specifying for SNPs, " - "each sample will also be separated into 10 different bins for mixture deconvolution." + "each sample will also be separated into 10 different bins for mixture deconvolution.", ) p.add_argument( - "--nofiltering", action="store_true", + "--nofiltering", + action="store_true", help="For STRs, use to perform no filtering during the 'filter' step. For SNPs, " - "only alleles specified as 'Typed' by the UAS will be included at the 'format' step." + "only alleles specified as 'Typed' by the UAS will be included at the 'format' step.", ) p.add_argument( - "--snps", action="store_true", - help="Use to create a config file for the SNP workflow" + "--snps", action="store_true", help="Use to create a config file for the SNP workflow" ) p.add_argument( - "--snp-type", default="all", dest="snptype", + "--snp-type", + default="all", + dest="snptype", help="Specify the type of SNPs to include in the final report. 'p' will include only the " "Phenotype SNPs; 'a' will include only the Ancestry SNPs; 'i' will include only the " "Identity SNPs; and 'all' will include all SNPs. More than one type can be specified (e.g. " - " 'p, a'). Default is all." + " 'p, a'). Default is all.", ) p.add_argument( - "--kintelligence", action="store_true", - help="Use if processing Kintelligence SNPs within a Kintellience Report(s)" + "--kintelligence", + action="store_true", + help="Use if processing Kintelligence SNPs within a Kintelligence Report(s)", ) p.add_argument( - "--snp-reference", dest="ref", - help="Specify any references for SNP data for use in EFM." 
+ "--snp-reference", dest="ref", help="Specify any references for SNP data for use in EFM." ) p.add_argument( - "--strand", choices=["uas", "forward"], + "--strand", + choices=["uas", "forward"], help="Specify the strand orientation for the final output files. UAS orientation is " - "default for STRs; forward strand is default for SNPs." - ) - p.add_argument( - "--custom", action="store_true", - help="Specifying custom sequence ranges." + "default for STRs; forward strand is default for SNPs.", ) + p.add_argument("--custom", action="store_true", help="Specifying custom sequence ranges.") diff --git a/lusSTR/cli/gui.py b/lusSTR/cli/gui.py index 7c531abd..e2fc94b0 100644 --- a/lusSTR/cli/gui.py +++ b/lusSTR/cli/gui.py @@ -13,8 +13,15 @@ # Importing Necessary Packages # ################################################################# +from datetime import datetime import json import importlib.resources +from lusSTR.wrappers.filter import get_at, EFM_output, marker_plots, make_plot, STRmix_output +import math +import numpy as np +import pandas as pd +from pathlib import Path +import plotly.express as px import streamlit as st from streamlit_option_menu import option_menu import yaml @@ -27,6 +34,15 @@ # Functions # ################################################################# + +def get_filter_metadata_file(): + return importlib.resources.files("lusSTR") / "data/filters.json" + + +with open(get_filter_metadata_file(), "r") as fh: + filter_marker_data = json.load(fh) + + # ------------ Function to Generate config.yaml File ---------- # @@ -155,8 +171,10 @@ def show_home_page(): st.markdown( """ - lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping software. - For more information on lusSTR, visit our [GitHub page](https://github.com/bioforensics/lusSTR). 
+ lusSTR is an end-to-end workflow for processing human forensic data (STRs and SNPs) + derived from Next Generation Sequencing (NGS) data for use in probabilistic genotyping + software. For more information on lusSTR, visit our + [GitHub page](https://github.com/bioforensics/lusSTR). """, unsafe_allow_html=True, ) @@ -164,6 +182,239 @@ def show_home_page(): st.info("Please Select One of the Tabs Above to Get Started on Processing Your Data!") +def df_on_change(locus): + state = st.session_state[f"{locus}_edited"] + for index, updates in state["edited_rows"].items(): + st.session_state[locus].loc[st.session_state[locus].index == index, "edited"] = True + for key, value in updates.items(): + st.session_state[locus].loc[st.session_state[locus].index == index, key] = value + + +def interactive_plots_allmarkers(sample_df, flagged_df): + cols = st.columns(4) + max_reads = max(sample_df["Reads"]) + n = 100 if max_reads > 1000 else 10 + max_yvalue = int(math.ceil(max_reads / n)) * n + increase_value = int(math.ceil((max_yvalue / 5)) / n) * n + n = 0 + for marker in sample_df["Locus"].unique(): + col = cols[n] + container = col.container(border=True) + sample_locus = sample_df["SampleID"].unique() + "_" + marker + marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele") + if sample_locus in flagged_df["key"].values: + marker = f"⚠️{marker}⚠️" + plot = interactive_plots(marker_df, marker, max_yvalue, increase_value, all=True) + container.plotly_chart(plot, use_container_width=True) + if n == 3: + n = 0 + else: + n += 1 + + +def interactive_plots(df, locus, ymax, increase, all=False): + if "⚠️" in locus: + locus_at = locus.replace("⚠️", "") + else: + locus_at = locus + at = get_at(df, locus_at) + for i, row in df.iterrows(): + if "stutter" in df.loc[i, "allele_type"]: + df.loc[i, "Label"] = "Stutter" + else: + df.loc[i, "Label"] = df.loc[i, "allele_type"] + min_x = round(min(df["CE_Allele"]) - 1) + max_x = round(max(df["CE_Allele"]) + 1) + plot = 
px.bar( + df, + x="CE_Allele", + y="Reads", + color="Label", + color_discrete_map={ + "Typed": "green", + "BelowAT": "red", + "Stutter": "blue", + "Deleted": "purple", + }, + title=locus, + ) + plot.add_hline(y=at, line_width=3, line_dash="dot", line_color="gray") + plot.add_annotation(text=f"AT", x=min_x + 0.1, y=at, showarrow=False, yshift=10) + plot.update_layout( + xaxis=dict(range=[min_x, max_x], tickmode="array", tickvals=np.arange(min_x, max_x, 1)) + ) + if all: + plot.update_layout( + yaxis=dict(range=[0, ymax], tickmode="array", tickvals=np.arange(0, ymax, increase)) + ) + return plot + + +def remake_final_files(full_df, outpath): + if st.session_state.custom_ranges: + seq_col = "Custom_Range_Sequence" + brack_col = "Custom_Bracketed_Notation" + else: + seq_col = ( + "UAS_Output_Sequence" + if st.session_state.strand == "uas" + else "Forward_Strand_Sequence" + ) + brack_col = ( + "UAS_Output_Bracketed_Notation" + if st.session_state.strand == "uas" + else "Forward_Strand_Bracketed_Notation" + ) + if st.session_state.nofilters: + full_df["allele_type"] = "Typed" + if st.session_state.output_type == "efm" or st.session_state.output_type == "mpsproto": + EFM_output( + full_df, + outpath, + st.session_state.profile_type, + st.session_state.data_type, + brack_col, + st.session_state.sex, + st.session_state.separate, + ) + else: + STRmix_output( + full_df, outpath, st.session_state.profile_type, st.session_state.data_type, seq_col + ) + + +def interactive_setup(df1, file): + col1, col2, col3, col4, col5 = st.columns(5) + sample = col1.selectbox("Select Sample:", options=df1["SampleID"].unique()) + sample_df = df1[df1["SampleID"] == sample].reset_index(drop=True) + locus_list = pd.concat([pd.Series("All Markers"), sample_df["Locus"].drop_duplicates()]) + if os.path.isfile(f"{file}_Flagged_Loci.csv"): + flags = pd.read_csv(f"{file}_Flagged_Loci.csv") + else: + flags = pd.DataFrame(columns=["key", "SampleID", "Locus"]) + flags["key"] = flags["SampleID"] + "_" + 
flags["Locus"] + flags_sample = flags[flags["SampleID"] == sample].reset_index(drop=True) + for flagged_locus in flags_sample["Locus"].unique(): + locus_list = locus_list.str.replace(flagged_locus, f"⚠️{flagged_locus}⚠️") + locus = col2.selectbox("Select Marker:", options=locus_list) + if "⚠️" in locus: + locus = locus.replace("⚠️", "") + if locus == "All Markers": + if not flags_sample.empty: + st.write( + f"⚠️ indicates potential problems with the marker. Examine the individual marker " + f"plots for more information." + ) + interactive_plots_allmarkers(sample_df, flags) + else: + locus_key = f"{sample}_{locus}" + if locus_key not in st.session_state: + st.session_state[locus_key] = sample_df[sample_df["Locus"] == locus].reset_index( + drop=True + ) + Type = [ + "Deleted", + "Typed", + "-1_stutter", + "-2_stutter", + "BelowAT", + "-1_stutter/+1_stutter", + "+1_stutter", + ] + plot = interactive_plots(st.session_state[locus_key], locus, None, None) + st.plotly_chart(plot, use_container_width=True) + col1, col2, col3 = st.columns(3) + if locus_key in flags["key"].values: + locus_flags = flags[flags["key"] == locus_key] + for flag in locus_flags["Flags"].unique(): + col2.write(f"⚠️ Potential issue: {flag} identified!") + st.data_editor( + data=st.session_state[locus_key], + disabled=( + "SampleID", + "Locus", + "UAS_Output_Sequence", + "CE_Allele", + "UAS_Output_Bracketed_Notation", + "Custom_Range_Sequence", + "Custom_Bracketed_Notation", + "Reads", + "parent_allele1", + "parent_allele2", + "allele1_ref_reads", + "allele2_ref_reads", + "perc_noise", + "perc_stutter", + ), + column_config={ + "allele_type": st.column_config.SelectboxColumn("allele_type", options=Type) + }, + hide_index=True, + key=f"{locus_key}_edited", + on_change=df_on_change, + args=(locus_key,), + ) + if st.button("Save Edits"): + ph = st.empty() + with ph.container(): + st.write("Saving Changes - May take a minute or two.") + combined_df = pd.DataFrame() + for sample in 
df1["SampleID"].unique(): + sample_df = df1[df1["SampleID"] == sample].reset_index(drop=True) + for locus in sample_df["Locus"].unique(): + locus_key = f"{sample}_{locus}" + try: + combined_df = pd.concat([combined_df, st.session_state[locus_key]]) + except KeyError: + combined_df = pd.concat( + [ + combined_df, + sample_df[sample_df["Locus"] == locus].reset_index(drop=True), + ] + ) + now = datetime.now() + dt = now.strftime("%m%d%Y_%H_%M_%S") + del combined_df["Label"] + Path(f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}").mkdir( + parents=True, exist_ok=True + ) + outpath = f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" + marker_plots(combined_df, f"{st.session_state.output}_edited_{dt}", sex=False, wd=outpath) + combined_df.to_csv( + f"{st.session_state.wd_dirname}/{st.session_state.output}/edited_{dt}/" + f"{st.session_state.output}_sequence_info_edited_{dt}.csv", + index=False, + ) + new_text = ( + f"Changes saved to {st.session_state.wd_dirname}/{st.session_state.output}" + f"/edited_{dt}/{st.session_state.output}_sequence_info_edited_{dt}.csv" + f"New {st.session_state.output_type} files created in {st.session_state.wd_dirname}" + f"/{st.session_state.output}/edited_{dt}/ folder" + ) + remake_final_files(combined_df, outpath) + ph.empty() + with ph.container(): + st.write( + f"New files and marker plots with edits saved to {st.session_state.wd_dirname}/" + f"{st.session_state.output}/edited_{dt}/" + ) + + +def create_settings(): + if os.path.isfile(f"{st.session_state.wd_dirname}/config.yaml"): + st.write(f"Loading settings from {st.session_state.wd_dirname}/config.yaml") + with open(f"{st.session_state.wd_dirname}/config.yaml", "r") as file: + config_settings = yaml.safe_load(file) + st.session_state.output = config_settings["output"] + st.session_state.custom_ranges = config_settings["custom_ranges"] + st.session_state.profile_type = config_settings["profile_type"] + st.session_state.data_type = 
config_settings["data_type"] + st.session_state.sex = config_settings["sex"] + st.session_state.separate = config_settings["separate"] + st.session_state.strand = config_settings["strand"] + st.session_state.output_type = config_settings["output_type"] + + ##################################################################### # STR WORKFLOW # ##################################################################### @@ -177,7 +428,8 @@ def show_STR_page(): st.title("STR Workflow") st.info( - "Please Select STR Settings Below for lusSTR! For Information Regarding the Settings, See the How to Use Tab." + "Please Select STR Settings Below for lusSTR! For Information Regarding the " + "Settings, See the How to Use Tab." ) # Input File Specification @@ -185,7 +437,8 @@ def show_STR_page(): # Ask user if submitting a directory or individual file st.info( - "Please Indicate If You Are Providing An Individual Input File or a Folder Containing Multiple Input Files" + "Please Indicate If You Are Providing An Individual Input File or a Folder Containing " + "Multiple Input Files" ) input_option = st.radio( "Select Input Option:", ("Individual File", "Folder with Multiple Files") @@ -198,13 +451,13 @@ def show_STR_page(): # Logic for Path Picker based on user's input option if input_option == "Folder with Multiple Files": - clicked = st.button("Please Select a Folder") + clicked = st.button("Select a Folder") if clicked: dirname = folder_picker_dialog() st.session_state.samp_input = dirname else: - clicked_file = st.button("Please Select a File") + clicked_file = st.button("Select a File") if clicked_file: filename = file_picker_dialog() st.session_state.samp_input = filename @@ -216,6 +469,30 @@ def show_STR_page(): # Store the Selected Path to Reference in Config samp_input = st.session_state.samp_input + ##################################################################### + # STR: Specify Working Directory # + ##################################################################### 
+ + st.subheader("Output Folder Selection") + + col1, col2, col3, col4, col5 = st.columns(5) + + # Initialize session state if not already initialized + if "wd_dirname" not in st.session_state: + st.session_state.wd_dirname = None + + clicked_wd = col1.button("Select An Output Folder") + if clicked_wd: + wd = folder_picker_dialog() + st.session_state.wd_dirname = wd + + # Display selected path + if st.session_state.wd_dirname: + st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname) + + # Store Selected Path to Reference in Config + wd_dirname = st.session_state.wd_dirname + ##################################################################### # STR: General Software Settings to Generate Config File # ##################################################################### @@ -224,7 +501,10 @@ def show_STR_page(): col1, col2, col3, col4, col5 = st.columns(5) - analysis_software = { + if "analysis_software" not in st.session_state: + st.session_state.analysis_software = None + + st.session_state.analysis_software = { "UAS": "uas", "STRait Razor v3": "straitrazor", "GeneMarker HTS": "genemarker", @@ -236,17 +516,27 @@ def show_STR_page(): ) ] - custom_ranges = st.checkbox( + if "custom_ranges" not in st.session_state: + st.session_state.custom_ranges = None + + st.session_state.custom_ranges = st.checkbox( "Use Custom Sequence Ranges", - help="Check the box to use the specified custom sequence ranges as defined in the str_markers.json file.", + help="Check the box to use the specified custom sequence ranges as defined in the " + "str_markers.json file.", ) - sex = st.checkbox( + if "sex" not in st.session_state: + st.session_state.sex = None + + st.session_state.sex = st.checkbox( "Include X- and Y-STRs", help="Check the box to include X- and Y-STRs, otherwise leave unchecked.", ) - kit = {"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"}[ + if "kit" not in st.session_state: + st.session_state.kit = None + + st.session_state.kit = 
{"ForenSeq Signature Prep": "forenseq", "PowerSeq 46GY": "powerseq"}[ col2.selectbox( "Library Preparation Kit", options=["ForenSeq Signature Prep", "PowerSeq 46GY"], @@ -254,15 +544,24 @@ def show_STR_page(): ) ] - output = col3.text_input( + if "output" not in st.session_state: + st.session_state.output = None + + st.session_state.output = col3.text_input( "Output File Name", "lusstr_output", - help="Please specify a name for the created files. It can only contain alphanumeric characters, underscores and hyphens. No spaces allowed.", + help="Please specify a name for the created files. It can only contain alphanumeric " + "characters, underscores and hyphens. No spaces allowed.", ) - nocombine = st.checkbox( + if "nocombine" not in st.session_state: + st.session_state.nocombine = None + + st.session_state.nocombine = st.checkbox( "Do Not Combine Identical Sequences", - help="If using STRait Razor data, by default, identical sequences (after removing flanking sequences) are combined and reads are summed. Checking this will not combine identical sequences.", + help="If using STRait Razor data, by default, identical sequences (after removing " + "flanking sequences) are combined and reads are summed. 
Checking this will not combine" + " identical sequences.", ) ##################################################################### @@ -273,7 +572,14 @@ def show_STR_page(): col1, col2, col3, col4, col5 = st.columns(5) - output_type = {"STRmix": "strmix", "EuroForMix": "efm", "MPSproto": "mpsproto"}[ + if "output_type" not in st.session_state: + st.session_state.output_type = None + + st.session_state.output_type = { + "STRmix": "strmix", + "EuroForMix": "efm", + "MPSproto": "mpsproto", + }[ col1.selectbox( "Probabilistic Genotyping Software", options=["STRmix", "EuroForMix", "MPSproto"], @@ -281,84 +587,95 @@ def show_STR_page(): ) ] - profile_type = {"Evidence": "evidence", "Reference": "reference"}[ + if "profile_type" not in st.session_state: + st.session_state.profile_type = None + + st.session_state.profile_type = {"Evidence": "evidence", "Reference": "reference"}[ col2.selectbox( "Profile Type", options=["Evidence", "Reference"], - help="Select the file type (format) to create for the probabilistic genotyping software.", + help="Select the file type (format) to create for the probabilistic genotyping " + "software.", ) ] - data_type = {"Sequence": "ngs", "CE allele": "ce", "LUS+ allele": "lusplus"}[ + if "data_type" not in st.session_state: + st.session_state.data_type = None + + st.session_state.data_type = {"Sequence": "ngs", "CE allele": "ce", "LUS+ allele": "lusplus"}[ col3.selectbox( "Data Type", options=["Sequence", "CE allele", "LUS+ allele"], - help="Select the allele type used to determine sequence type (belowAT, stutter or typed) and used in the final output file.", + help="Select the allele type used to determine sequence type (belowAT, stutter or " + "typed) and used in the final output file.", ) ] - info = st.checkbox( + if "info" not in st.session_state: + st.session_state.info = None + + st.session_state.info = st.checkbox( "Create Allele Information File", value=True, - help="Create file containing information about each sequence, including 
sequence type (belowAT, stutter or typed), stuttering sequence information and metrics involving stutter and noise.", + help="Create file containing information about each sequence, including sequence type " + "(belowAT, stutter or typed), stuttering sequence information and metrics involving " + "stutter and noise.", ) - separate = st.checkbox( + if "separate" not in st.session_state: + st.session_state.separate = None + + st.session_state.separate = st.checkbox( "Create Separate Files for Samples", - help="If checked, will create individual files for samples; If unchecked, will create one file with all samples.", + help="If checked, will create individual files for samples; If unchecked, will create " + "one file with all samples.", ) - nofilters = st.checkbox( + if "nofilters" not in st.session_state: + st.session_state.nofilters = None + + st.session_state.nofilters = st.checkbox( "Skip All Filtering Steps", - help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output files containing all sequences.", + help="Filtering will not be performed but will still create EFM/MPSproto/STRmix output " + "files containing all sequences.", ) - strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ + if "strand" not in st.session_state: + st.session_state.strand = None + + st.session_state.strand = {"UAS Orientation": "uas", "Forward Strand": "forward"}[ col4.selectbox( "Strand Orientation", options=["Forward Strand", "UAS Orientation"], - help="Indicates the strand orientation in which to report the sequence in the final output table as some markers are reported in the UAS on the reverse strand. Selecting the UAS Orientation will report those markers on the reverse strand while the remaining will be reported on the forward strand. Selecting the Forward Strand will report all markers on the forward strand orientation. 
This applies to STRmix NGS only.", + help="Indicates the strand orientation in which to report the sequence in the final " + "output table as some markers are reported in the UAS on the reverse strand. " + "Selecting the UAS Orientation will report those markers on the reverse strand while" + " the remaining will be reported on the forward strand. Selecting the Forward Strand " + "will report all markers on the forward strand orientation. This applies to STRmix " + "NGS only.", ) ] - ##################################################################### - # STR: Specify Working Directory # - ##################################################################### - - st.subheader("Output Folder Selection") - - col1, col2, col3, col4, col5 = st.columns(5) - - # Initialize session state if not already initialized - if "wd_dirname" not in st.session_state: - st.session_state.wd_dirname = None - - clicked_wd = col1.button("Please Select An Output Folder") - if clicked_wd: - wd = folder_picker_dialog() - st.session_state.wd_dirname = wd - - # Display selected path - if st.session_state.wd_dirname: - st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname) - - # Store Selected Path to Reference in Config - wd_dirname = st.session_state.wd_dirname - ##################################################################### # STR: Generate Config File Based on Settings # ##################################################################### # Submit Button Instance - if st.button("Submit"): + if st.button("Run lusSTR"): # Check if all required fields are filled - if analysis_software and samp_input and output and wd_dirname: + if ( + st.session_state.analysis_software + and st.session_state.samp_input + and st.session_state.output + and st.session_state.wd_dirname + ): # Validate output prefix - if not validate_prefix(output): + if not validate_prefix(st.session_state.output): st.warning( - "Please enter a valid output prefix. 
Only alphanumeric characters, underscore, and hyphen are allowed." + "Please enter a valid output prefix. Only alphanumeric characters, " + "underscore, and hyphen are allowed." ) st.stop() # Stop execution if prefix is invalid @@ -368,48 +685,80 @@ def show_STR_page(): # Construct config data config_data = { - "analysis_software": analysis_software, - "custom_ranges": custom_ranges, - "sex": sex, - "samp_input": samp_input, - "output": output, - "kit": kit, - "nocombine": nocombine, - "output_type": output_type, - "profile_type": profile_type, - "data_type": data_type, - "info": info, - "separate": separate, - "nofilters": nofilters, - "strand": strand, + "analysis_software": st.session_state.analysis_software, + "custom_ranges": st.session_state.custom_ranges, + "sex": st.session_state.sex, + "samp_input": st.session_state.samp_input, + "output": st.session_state.output, + "kit": st.session_state.kit, + "nocombine": st.session_state.nocombine, + "output_type": st.session_state.output_type, + "profile_type": st.session_state.profile_type, + "data_type": st.session_state.data_type, + "info": st.session_state.info, + "separate": st.session_state.separate, + "nofilters": st.session_state.nofilters, + "strand": st.session_state.strand, } # Generate YAML config file - generate_config_file(config_data, wd_dirname, "STR") + generate_config_file(config_data, st.session_state.wd_dirname, "STR") # Subprocess lusSTR commands command = ["lusstr", "strs", "all"] # Specify WD to lusSTR if wd_dirname: - command.extend(["-w", wd_dirname + "/"]) + command.extend(["-w", st.session_state.wd_dirname + "/"]) # Run lusSTR command in terminal try: subprocess.run(command, check=True) st.success( - "Config File Generated and lusSTR Executed Successfully! Output Files Have Been Saved to Your Designated Directory and Labeled with your Specified Prefix" + "Config File Generated and lusSTR Executed Successfully! 
Output Files " + "Have Been Saved to Your Designated Directory and Labeled with your " + "Specified Prefix" ) except subprocess.CalledProcessError as e: st.error(f"Error: {e}") st.info( - "Please make sure to check the 'How to Use' tab for common error resolutions." + "Please make sure to check the 'How to Use' tab for common error " + "resolutions." ) else: st.warning( - "Please make sure to fill out all required fields (Analysis Software, Input Directory or File, Prefix for Output, and Specification of Working Directory) before submitting." + "Please make sure to fill out all required fields (Analysis Software, Input " + "Directory or File, Prefix for Output, and Specification of Working Directory) " + "before submitting." ) + st.write("---") + st.write( + "After running lusSTR, or if lusSTR has been run previously, the user may view and edit " + "the individual STR marker plots and data." + ) + st.write( + "If lusSTR has been previously run, only the above ```Output Folder``` containing the run" + " files needs to be specified. Other settings will be automatically loaded from the " + "config.yaml file within the specified folder." + ) + if "interactive" not in st.session_state: + st.session_state.interactive = None + if st.button("See Individual Marker Plots & Data") or st.session_state.interactive: + st.session_state.interactive = True + create_settings() + if st.session_state.custom_ranges: + file = ( + f"{st.session_state.wd_dirname}/{st.session_state.output}/" + f"{st.session_state.output}_custom_range" + ) + else: + file = f"{wd_dirname}/{st.session_state.output}/{st.session_state.output}" + try: + sequence_info = pd.read_csv(f"{file}_sequence_info.csv") + interactive_setup(sequence_info, file) + except FileNotFoundError: + print(f"{file}_sequence_info.csv not found. 
Please check output folder specification.") ##################################################################### @@ -425,7 +774,8 @@ def show_SNP_page(): st.title("SNP Workflow") st.info( - "Please Select SNP Settings Below for lusSTR! For Information Regarding the Settings, See the How to Use Tab." + "Please Select SNP Settings Below for lusSTR! For Information Regarding the Settings," + " See the How to Use Tab." ) # Input File Specification @@ -433,7 +783,8 @@ def show_SNP_page(): # Ask user if submitting a directory or individual file st.info( - "Please Indicate If You Are Providing An Individual Input File or a Folder Containing Multiple Input Files" + "Please Indicate If You Are Providing An Individual Input File or a Folder Containing " + "Multiple Input Files" ) input_option = st.radio( "Select Input Option:", ("Individual File", "Folder with Multiple Files") @@ -553,7 +904,8 @@ def show_SNP_page(): reference = col1.text_input( "Please Specify Your Reference Sample IDs", - help="List IDs of the samples to be run as references in EFM; default is no reference samples", + help="List IDs of the samples to be run as references in EFM; default is no " + "reference samples", ) ##################################################################### @@ -577,9 +929,6 @@ def show_SNP_page(): if st.session_state.wd_dirname: st.text_input("Your Specified Output Folder:", st.session_state.wd_dirname) - # Store Selected Path to Reference in Config - wd_dirname = st.session_state.wd_dirname - ##################################################################### # SNP: Generate Config File Based on Settings # ##################################################################### @@ -593,7 +942,8 @@ def show_SNP_page(): # Validate output prefix if not validate_prefix(output): st.warning( - "Please enter a valid output prefix. Only alphanumeric characters, underscore, and hyphen are allowed." + "Please enter a valid output prefix. 
Only alphanumeric characters, " + "underscore, and hyphen are allowed." ) st.stop() # Stop execution if prefix is invalid @@ -617,33 +967,38 @@ def show_SNP_page(): # If a reference file was specified, add to config if reference: - config_data["references"] = reference + config_data["references"] = st.session_state.reference # Generate YAML config file - generate_config_file(config_data, wd_dirname, "SNP") + generate_config_file(config_data, st.session_state.wd_dirname, "SNP") # Subprocess lusSTR commands command = ["lusstr", "snps", "all"] # Specify WD to lusSTR if wd_dirname: - command.extend(["-w", wd_dirname + "/"]) + command.extend(["-w", st.session_state.wd_dirname + "/"]) # Run lusSTR command in terminal try: subprocess.run(command, check=True) st.success( - "Config File Generated and lusSTR Executed Successfully! Output Files Have Been Saved to Your Designated Directory and Labeled with your Specified Prefix" + "Config File Generated and lusSTR Executed Successfully! Output Files " + "Have Been Saved to Your Designated Directory and Labeled with your " + "Specified Prefix" ) except subprocess.CalledProcessError as e: st.error(f"Error: {e}") st.info( - "Please make sure to check the 'How to Use' tab for common error resolutions." + "Please make sure to check the 'How to Use' tab for common error " + "resolutions." ) else: st.warning( - "Please make sure to fill out all required fields (Analysis Software, Input Directory or File, Prefix for Output, and Specification of Working Directory) before submitting." + "Please make sure to fill out all required fields (Analysis Software, Input " + "Directory or File, Prefix for Output, and Specification of Working Directory) " + "before submitting." ) @@ -659,25 +1014,35 @@ def show_how_to_use_page(): st.header("1. File/Folder Path Formatting") st.write( - "Please ensure that the displayed path accurately reflects your selection. 
When using the file or folder picker, navigate to the desired location and click 'OK' to confirm your selection." + "Please ensure that the displayed path accurately reflects your selection. When using the" + " file or folder picker, navigate to the desired location and click 'OK' to confirm your " + "selection." ) st.header("2. Specifying Output Prefix") st.write( - "The purpose of specifying the output prefix is for lusSTR to create result files and folders with that prefix in your working directory. Please ensure that you are following proper file naming formatting and rules when specifying this prefix. Avoid using characters such as '/', '', '.', and others. Note: To avoid potential errors, you can simply use the default placeholder for output." + "The purpose of specifying the output prefix is for lusSTR to create result files and " + "folders with that prefix in your working directory. Please ensure that you are following" + " proper file naming formatting and rules when specifying this prefix. Avoid using " + "characters such as '/', '', '.', and others. Note: To avoid potential errors, you can " + "simply use the default placeholder for output." ) st.code("Incorrect: 'working_directory/subfolder/subfolder'\nCorrect: output") st.write( - "Note that some result files may be saved directly in the working directory with the specified prefix, while others will be populated in a folder labeled with the prefix in your working directory." + "Note that some result files may be saved directly in the working directory with the " + "specified prefix, while others will be populated in a folder labeled with the prefix " + "in your working directory." ) st.write("Be aware of this behavior when checking for output files.") st.header("3. Specifying Output Folder") st.write( - "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR output files will be saved. To avoid potential errors, specifying a working directory is required." 
+ "Please Ensure That You Properly Specify an Output Folder. This is where all lusSTR " + "output files will be saved. To avoid potential errors, specifying a working directory " + "is required." ) st.title("About lusSTR") @@ -687,13 +1052,16 @@ def show_how_to_use_page(): **_lusSTR Accommodates Four Different Input Formats:_** - (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP processing) in .xlsx format (a single file or directory containing multiple files) + (1) UAS Sample Details Report, UAS Sample Report, and UAS Phenotype Report (for SNP + processing) in .xlsx format (a single file or directory containing multiple files) - (2) STRait Razor v3 output with one sample per file (a single file or directory containing multiple files) + (2) STRait Razor v3 output with one sample per file (a single file or directory containing + multiple files) (3) GeneMarker v2.6 output (a single file or directory containing multiple files) - (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, SampleID; Optional last two columns can be Project and Analysis IDs. + (4) Sample(s) sequences in CSV format; first four columns must be Locus, NumReads, Sequence, + SampleID; Optional last two columns can be Project and Analysis IDs. 
""", @@ -709,7 +1077,8 @@ def show_how_to_use_page(): def show_contact_page(): st.title("Contact Us") st.write( - "For any questions or issues, please contact rebecca.mitchell@st.dhs.gov, daniel.standage@st.dhs.gov, or s.h.syed@email.msmary.edu" + "For any questions or issues, please contact rebecca.mitchell@st.dhs.gov, " + "daniel.standage@st.dhs.gov, or s.h.syed@email.msmary.edu" ) diff --git a/lusSTR/cli/snps.py b/lusSTR/cli/snps.py index 7304faa0..edb3a2aa 100644 --- a/lusSTR/cli/snps.py +++ b/lusSTR/cli/snps.py @@ -22,15 +22,15 @@ def main(args): lusSTR.snakefile(workflow="snps"), targets=[pretarget], workdir=workdir, verbose=True ) if result is not True: - raise SystemError('Snakemake failed') + raise SystemError("Snakemake failed") + def subparser(subparsers): - p = subparsers.add_parser( - "snps", description="Running the SNP pipeline" - ) + p = subparsers.add_parser("snps", description="Running the SNP pipeline") p.add_argument( - "target", choices=["format", "all"], + "target", + choices=["format", "all"], help="Steps to run. Specifying 'format' will run only 'format'. Specifying " - "'all' will run all steps of the SNP workflow ('format' and 'convert')." 
+ "'all' will run all steps of the SNP workflow ('format' and 'convert').", ) - p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") \ No newline at end of file + p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") diff --git a/lusSTR/cli/strs.py b/lusSTR/cli/strs.py index d1f1510e..d5fbaa68 100644 --- a/lusSTR/cli/strs.py +++ b/lusSTR/cli/strs.py @@ -18,20 +18,18 @@ def main(args): pretarget = args.target if args.target != "all" else "filter" workdir = args.workdir - result = snakemake( - lusSTR.snakefile(workflow="strs"), targets=[pretarget], workdir=workdir - ) + result = snakemake(lusSTR.snakefile(workflow="strs"), targets=[pretarget], workdir=workdir) if result is not True: - raise SystemError('Snakemake failed') + raise SystemError("Snakemake failed") + def subparser(subparsers): - p = subparsers.add_parser( - "strs", description="Running the STR pipeline" - ) + p = subparsers.add_parser("strs", description="Running the STR pipeline") p.add_argument( - "target", choices=["format", "convert", "all"], + "target", + choices=["format", "convert", "all"], help="Steps to run. Specifying 'format' will run only 'format'. Specifying " "'convert' will run both 'format' and 'convert'. Specifying 'all' will run " - "all steps of the STR workflow ('format', 'convert' and 'filter')." 
+ "all steps of the STR workflow ('format', 'convert' and 'filter').", ) p.add_argument("-w", "--workdir", metavar="W", default=".", help="working directory") diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py index 859ce736..0892ca49 100644 --- a/lusSTR/scripts/filter_settings.py +++ b/lusSTR/scripts/filter_settings.py @@ -48,7 +48,7 @@ def single_allele_thresholds(metadata, locus_reads, single_all_df): elif thresholds("Analytical", metadata, locus_reads, single_all_df["Reads"][0])[1] is False: single_all_df[["allele_type", "perc_noise"]] = ["BelowAT", 1.0] elif thresholds("Analytical", metadata, locus_reads, single_all_df["Reads"][0])[1] is True: - single_all_df["allele_type"] = "real_allele" + single_all_df["allele_type"] = "Typed" return single_all_df @@ -67,7 +67,7 @@ def multiple_allele_thresholds(metadata, locus_reads, locus_allele_info): round(quest_allele_reads / locus_reads, 3), ] else: - locus_allele_info.loc[i, "allele_type"] = "real_allele" + locus_allele_info.loc[i, "allele_type"] = "Typed" return locus_allele_info, locus_reads @@ -102,7 +102,7 @@ def thresholds(filter, metadata, locus_reads, quest_al_reads): def ce_filtering(locus_allele_info, locus_reads, metadata, datatype, brack_col): for i in range(len(locus_allele_info)): # check for stutter alleles - if locus_allele_info.loc[i, "allele_type"] != "real_allele": + if locus_allele_info.loc[i, "allele_type"] != "Typed": continue else: ref_allele_reads = locus_allele_info.loc[i, "Reads"] @@ -133,8 +133,8 @@ def ce_filtering(locus_allele_info, locus_reads, metadata, datatype, brack_col): 3, ) else: - locus_allele_info.loc[j, "perc_stutter"] = "" - locus_allele_info.loc[j, "perc_noise"] = "" + locus_allele_info.loc[j, "perc_stutter"] = None + locus_allele_info.loc[j, "perc_noise"] = None elif "BelowAT" in locus_allele_info.loc[j, "allele_type"]: locus_allele_info.loc[j, "perc_noise"] = round( locus_allele_info.loc[j, "Reads"] / locus_reads, 3 @@ -345,7 +345,7 @@ def 
output_allele_call(quest_al_reads, all_thresh, orig_type): if quest_al_reads <= all_thresh: all_type = orig_type else: - all_type = "real_allele" + all_type = "Typed" return all_type @@ -382,7 +382,7 @@ def check_2stutter(stutter_df, allele_des, allele, brack_col): def allele_counts(allele_df): mix_df = pd.DataFrame(columns=["SampleID", "Locus", "Flags"]) try: - if allele_df.allele_type.value_counts()["real_allele"] > 2: + if allele_df.allele_type.value_counts()["Typed"] > 2: mix_df.loc[len(mix_df.index)] = [ allele_df.loc[1, "SampleID"], allele_df.loc[1, "Locus"], @@ -466,8 +466,8 @@ def allele_imbalance_check(allele_df): locus = allele_df.loc[0, "Locus"] metadata = filter_marker_data[locus] het_perc = metadata["MinimumHeterozygousBalanceThresholdDynamicPercent"] - if allele_df.allele_type.value_counts()["real_allele"] >= 2: - real_df = allele_df[allele_df["allele_type"] == "real_allele"].reset_index(drop=True) + if allele_df.allele_type.value_counts()["Typed"] >= 2: + real_df = allele_df[allele_df["allele_type"] == "Typed"].reset_index(drop=True) max_reads = real_df["Reads"].max() min_reads = real_df["Reads"].min() if min_reads / max_reads < het_perc: diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv index 9bcfd9a0..5a2541dd 100644 --- a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv +++ b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv @@ -1,24 +1,24 @@ SampleID,Locus,CE_Allele,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter -Sample1,D4S2408,10.0,10_10_0,1022,real_allele,,,,,, +Sample1,D4S2408,10.0,10_10_0,1022,Typed,,,,,, Sample1,D4S2408,9.0,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,, -Sample1,D4S2408,8.0,8_8_0,1050,real_allele,,,,,, -Sample1,D8S1179,14.0,14_12_1_0,869,real_allele,,,,,, +Sample1,D4S2408,8.0,8_8_0,1050,Typed,,,,,, 
+Sample1,D8S1179,14.0,14_12_1_0,869,Typed,,,,,, Sample1,D8S1179,13.0,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212 Sample1,D8S1179,12.0,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201 -Sample1,D9S1122,13.0,13_11,948,real_allele,,,,,, +Sample1,D9S1122,13.0,13_11,948,Typed,,,,,, Sample1,D9S1122,12.0,12_10,108,-1_stutter,13_11,,948.0,,,0.114 -Sample1,D9S1122,11.0,11_11,991,real_allele,,,,,, +Sample1,D9S1122,11.0,11_11,991,Typed,,,,,, Sample1,D9S1122,10.0,10_10,87,-1_stutter,11_11,,991.0,,,0.088 -Sample1,FGA,23.0,23_15_3_0,1436,real_allele,,,,,, +Sample1,FGA,23.0,23_15_3_0,1436,Typed,,,,,, Sample1,FGA,22.0,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182 Sample1,FGA,21.0,21_13_3_0,48,BelowAT,,,,,0.013, -Sample1,FGA,20.0,20_12_3_0,1750,real_allele,,,,,, -Sample1,FGA,18.0,18_10_3_0,181,real_allele,,,,,, +Sample1,FGA,20.0,20_12_3_0,1750,Typed,,,,,, +Sample1,FGA,18.0,18_10_3_0,181,Typed,,,,,, Sample1,FGA,17.0,17_9_3_0,15,BelowAT,,,,,0.004, -Sample1,PENTA D,15.0,15_15,50,real_allele,,,,,, -Sample1,PENTA D,13.0,13_13,1000,real_allele,,,,,, -Sample1,PENTA E,7.0,7_7,505,real_allele,,,,,, -Sample1,TH01,7.0,7_7,2197,real_allele,,,,,, -Sample1,TH01,6.0,6_6,1632,real_allele,,,,,, +Sample1,PENTA D,15.0,15_15,50,Typed,,,,,, +Sample1,PENTA D,13.0,13_13,1000,Typed,,,,,, +Sample1,PENTA E,7.0,7_7,505,Typed,,,,,, +Sample1,TH01,7.0,7_7,2197,Typed,,,,,, +Sample1,TH01,6.0,6_6,1632,Typed,,,,,, Sample1,TH01,5.0,5_5,66,BelowAT,,,,,0.017, Sample1,TPOX,11.0,11_11,15,BelowAT,,,,,1.0, diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv b/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv index 9016d7a3..84e60cfd 100644 --- a/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv +++ b/lusSTR/tests/data/LUSPlus_stutter_test/test_filtering_EFMoutput_sequence_info.csv @@ -1,26 +1,26 @@ 
-SampleID,Locus,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter,CE_Allele -Sample1,D4S2408,10_10_0,900.0,real_allele,,,,,,, -Sample1,D4S2408,9_9_0,1357.0,real_allele,,,,,,, -Sample1,D4S2408,8_8_0,1000.0,real_allele,,,,,,, -Sample1,D8S1179,14_11_1_0,739.0,real_allele,,,,,,, -Sample1,D8S1179,13_11_1_0,95.0,real_allele,,,,,,, -Sample1,D8S1179,13_10_1_0,89.0,-1_stutter,14_11_1_0,,739.0,,,0.12, -Sample1,D8S1179,12_9_1_0,26.0,real_allele,,,,,,, -Sample1,D8S1179,12_10_1_0,11.0,BelowAT,,,,,0.01,, -Sample1,D9S1122,13_11,948.0,real_allele,,,,,,, -Sample1,D9S1122,12_10,108.0,-1_stutter,13_11,,948.0,,,0.114, -Sample1,D9S1122,11_11,991.0,real_allele,,,,,,, -Sample1,D9S1122,10_10,87.0,-1_stutter,11_11,,991.0,,,0.088, -Sample1,FGA,23_15_3_0,1436.0,real_allele,,,,,,, -Sample1,FGA,22_14_3_0,262.0,-1_stutter,23_15_3_0,,1436.0,,,0.182, -Sample1,FGA,21_13_3_0,48.0,BelowAT,,,,,0.013,, -Sample1,FGA,20_12_3_0,1750.0,real_allele,,,,,,, -Sample1,FGA,18_10_3_0,181.0,real_allele,,,,,,, -Sample1,FGA,17_9_3_0,15.0,BelowAT,,,,,0.004,, -Sample1,PENTA D,15_15,50.0,real_allele,,,,,,, -Sample1,PENTA D,13_13,1000.0,real_allele,,,,,,, -Sample1,PENTA E,7_7,505.0,real_allele,,,,,,,7.0 -Sample1,TH01,7_7,2197.0,real_allele,,,,,,, -Sample1,TH01,6_6,1632.0,real_allele,,,,,,, -Sample1,TH01,5_5,66.0,BelowAT,,,,,0.017,, -Sample1,TPOX,11_11,15.0,BelowAT,,,,,1.0,,11.0 +SampleID,Locus,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter,CE_Allele +Sample1,D4S2408,10_10_0,900,Typed,,,,,,, +Sample1,D4S2408,9_9_0,1357,Typed,,,,,,, +Sample1,D4S2408,8_8_0,1000,Typed,,,,,,, +Sample1,D8S1179,14_11_1_0,739,Typed,,,,,,, +Sample1,D8S1179,13_11_1_0,95,Typed,,,,,,, +Sample1,D8S1179,13_10_1_0,89,-1_stutter,14_11_1_0,,739,,,0.12, +Sample1,D8S1179,12_9_1_0,26,Typed,,,,,,, +Sample1,D8S1179,12_10_1_0,11,BelowAT,,,,,0.01,, +Sample1,D9S1122,13_11,948,Typed,,,,,,, 
+Sample1,D9S1122,12_10,108,-1_stutter,13_11,,948,,,0.114, +Sample1,D9S1122,11_11,991,Typed,,,,,,, +Sample1,D9S1122,10_10,87,-1_stutter,11_11,,991,,,0.088, +Sample1,FGA,23_15_3_0,1436,Typed,,,,,,, +Sample1,FGA,22_14_3_0,262,-1_stutter,23_15_3_0,,1436,,,0.182, +Sample1,FGA,21_13_3_0,48,BelowAT,,,,,0.013,, +Sample1,FGA,20_12_3_0,1750,Typed,,,,,,, +Sample1,FGA,18_10_3_0,181,Typed,,,,,,, +Sample1,FGA,17_9_3_0,15,BelowAT,,,,,0.004,, +Sample1,PENTA D,15_15,50,Typed,,,,,,, +Sample1,PENTA D,13_13,1000,Typed,,,,,,, +Sample1,PENTA E,7_7,505,Typed,,,,,,,7 +Sample1,TH01,7_7,2197,Typed,,,,,,, +Sample1,TH01,6_6,1632,Typed,,,,,,, +Sample1,TH01,5_5,66,BelowAT,,,,,0.017,, +Sample1,TPOX,11_11,15,BelowAT,,,,,1,,11 \ No newline at end of file diff --git a/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv b/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv index 6d5ac112..295bc50a 100644 --- a/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv +++ b/lusSTR/tests/data/MPSproto_test/test_filtering_EFMoutput_sequence_info.csv @@ -1,26 +1,26 @@ -SampleID,Locus,UAS_Output_Sequence,CE_Allele,UAS_Output_Bracketed_Notation,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter -Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,10.0,[ATCT]10,900,real_allele,,,,,, -Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,9.0,[ATCT]9,1357,real_allele,,,,,, -Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCT,8.0,[ATCT]8,1000,real_allele,,,,,, -Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14.0,[TCTA]2 TCTG [TCTA]11,739,real_allele,,,,,, -Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,TCTA TCTG [TCTA]11,95,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.129 -Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,[TCTA]2 TCTG [TCTA]10,89,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.12 
-Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,[TCTA]2 TCTG [TCTA]9,26,-2_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.035 -Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,TCTA TCTG [TCTA]10,11,BelowAT,,,,,0.01, -Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,13.0,TAGA TCGA [TAGA]11,948,real_allele,,,,,, -Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,12.0,TAGA TCGA [TAGA]10,108,-1_stutter,TAGA TCGA [TAGA]11,,948.0,,,0.114 -Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11.0,[TAGA]11,991,real_allele,,,,,, -Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,10.0,[TAGA]10,87,-1_stutter,[TAGA]11,,991.0,,,0.088 -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,23.0,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,1436,real_allele,,,,,, -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,22.0,[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2,262,-1_stutter,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,,1436.0,,,0.182 -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,21.0,[TTTC]3 TTTT TTCT [CTTT]13 CTCC [TTCC]2,48,BelowAT,,,,,0.013, -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,20.0,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,1750,real_allele,,,,,, -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,18.0,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,181,real_allele,,,,,, -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,17.0,[TTTC]3 TTTT TTCT [CTTT]9 CTCC [TTCC]2,15,BelowAT,,,,,0.004, -Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,15.0,AAAAG [AAAGA]13,50,real_allele,,,,,, -Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13.0,AAAAG [AAAGA]13,1000,real_allele,,,,,, 
-Sample1,PENTA E,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,7.0,[AAAGA]7,505,real_allele,,,,,, -Sample1,TH01,AATGAATGAATGAATGAATGAATGAATG,7.0,[AATG]7,2197,real_allele,,,,,, -Sample1,TH01,AATGAATGAATGAATGAATGAATG,6.0,[AATG]6,1632,real_allele,,,,,, -Sample1,TH01,AATGAATGAATGAATGAATG,5.0,[AATG]5,66,BelowAT,,,,,0.017, -Sample1,TPOX,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,11.0,[AATG]11,15,BelowAT,,,,,1.0, +SampleID,Locus,UAS_Output_Sequence,CE_Allele,UAS_Output_Bracketed_Notation,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter +Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,10,[ATCT]10,900,Typed,,,,,, +Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,9,[ATCT]9,1357,Typed,,,,,, +Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCT,8,[ATCT]8,1000,Typed,,,,,, +Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14,[TCTA]2 TCTG [TCTA]11,739,Typed,,,,,, +Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13,TCTA TCTG [TCTA]11,95,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739,,,0.129 +Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13,[TCTA]2 TCTG [TCTA]10,89,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739,,,0.12 +Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,12,[TCTA]2 TCTG [TCTA]9,26,-2_stutter,[TCTA]2 TCTG [TCTA]11,,739,,,0.035 +Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,12,TCTA TCTG [TCTA]10,11,BelowAT,,,,,0.01, +Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,13,TAGA TCGA [TAGA]11,948,Typed,,,,,, +Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,12,TAGA TCGA [TAGA]10,108,-1_stutter,TAGA TCGA [TAGA]11,,948,,,0.114 +Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11,[TAGA]11,991,Typed,,,,,, +Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,10,[TAGA]10,87,-1_stutter,[TAGA]11,,991,,,0.088 
+Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,23,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,1436,Typed,,,,,, +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,22,[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2,262,-1_stutter,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,,1436,,,0.182 +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,21,[TTTC]3 TTTT TTCT [CTTT]13 CTCC [TTCC]2,48,BelowAT,,,,,0.013, +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,20,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,1750,Typed,,,,,, +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,18,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,181,Typed,,,,,, +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,17,[TTTC]3 TTTT TTCT [CTTT]9 CTCC [TTCC]2,15,BelowAT,,,,,0.004, +Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,15,AAAAG [AAAGA]13,50,Typed,,,,,, +Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13,AAAAG [AAAGA]13,1000,Typed,,,,,, +Sample1,PENTA E,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,7,[AAAGA]7,505,Typed,,,,,, +Sample1,TH01,AATGAATGAATGAATGAATGAATGAATG,7,[AATG]7,2197,Typed,,,,,, +Sample1,TH01,AATGAATGAATGAATGAATGAATG,6,[AATG]6,1632,Typed,,,,,, +Sample1,TH01,AATGAATGAATGAATGAATG,5,[AATG]5,66,BelowAT,,,,,0.017, +Sample1,TPOX,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,11,[AATG]11,15,BelowAT,,,,,1, \ No newline at end of file diff --git a/lusSTR/tests/data/NGS_stutter_test/STRmix_Files_sequence_info.csv b/lusSTR/tests/data/NGS_stutter_test/STRmix_Files_sequence_info.csv index 6d5ac112..16411c9d 100644 --- a/lusSTR/tests/data/NGS_stutter_test/STRmix_Files_sequence_info.csv +++ b/lusSTR/tests/data/NGS_stutter_test/STRmix_Files_sequence_info.csv @@ -1,26 +1,26 @@ 
SampleID,Locus,UAS_Output_Sequence,CE_Allele,UAS_Output_Bracketed_Notation,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter -Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,10.0,[ATCT]10,900,real_allele,,,,,, -Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,9.0,[ATCT]9,1357,real_allele,,,,,, -Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCT,8.0,[ATCT]8,1000,real_allele,,,,,, -Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14.0,[TCTA]2 TCTG [TCTA]11,739,real_allele,,,,,, +Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,10.0,[ATCT]10,900,Typed,,,,,, +Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,9.0,[ATCT]9,1357,Typed,,,,,, +Sample1,D4S2408,ATCTATCTATCTATCTATCTATCTATCTATCT,8.0,[ATCT]8,1000,Typed,,,,,, +Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14.0,[TCTA]2 TCTG [TCTA]11,739,Typed,,,,,, Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,TCTA TCTG [TCTA]11,95,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.129 Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,[TCTA]2 TCTG [TCTA]10,89,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.12 Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,[TCTA]2 TCTG [TCTA]9,26,-2_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.035 Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,TCTA TCTG [TCTA]10,11,BelowAT,,,,,0.01, -Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,13.0,TAGA TCGA [TAGA]11,948,real_allele,,,,,, +Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,13.0,TAGA TCGA [TAGA]11,948,Typed,,,,,, Sample1,D9S1122,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,12.0,TAGA TCGA [TAGA]10,108,-1_stutter,TAGA TCGA [TAGA]11,,948.0,,,0.114 -Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11.0,[TAGA]11,991,real_allele,,,,,, 
+Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11.0,[TAGA]11,991,Typed,,,,,, Sample1,D9S1122,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,10.0,[TAGA]10,87,-1_stutter,[TAGA]11,,991.0,,,0.088 -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,23.0,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,1436,real_allele,,,,,, +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,23.0,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,1436,Typed,,,,,, Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,22.0,[TTTC]3 TTTT TTCT [CTTT]14 CTCC [TTCC]2,262,-1_stutter,[TTTC]3 TTTT TTCT [CTTT]15 CTCC [TTCC]2,,1436.0,,,0.182 Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,21.0,[TTTC]3 TTTT TTCT [CTTT]13 CTCC [TTCC]2,48,BelowAT,,,,,0.013, -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,20.0,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,1750,real_allele,,,,,, -Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,18.0,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,181,real_allele,,,,,, +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,20.0,[TTTC]3 TTTT TTCT [CTTT]12 CTCC [TTCC]2,1750,Typed,,,,,, +Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,18.0,[TTTC]3 TTTT TTCT [CTTT]10 CTCC [TTCC]2,181,Typed,,,,,, Sample1,FGA,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,17.0,[TTTC]3 TTTT TTCT [CTTT]9 CTCC [TTCC]2,15,BelowAT,,,,,0.004, -Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,15.0,AAAAG [AAAGA]13,50,real_allele,,,,,, -Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13.0,AAAAG [AAAGA]13,1000,real_allele,,,,,, -Sample1,PENTA 
E,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,7.0,[AAAGA]7,505,real_allele,,,,,, -Sample1,TH01,AATGAATGAATGAATGAATGAATGAATG,7.0,[AATG]7,2197,real_allele,,,,,, -Sample1,TH01,AATGAATGAATGAATGAATGAATG,6.0,[AATG]6,1632,real_allele,,,,,, +Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,15.0,AAAAG [AAAGA]13,50,Typed,,,,,, +Sample1,PENTA D,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13.0,AAAAG [AAAGA]13,1000,Typed,,,,,, +Sample1,PENTA E,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,7.0,[AAAGA]7,505,Typed,,,,,, +Sample1,TH01,AATGAATGAATGAATGAATGAATGAATG,7.0,[AATG]7,2197,Typed,,,,,, +Sample1,TH01,AATGAATGAATGAATGAATGAATG,6.0,[AATG]6,1632,Typed,,,,,, Sample1,TH01,AATGAATGAATGAATGAATG,5.0,[AATG]5,66,BelowAT,,,,,0.017, Sample1,TPOX,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,11.0,[AATG]11,15,BelowAT,,,,,1.0, diff --git a/lusSTR/tests/data/NGS_stutter_test/custom/test_stutter_sequence_info.csv b/lusSTR/tests/data/NGS_stutter_test/custom/test_stutter_sequence_info.csv index b03f67e8..210e758e 100644 --- a/lusSTR/tests/data/NGS_stutter_test/custom/test_stutter_sequence_info.csv +++ b/lusSTR/tests/data/NGS_stutter_test/custom/test_stutter_sequence_info.csv @@ -1,19 +1,19 @@ SampleID,Locus,Custom_Range_Sequence,CE_Allele,Custom_Bracketed_Notation,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter -Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14.0,[TCTA]2 TCTG [TCTA]11,739,real_allele,,,,,, +Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,14.0,[TCTA]2 TCTG [TCTA]11,739,Typed,,,,,, Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,TCTA TCTG [TCTA]11,95,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.129 Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,13.0,[TCTA]2 TCTG [TCTA]10,89,-1_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.12 
Sample1,D8S1179,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,[TCTA]2 TCTG [TCTA]9,26,-2_stutter,[TCTA]2 TCTG [TCTA]11,,739.0,,,0.035 Sample1,D8S1179,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,12.0,TCTA TCTG [TCTA]10,11,BelowAT,,,,,0.01, -Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,23.0,[GGAA]2 GGAG [AAAG]15 AGAA AAAA [GAAA]3,1436,real_allele,,,,,, +Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,23.0,[GGAA]2 GGAG [AAAG]15 AGAA AAAA [GAAA]3,1436,Typed,,,,,, Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,22.0,[GGAA]2 GGAG [AAAG]14 AGAA AAAA [GAAA]3,262,-1_stutter,[GGAA]2 GGAG [AAAG]15 AGAA AAAA [GAAA]3,,1436.0,,,0.182 Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,21.0,[GGAA]2 GGAG [AAAG]13 AGAA AAAA [GAAA]3,48,BelowAT,,,,,0.013, -Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,20.0,[GGAA]2 GGAG [AAAG]12 AGAA AAAA [GAAA]3,1750,real_allele,,,,,, -Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,18.0,[GGAA]2 GGAG [AAAG]10 AGAA AAAA [GAAA]3,181,real_allele,,,,,, +Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,20.0,[GGAA]2 GGAG [AAAG]12 AGAA AAAA [GAAA]3,1750,Typed,,,,,, +Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,18.0,[GGAA]2 GGAG [AAAG]10 AGAA AAAA [GAAA]3,181,Typed,,,,,, Sample1,FGA,GGAAGGAAGGAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAAAGAAAGAAAGAAA,17.0,[GGAA]2 GGAG [AAAG]9 AGAA AAAA [GAAA]3,15,BelowAT,,,,,0.004, Sample1,PENTA D,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,14.0,[AAAGA]14,22,+1_stutter,[AAAGA]13,,1000.0,,,0.022 -Sample1,PENTA D,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13.0,[AAAGA]13,1000,real_allele,,,,,, 
-Sample1,PENTA E,TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT,7.0,[TCTTT]7,505,real_allele,,,,,, -Sample1,TH01,ATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGA,7.0,ATGG TG [AATG]7 AGGG A,2197,real_allele,,,,,, -Sample1,TH01,ATGGTGAATGAATGAATGAATGAATGAATGAGGGA,6.0,ATGG TG [AATG]6 AGGG A,1632,real_allele,,,,,, +Sample1,PENTA D,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,13.0,[AAAGA]13,1000,Typed,,,,,, +Sample1,PENTA E,TCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTT,7.0,[TCTTT]7,505,Typed,,,,,, +Sample1,TH01,ATGGTGAATGAATGAATGAATGAATGAATGAATGAGGGA,7.0,ATGG TG [AATG]7 AGGG A,2197,Typed,,,,,, +Sample1,TH01,ATGGTGAATGAATGAATGAATGAATGAATGAGGGA,6.0,ATGG TG [AATG]6 AGGG A,1632,Typed,,,,,, Sample1,TH01,ATGGTGAATGAATGAATGAATGAATGAGGGA,5.0,ATGG TG [AATG]5 AGGG A,66,BelowAT,,,,,0.017, Sample1,TPOX,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,11.0,[AATG]11,15,BelowAT,,,,,1.0, diff --git a/lusSTR/tests/data/RU_stutter_test/STRmix_Files_sequence_info.csv b/lusSTR/tests/data/RU_stutter_test/STRmix_Files_sequence_info.csv index 0e518665..861284af 100644 --- a/lusSTR/tests/data/RU_stutter_test/STRmix_Files_sequence_info.csv +++ b/lusSTR/tests/data/RU_stutter_test/STRmix_Files_sequence_info.csv @@ -1,24 +1,24 @@ SampleID,Locus,CE_Allele,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter -Sample1,D4S2408,10.0,900,real_allele,,,,,, -Sample1,D4S2408,9.0,1357,real_allele,,,,,, -Sample1,D4S2408,8.0,1000,real_allele,,,,,, -Sample1,D8S1179,14.0,869,real_allele,,,,,, +Sample1,D4S2408,10.0,900,Typed,,,,,, +Sample1,D4S2408,9.0,1357,Typed,,,,,, +Sample1,D4S2408,8.0,1000,Typed,,,,,, +Sample1,D8S1179,14.0,869,Typed,,,,,, Sample1,D8S1179,13.0,184,-1_stutter,14.0,,869.0,,,0.212 Sample1,D8S1179,12.0,37,-2_stutter,14.0,,869.0,,,0.201 -Sample1,D9S1122,13.0,948,real_allele,,,,,, +Sample1,D9S1122,13.0,948,Typed,,,,,, Sample1,D9S1122,12.0,108,-1_stutter/+1_stutter,13.0,11.0,948.0,991.0,, -Sample1,D9S1122,11.0,991,real_allele,,,,,, 
+Sample1,D9S1122,11.0,991,Typed,,,,,, Sample1,D9S1122,10.0,87,-1_stutter,11.0,,991.0,,,0.088 -Sample1,FGA,23.0,1436,real_allele,,,,,, +Sample1,FGA,23.0,1436,Typed,,,,,, Sample1,FGA,22.0,262,-1_stutter,23.0,,1436.0,,,0.182 Sample1,FGA,21.0,48,BelowAT,,,,,0.013, -Sample1,FGA,20.0,1750,real_allele,,,,,, -Sample1,FGA,18.0,181,real_allele,,,,,, +Sample1,FGA,20.0,1750,Typed,,,,,, +Sample1,FGA,18.0,181,Typed,,,,,, Sample1,FGA,17.0,15,BelowAT,,,,,0.004, -Sample1,PENTA D,15.0,50,real_allele,,,,,, -Sample1,PENTA D,13.0,1000,real_allele,,,,,, -Sample1,PENTA E,7.0,505,real_allele,,,,,, -Sample1,TH01,7.0,2197,real_allele,,,,,, -Sample1,TH01,6.0,1632,real_allele,,,,,, +Sample1,PENTA D,15.0,50,Typed,,,,,, +Sample1,PENTA D,13.0,1000,Typed,,,,,, +Sample1,PENTA E,7.0,505,Typed,,,,,, +Sample1,TH01,7.0,2197,Typed,,,,,, +Sample1,TH01,6.0,1632,Typed,,,,,, Sample1,TH01,5.0,66,BelowAT,,,,,0.017, Sample1,TPOX,11.0,15,BelowAT,,,,,1.0, diff --git a/lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput_sequence_info.csv b/lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput_sequence_info.csv index 0e518665..35930479 100644 --- a/lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput_sequence_info.csv +++ b/lusSTR/tests/data/RU_stutter_test/test_filtering_EFMoutput_sequence_info.csv @@ -1,24 +1,24 @@ -SampleID,Locus,CE_Allele,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter -Sample1,D4S2408,10.0,900,real_allele,,,,,, -Sample1,D4S2408,9.0,1357,real_allele,,,,,, -Sample1,D4S2408,8.0,1000,real_allele,,,,,, -Sample1,D8S1179,14.0,869,real_allele,,,,,, -Sample1,D8S1179,13.0,184,-1_stutter,14.0,,869.0,,,0.212 -Sample1,D8S1179,12.0,37,-2_stutter,14.0,,869.0,,,0.201 -Sample1,D9S1122,13.0,948,real_allele,,,,,, -Sample1,D9S1122,12.0,108,-1_stutter/+1_stutter,13.0,11.0,948.0,991.0,, -Sample1,D9S1122,11.0,991,real_allele,,,,,, -Sample1,D9S1122,10.0,87,-1_stutter,11.0,,991.0,,,0.088 -Sample1,FGA,23.0,1436,real_allele,,,,,, 
-Sample1,FGA,22.0,262,-1_stutter,23.0,,1436.0,,,0.182 -Sample1,FGA,21.0,48,BelowAT,,,,,0.013, -Sample1,FGA,20.0,1750,real_allele,,,,,, -Sample1,FGA,18.0,181,real_allele,,,,,, -Sample1,FGA,17.0,15,BelowAT,,,,,0.004, -Sample1,PENTA D,15.0,50,real_allele,,,,,, -Sample1,PENTA D,13.0,1000,real_allele,,,,,, -Sample1,PENTA E,7.0,505,real_allele,,,,,, -Sample1,TH01,7.0,2197,real_allele,,,,,, -Sample1,TH01,6.0,1632,real_allele,,,,,, -Sample1,TH01,5.0,66,BelowAT,,,,,0.017, -Sample1,TPOX,11.0,15,BelowAT,,,,,1.0, +SampleID,Locus,CE_Allele,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter +Sample1,D4S2408,10,900,Typed,,,,,, +Sample1,D4S2408,9,1357,Typed,,,,,, +Sample1,D4S2408,8,1000,Typed,,,,,, +Sample1,D8S1179,14,869,Typed,,,,,, +Sample1,D8S1179,13,184,-1_stutter,14,,869,,,0.212 +Sample1,D8S1179,12,37,-2_stutter,14,,869,,,0.201 +Sample1,D9S1122,13,948,Typed,,,,,, +Sample1,D9S1122,12,108,-1_stutter/+1_stutter,13,11,948,991,, +Sample1,D9S1122,11,991,Typed,,,,,, +Sample1,D9S1122,10,87,-1_stutter,11,,991,,,0.088 +Sample1,FGA,23,1436,Typed,,,,,, +Sample1,FGA,22,262,-1_stutter,23,,1436,,,0.182 +Sample1,FGA,21,48,BelowAT,,,,,0.013, +Sample1,FGA,20,1750,Typed,,,,,, +Sample1,FGA,18,181,Typed,,,,,, +Sample1,FGA,17,15,BelowAT,,,,,0.004, +Sample1,PENTA D,15,50,Typed,,,,,, +Sample1,PENTA D,13,1000,Typed,,,,,, +Sample1,PENTA E,7,505,Typed,,,,,, +Sample1,TH01,7,2197,Typed,,,,,, +Sample1,TH01,6,1632,Typed,,,,,, +Sample1,TH01,5,66,BelowAT,,,,,0.017, +Sample1,TPOX,11,15,BelowAT,,,,,1, \ No newline at end of file diff --git a/lusSTR/tests/data/powerseq_example_sexloci_sequence_info.csv b/lusSTR/tests/data/powerseq_example_sexloci_sequence_info.csv index 9b257083..feb0a9e0 100644 --- a/lusSTR/tests/data/powerseq_example_sexloci_sequence_info.csv +++ b/lusSTR/tests/data/powerseq_example_sexloci_sequence_info.csv @@ -1,43 +1,43 @@ 
SampleID,Locus,UAS_Output_Sequence,CE_Allele,UAS_Output_Bracketed_Notation,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter -powerseq_example,DYS19,TAGATAGATAGATAGGTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,14.0,[TAGA]3 TAGG [TAGA]11,13539,real_allele,,,,,, +powerseq_example,DYS19,TAGATAGATAGATAGGTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,14.0,[TAGA]3 TAGG [TAGA]11,13539,Typed,,,,,, powerseq_example,DYS19,TAGATAGATAGATAGGTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,13.0,[TAGA]3 TAGG [TAGA]10,1326,-1_stutter,[TAGA]3 TAGG [TAGA]11,,13539.0,,,0.098 -powerseq_example,DYS385A-B,AAGGAAGGAAGGAAGGAGAAAGAAAGTAAAAAAGAAAGAAAGAGAAAAAGAGAAAAAGAAAGAAAGAGAAGAAAGAGAAAGAGGAAAGAGAAAGAAAGGAAGGAAGGAAGGAAGGAAGGGAAAGAAATAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAA,16.0,AAG [GAAG]3 GAGA AAGA AAGT AAAA [AAGA]3 GAAA AAGA GAAA [AAGA]3 GAAG AAAG AGAA AGAG GAAA GAGA AAGA [AAGG]6 [GAAA]2 TAAA [GAAA]13 GAGA AAAA,3932,real_allele,,,,,, -powerseq_example,DYS385A-B,AAGGAAGGAAGGAAGGAGAAAGAAAGTAAAAAAGAAAGAAAGAGAAAAAGAGAAAAAGAAAGAAAGAGAAGAAAGAGAAAGAGGAAAGAGAAAGAAAGGAAGGAAGGAAGGAAGGAAGGGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAA,13.0,AAG [GAAG]3 GAGA AAGA AAGT AAAA [AAGA]3 GAAA AAGA GAAA [AAGA]3 GAAG AAAG AGAA AGAG GAAA GAGA AAGA [AAGG]6 [GAAA]13 GAGA AAAA,4446,real_allele,,,,,, -powerseq_example,DYS389II,TCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCATTATACCTACTTCTGTATCCAACTCTCATCTGTATTATCTATGTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,31.0,[TCTG]4 [TCTA]13 TCAT TATA CCTA CTTC TGTA TCCA ACTC TCAT CTGT ATTA TCTA TGTA [TCTG]3 [TCTA]11,2593,real_allele,,,,,, +powerseq_example,DYS385A-B,AAGGAAGGAAGGAAGGAGAAAGAAAGTAAAAAAGAAAGAAAGAGAAAAAGAGAAAAAGAAAGAAAGAGAAGAAAGAGAAAGAGGAAAGAGAAAGAAAGGAAGGAAGGAAGGAAGGAAGGGAAAGAAATAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAA,16.0,AAG [GAAG]3 GAGA AAGA AAGT AAAA [AAGA]3 GAAA AAGA GAAA [AAGA]3 GAAG AAAG AGAA AGAG GAAA GAGA 
AAGA [AAGG]6 [GAAA]2 TAAA [GAAA]13 GAGA AAAA,3932,Typed,,,,,, +powerseq_example,DYS385A-B,AAGGAAGGAAGGAAGGAGAAAGAAAGTAAAAAAGAAAGAAAGAGAAAAAGAGAAAAAGAAAGAAAGAGAAGAAAGAGAAAGAGGAAAGAGAAAGAAAGGAAGGAAGGAAGGAAGGAAGGGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAAA,13.0,AAG [GAAG]3 GAGA AAGA AAGT AAAA [AAGA]3 GAAA AAGA GAAA [AAGA]3 GAAG AAAG AGAA AGAG GAAA GAGA AAGA [AAGG]6 [GAAA]13 GAGA AAAA,4446,Typed,,,,,, +powerseq_example,DYS389II,TCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCATTATACCTACTTCTGTATCCAACTCTCATCTGTATTATCTATGTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,31.0,[TCTG]4 [TCTA]13 TCAT TATA CCTA CTTC TGTA TCCA ACTC TCAT CTGT ATTA TCTA TGTA [TCTG]3 [TCTA]11,2593,Typed,,,,,, powerseq_example,DYS389II,TCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCATTATACCTACTTCTGTATCCAACTCTCATCTGTATTATCTATGTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,30.0,[TCTG]4 [TCTA]13 TCAT TATA CCTA CTTC TGTA TCCA ACTC TCAT CTGT ATTA TCTA TGTA [TCTG]3 [TCTA]10,288,-1_stutter,[TCTG]4 [TCTA]13 TCAT TATA CCTA CTTC TGTA TCCA ACTC TCAT CTGT ATTA TCTA TGTA [TCTG]3 [TCTA]11,,2593.0,,,0.111 -powerseq_example,DYS390,TCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTGTCTATCTATCTATCTA,24.0,[TCTA]2 [TCTG]8 [TCTA]11 TCTG [TCTA]4,9090,real_allele,,,,,, +powerseq_example,DYS390,TCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTGTCTATCTATCTATCTA,24.0,[TCTA]2 [TCTG]8 [TCTA]11 TCTG [TCTA]4,9090,Typed,,,,,, powerseq_example,DYS390,TCTATCTATCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTGTCTATCTATCTATCTA,23.0,[TCTA]2 [TCTG]8 [TCTA]10 TCTG [TCTA]4,658,-1_stutter,[TCTA]2 [TCTG]8 [TCTA]11 TCTG [TCTA]4,,9090.0,,,0.072 -powerseq_example,DYS391,TGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTG,10.0,TG TCTG [TCTA]10 TCTG,12298,real_allele,,,,,, +powerseq_example,DYS391,TGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTG,10.0,TG TCTG [TCTA]10 
TCTG,12298,Typed,,,,,, powerseq_example,DYS391,TGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTG,9.0,TG TCTG [TCTA]9 TCTG,1086,-1_stutter,TG TCTG [TCTA]10 TCTG,,12298.0,,,0.088 -powerseq_example,DYS392,TATTATTATTATTATTATTATTATTATTATTATTATTAT,13.0,[TAT]13,15505,real_allele,,,,,, +powerseq_example,DYS392,TATTATTATTATTATTATTATTATTATTATTATTATTAT,13.0,[TAT]13,15505,Typed,,,,,, powerseq_example,DYS392,TATTATTATTATTATTATTATTATTATTATTATTAT,12.0,[TAT]12,2565,-1_stutter,[TAT]13,,15505.0,,,0.165 -powerseq_example,DYS393,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATATGTATGTCTTTTCTATGAGACATA,13.0,[AGAT]13 [ATGT]2 CTTT TCTA TGAG ACAT A,19034,real_allele,,,,,, +powerseq_example,DYS393,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATATGTATGTCTTTTCTATGAGACATA,13.0,[AGAT]13 [ATGT]2 CTTT TCTA TGAG ACAT A,19034,Typed,,,,,, powerseq_example,DYS393,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATATGTATGTCTTTTCTATGAGACATA,12.0,[AGAT]12 [ATGT]2 CTTT TCTA TGAG ACAT A,1946,-1_stutter,[AGAT]13 [ATGT]2 CTTT TCTA TGAG ACAT A,,19034.0,,,0.102 -powerseq_example,DYS437,TCTATCTATCTATCTATCTATCTATCTATCTATCTGTCTGTCTATCTATCTATCTA,14.0,[TCTA]8 [TCTG]2 [TCTA]4,9559,real_allele,,,,,, +powerseq_example,DYS437,TCTATCTATCTATCTATCTATCTATCTATCTATCTGTCTGTCTATCTATCTATCTA,14.0,[TCTA]8 [TCTG]2 [TCTA]4,9559,Typed,,,,,, powerseq_example,DYS437,TCTATCTATCTATCTATCTATCTATCTATCTGTCTGTCTATCTATCTATCTA,13.0,[TCTA]7 [TCTG]2 [TCTA]4,522,-1_stutter,[TCTA]8 [TCTG]2 [TCTA]4,,9559.0,,,0.055 -powerseq_example,DYS438,TTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTC,9.0,[TTTTC]9,15233,real_allele,,,,,, +powerseq_example,DYS438,TTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTC,9.0,[TTTTC]9,15233,Typed,,,,,, powerseq_example,DYS438,TTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTC,8.0,[TTTTC]8,411,-1_stutter,[TTTTC]9,,15233.0,,,0.027 -powerseq_example,DYS439,AAATAGAAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,12.0,AAAT AGAA [GATA]12,25067,real_allele,,,,,, 
+powerseq_example,DYS439,AAATAGAAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,12.0,AAAT AGAA [GATA]12,25067,Typed,,,,,, powerseq_example,DYS439,AAATAGAAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,11.0,AAAT AGAA [GATA]11,2052,-1_stutter,AAAT AGAA [GATA]12,,25067.0,,,0.082 -powerseq_example,DYS448,AGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATATAGAGATAGAGAGATAGAGATAGAGATAGATAGATAGAGAAAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGAT,19.0,[AGAGAT]11 [ATAGAG]2 [AGATAG]3 ATAGAT AGAGAA [AGAGAT]8,2823,real_allele,,,,,, +powerseq_example,DYS448,AGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATATAGAGATAGAGAGATAGAGATAGAGATAGATAGATAGAGAAAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGAT,19.0,[AGAGAT]11 [ATAGAG]2 [AGATAG]3 ATAGAT AGAGAA [AGAGAT]8,2823,Typed,,,,,, powerseq_example,DYS448,AGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATATAGAGATAGAGAGATAGAGATAGAGATAGAGAGATAGAGAAAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGATAGAGAT,19.0,[AGAGAT]11 [ATAGAG]2 [AGATAG]3 AGAGAT AGAGAA [AGAGAT]8,88,BelowAT,,,,,0.03, -powerseq_example,DYS456,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATATTCCATTAGTTCTGTCCCTCTAGAGAACCCTAATACATCAGTTTAAGAA,17.0,[AGAT]17 ATTC CATT AGTT CTGT CCCT CTAG AGAA CCCT AATA CATC AGTT TAAG AA,11109,real_allele,,,,,, -powerseq_example,DYS456,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATATTCCATTAGTTCTGTCCCTCTAGAGAACCCTAATACATCAGTTTAAGAA,16.0,[AGAT]16 ATTC CATT AGTT CTGT CCCT CTAG AGAA CCCT AATA CATC AGTT TAAG AA,1834,real_allele,,,,,, -powerseq_example,DYS458,GAAAGAAAGAAAAGGAAGGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGGAGGGTGGGCGTGGTGGCTCATGCTTGTAATGCCAGAACTTTGGGAGGCCGAGGTGG,17.0,[GAAA]3 AG GAAG [GAAA]17 GGAG GGTG GGCG TGGT GGCT CATG CTTG TAAT GCCA GAAC TTTG GGAG GCCG AGGT GG,10541,real_allele,,,,,, 
-powerseq_example,DYS458,GAAAGAAAGAAAAGGAAGGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGGAGGGTGGGCGTGGTGGCTCATGCTTGTAATGCCAGAACTTTGGGAGGCCGAGGTGG,16.0,[GAAA]3 AG GAAG [GAAA]16 GGAG GGTG GGCG TGGT GGCT CATG CTTG TAAT GCCA GAAC TTTG GGAG GCCG AGGT GG,1660,real_allele,,,,,, -powerseq_example,DYS481,CTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,22.0,[CTT]22,12071,real_allele,,,,,, +powerseq_example,DYS456,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATATTCCATTAGTTCTGTCCCTCTAGAGAACCCTAATACATCAGTTTAAGAA,17.0,[AGAT]17 ATTC CATT AGTT CTGT CCCT CTAG AGAA CCCT AATA CATC AGTT TAAG AA,11109,Typed,,,,,, +powerseq_example,DYS456,AGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATATTCCATTAGTTCTGTCCCTCTAGAGAACCCTAATACATCAGTTTAAGAA,16.0,[AGAT]16 ATTC CATT AGTT CTGT CCCT CTAG AGAA CCCT AATA CATC AGTT TAAG AA,1834,Typed,,,,,, +powerseq_example,DYS458,GAAAGAAAGAAAAGGAAGGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGGAGGGTGGGCGTGGTGGCTCATGCTTGTAATGCCAGAACTTTGGGAGGCCGAGGTGG,17.0,[GAAA]3 AG GAAG [GAAA]17 GGAG GGTG GGCG TGGT GGCT CATG CTTG TAAT GCCA GAAC TTTG GGAG GCCG AGGT GG,10541,Typed,,,,,, +powerseq_example,DYS458,GAAAGAAAGAAAAGGAAGGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGGAGGGTGGGCGTGGTGGCTCATGCTTGTAATGCCAGAACTTTGGGAGGCCGAGGTGG,16.0,[GAAA]3 AG GAAG [GAAA]16 GGAG GGTG GGCG TGGT GGCT CATG CTTG TAAT GCCA GAAC TTTG GGAG GCCG AGGT GG,1660,Typed,,,,,, +powerseq_example,DYS481,CTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,22.0,[CTT]22,12071,Typed,,,,,, powerseq_example,DYS481,CTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,21.0,[CTT]21,2848,-1_stutter,[CTT]22,,12071.0,,,0.236 -powerseq_example,DYS533,TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC,12.0,[TATC]12,8598,real_allele,,,,,, +powerseq_example,DYS533,TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC,12.0,[TATC]12,8598,Typed,,,,,, 
powerseq_example,DYS533,TATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATC,11.0,[TATC]11,735,-1_stutter,[TATC]12,,8598.0,,,0.085 -powerseq_example,DYS549,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,13.0,[GATA]13,17735,real_allele,,,,,, +powerseq_example,DYS549,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,13.0,[GATA]13,17735,Typed,,,,,, powerseq_example,DYS549,GATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATA,12.0,[GATA]12,1559,-1_stutter,[GATA]13,,17735.0,,,0.088 -powerseq_example,DYS570,TTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTC,17.0,[TTTC]17,14991,real_allele,,,,,, +powerseq_example,DYS570,TTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTC,17.0,[TTTC]17,14991,Typed,,,,,, powerseq_example,DYS570,TTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTC,16.0,[TTTC]16,1561,-1_stutter,[TTTC]17,,14991.0,,,0.104 -powerseq_example,DYS576,AAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAG,18.0,[AAAG]18,9658,real_allele,,,,,, -powerseq_example,DYS576,AAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAG,17.0,[AAAG]17,1530,real_allele,,,,,, -powerseq_example,DYS635,TCTATCTATCTATCTATGTATGTATCTATCTATGTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,21.0,[TCTA]4 [TGTA]2 [TCTA]2 [TGTA]2 [TCTA]11,14753,real_allele,,,,,, +powerseq_example,DYS576,AAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAG,18.0,[AAAG]18,9658,Typed,,,,,, +powerseq_example,DYS576,AAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAG,17.0,[AAAG]17,1530,Typed,,,,,, +powerseq_example,DYS635,TCTATCTATCTATCTATGTATGTATCTATCTATGTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,21.0,[TCTA]4 [TGTA]2 [TCTA]2 [TGTA]2 [TCTA]11,14753,Typed,,,,,, powerseq_example,DYS635,TCTATCTATCTATCTATGTATGTATCTATCTATGTATGTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,20.0,[TCTA]4 [TGTA]2 [TCTA]2 [TGTA]2 [TCTA]10,1399,-1_stutter,[TCTA]4 [TGTA]2 [TCTA]2 [TGTA]2 [TCTA]11,,14753.0,,,0.095 
-powerseq_example,DYS643,CTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTCTTTT,10.0,[CTTTT]10 CTTTC TTTT,9866,real_allele,,,,,, +powerseq_example,DYS643,CTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTCTTTT,10.0,[CTTTT]10 CTTTC TTTT,9866,Typed,,,,,, powerseq_example,DYS643,CTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTTCTTTCTTTT,9.0,[CTTTT]9 CTTTC TTTT,287,-1_stutter,[CTTTT]10 CTTTC TTTT,,9866.0,,,0.029 -powerseq_example,Y-GATA-H4,AGATAGATAGATAGATCTATAGATAGATAGGTAGGTAGGTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11.0,AGA [TAGA]3 TCTA [TAGA]2 [TAGG]3 [TAGA]11,8052,real_allele,,,,,, +powerseq_example,Y-GATA-H4,AGATAGATAGATAGATCTATAGATAGATAGGTAGGTAGGTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,11.0,AGA [TAGA]3 TCTA [TAGA]2 [TAGG]3 [TAGA]11,8052,Typed,,,,,, powerseq_example,Y-GATA-H4,AGATAGATAGATAGATCTATAGATAGATAGGTAGGTAGGTAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,10.0,AGA [TAGA]3 TCTA [TAGA]2 [TAGG]3 [TAGA]10,662,-1_stutter,AGA [TAGA]3 TCTA [TAGA]2 [TAGG]3 [TAGA]11,,8052.0,,,0.082 diff --git a/lusSTR/tests/test_filters.py b/lusSTR/tests/test_filters.py index 5ffa7da0..e3255d9f 100644 --- a/lusSTR/tests/test_filters.py +++ b/lusSTR/tests/test_filters.py @@ -58,12 +58,12 @@ def test_forward_stutter_threshold(perc, perc_stut, reads, forward_threshold): "al_reads, called_allele_type, stut_perc", [ (None, 0.18, 0, 18, 100, None, 15, "-1_stutter", 0.15), - ("real_allele", 0.18, 0, 18, 100, None, 20, "real_allele", None), + ("Typed", 0.18, 0, 18, 100, None, 20, "Typed", None), (None, 0.18, 0, 18, 100, None, 20, None, None), ("+1_stutter", 0.18, 0, 18, 100, 200, 20, "-1_stutter/+1_stutter", None), - ("+1_stutter", 0.18, 0, 18, 100, 200, 30, "real_allele", None), + ("+1_stutter", 0.18, 0, 18, 100, 200, 30, "Typed", None), ("-2_stutter", 0.18, 0, 18, 100, 100, 30, "-1_stutter/-2_stutter", None), - ("-2_stutter", 0.18, 0, 18, 100, 100, 40, "real_allele", None), + ("-2_stutter", 0.18, 0, 18, 100, 100, 40, "Typed", None), ], ) def test_minus1stutter( @@ -95,12 +95,12 @@ 
def test_minus1stutter( "ref_reads, al_reads, called_allele_type, stut_perc", [ (None, 0.18, 0, 18, None, 100, 15, "-2_stutter", 0.15), - ("real_allele", 0.18, 0, 18, None, 100, 20, "real_allele", None), + ("Typed", 0.18, 0, 18, None, 100, 20, "Typed", None), (None, 0.18, 0, 18, None, 100, 20, None, None), ("+1_stutter", 0.18, 0, 18, 100, 200, 20, "+1_stutter/-2_stutter", None), - ("+1_stutter", 0.18, 0, 18, 100, 200, 30, "real_allele", None), + ("+1_stutter", 0.18, 0, 18, 100, 200, 30, "Typed", None), ("-1_stutter", 0.18, 0, 18, 100, 100, 30, "-1_stutter/-2_stutter", None), - ("-1_stutter", 0.18, 0, 18, 100, 100, 40, "real_allele", None), + ("-1_stutter", 0.18, 0, 18, 100, 100, 40, "Typed", None), ], ) def test_minus2stutter( @@ -132,12 +132,12 @@ def test_minus2stutter( "al_reads, called_allele_type, stut_perc", [ (None, 0.18, 0, 100, None, 3, "+1_stutter", 0.03), - ("real_allele", 0.18, 0, 100, None, 20, "real_allele", None), + ("Typed", 0.18, 0, 100, None, 20, "Typed", None), (None, 0.18, 0, 100, None, 20, None, None), ("-1_stutter", 0.18, 0, 100, 200, 3, "-1_stutter/+1_stutter", None), - ("-1_stutter", 0.18, 0, 100, 200, 50, "real_allele", None), + ("-1_stutter", 0.18, 0, 100, 200, 50, "Typed", None), ("-2_stutter", 0.18, 0, 100, 100, 3, "+1_stutter/-2_stutter", None), - ("-2_stutter", 0.18, 0, 100, 100, 40, "real_allele", None), + ("-2_stutter", 0.18, 0, 100, 100, 40, "Typed", None), ], ) def test_plus1stutter( diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py index 9b2342b2..16e0fd4d 100644 --- a/lusSTR/wrappers/filter.py +++ b/lusSTR/wrappers/filter.py @@ -146,9 +146,9 @@ def process_strs(dict_loc, datatype, seq_col, brack_col): def EFM_output(profile, outfile, profile_type, data_type, col, sex, separate=False): if profile_type == "reference": - profile = profile[profile.allele_type == "real_allele"] + profile = profile.query("allele_type == 'Typed'") else: - profile = profile[profile.allele_type != "BelowAT"] + profile = 
profile.query("allele_type != ['BelowAT', 'Deleted']") efm_profile = populate_efm_profile(profile, data_type, col, sex) if separate: write_sample_specific_efm_profiles(efm_profile, profile_type, data_type, outfile) @@ -254,9 +254,9 @@ def determine_max_num_alleles(allele_heights): def STRmix_output(profile, outdir, profile_type, data_type, seq_col): Path(outdir).mkdir(parents=True, exist_ok=True) if profile_type == "reference": - filtered_df = profile[profile.allele_type == "real_allele"] + filtered_df = profile.query("allele_type == 'Typed'") else: - filtered_df = profile[profile.allele_type != "BelowAT"] + filtered_df = profile.query("allele_type != ['BelowAT', 'Deleted']") if data_type == "ce": strmix_profile = strmix_ce_processing(filtered_df) elif data_type == "lusplus": @@ -349,13 +349,12 @@ def format_ref_table(new_rows, sample_data, datatype): return sort_df -def marker_plots(df, output_name, sex): - Path("MarkerPlots").mkdir(parents=True, exist_ok=True) +def marker_plots(df, output_name, sex, wd="."): + Path(f"{wd}/MarkerPlots").mkdir(parents=True, exist_ok=True) df["CE_Allele"] = df["CE_Allele"].astype(float) - filt_df = df[df["allele_type"] == "real_allele"] + filt_df = df[df["allele_type"] == "Typed"] for sample_id in df["SampleID"].unique(): - # sample_id = f"{id}_ystrs" if sex else id - with PdfPages(f"MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf: + with PdfPages(f"{wd}/MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf: make_plot(filt_df, sample_id, filters=True, at=False) pdf.savefig() make_plot(df, sample_id) @@ -366,13 +365,13 @@ def marker_plots(df, output_name, sex): def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True): sample_df = df[df["SampleID"] == sample_id].copy() - # sample_id = f"{id}_sexchr" if sex else id conditions = [ - sample_df["allele_type"].str.contains("real"), + sample_df["allele_type"].str.contains("Typed"), sample_df["allele_type"].str.contains("BelowAT"), 
sample_df["allele_type"].str.contains("stutter"), + sample_df["allele_type"].str.contains("Deleted"), ] - values = ["Typed", "BelowAT", "Stutter"] + values = ["Typed", "BelowAT", "Stutter", "Deleted"] sample_df.loc[:, "Type"] = np.select(conditions, values) max_reads = max(sample_df["Reads"]) n = 100 if max_reads > 1000 else 10 @@ -383,7 +382,7 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True): for marker in sample_df["Locus"].unique(): if marker in strs or marker in ystrs: n += 1 - colors = {"Typed": "g", "Stutter": "b", "BelowAT": "r"} + colors = {"Typed": "green", "Stutter": "blue", "BelowAT": "red", "Deleted": "purple"} marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele") ax = fig.add_subplot(6, 5, n) p = ax.bar( @@ -469,7 +468,7 @@ def process_input( else "Forward_Strand_Bracketed_Notation" ) if nofiltering: - full_df["allele_type"] = "real_allele" + full_df["allele_type"] = "Typed" marker_plots(full_df, input_name, sex) if output_type == "efm" or output_type == "mpsproto": EFM_output(full_df, outpath, profile_type, data_type, brack_col, sex, separate) diff --git a/setup.py b/setup.py index dd07d54b..d40c5d15 100755 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ "numpy==1.26.4", "streamlit>=1.31.0", "streamlit_option_menu>=0.3.12", + "plotly==5.24.1", ], entry_points={"console_scripts": ["lusstr = lusSTR.cli:main"]}, scripts=glob.glob("lusSTR/scripts/*"),