diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py index 1d6b5b5d..859ce736 100644 --- a/lusSTR/scripts/filter_settings.py +++ b/lusSTR/scripts/filter_settings.py @@ -531,6 +531,4 @@ def same_size_filter(df, metadata, datatype): else: final_df = pd.concat([final_df, df_filt]) final_df = final_df.reset_index(drop=True) - if datatype == "lusplus": - final_df = final_df.drop("CE_Allele", axis=1) return final_df diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py index b95d7d61..1817faf5 100644 --- a/lusSTR/scripts/marker.py +++ b/lusSTR/scripts/marker.py @@ -388,12 +388,18 @@ def convert(self): else: if "GGGCTGCCTA" in self.uas_sequence: break_point = self.uas_sequence.index("GGGCTGCCTA") + 10 - else: + bracketed_form = ( + f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} " + f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}" + ) + elif "TTTT" in self.uas_sequence: break_point = self.uas_sequence.index("TTTT") + 14 - bracketed_form = ( - f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} " - f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}" - ) + bracketed_form = ( + f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} " + f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}" + ) + else: + bracketed_form = "" return bracketed_form diff --git a/lusSTR/scripts/repeat.py b/lusSTR/scripts/repeat.py index 7175e56b..bd34789b 100644 --- a/lusSTR/scripts/repeat.py +++ b/lusSTR/scripts/repeat.py @@ -163,6 +163,8 @@ def repeat_copy_number(bf, repeat): The input is a sequence string collapsed to bracketed sequence form. """ longest = 0 + if bf == "": + return 0 for block in bf.split(" "): if block == repeat: if 1 > longest: diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv index a093c1fd..9bcfd9a0 100644 --- a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv +++ b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv @@ -1,24 +1,24 @@ -SampleID,Locus,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter,CE_Allele -Sample1,D4S2408,10_10_0,1022,real_allele,,,,,,, -Sample1,D4S2408,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,,, -Sample1,D4S2408,8_8_0,1050,real_allele,,,,,,, -Sample1,D8S1179,14_12_1_0,869,real_allele,,,,,,, -Sample1,D8S1179,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212, -Sample1,D8S1179,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201, -Sample1,D9S1122,13_11,948,real_allele,,,,,,, -Sample1,D9S1122,12_10,108,-1_stutter,13_11,,948.0,,,0.114, -Sample1,D9S1122,11_11,991,real_allele,,,,,,, -Sample1,D9S1122,10_10,87,-1_stutter,11_11,,991.0,,,0.088, -Sample1,FGA,23_15_3_0,1436,real_allele,,,,,,, -Sample1,FGA,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182, -Sample1,FGA,21_13_3_0,48,BelowAT,,,,,0.013,, -Sample1,FGA,20_12_3_0,1750,real_allele,,,,,,, -Sample1,FGA,18_10_3_0,181,real_allele,,,,,,, -Sample1,FGA,17_9_3_0,15,BelowAT,,,,,0.004,, -Sample1,PENTA D,15_15,50,real_allele,,,,,,, -Sample1,PENTA D,13_13,1000,real_allele,,,,,,, -Sample1,PENTA E,7_7,505,real_allele,,,,,,,7.0 -Sample1,TH01,7_7,2197,real_allele,,,,,,, -Sample1,TH01,6_6,1632,real_allele,,,,,,, -Sample1,TH01,5_5,66,BelowAT,,,,,0.017,, -Sample1,TPOX,11_11,15,BelowAT,,,,,1.0,,11.0 +SampleID,Locus,CE_Allele,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter +Sample1,D4S2408,10.0,10_10_0,1022,real_allele,,,,,, +Sample1,D4S2408,9.0,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,, +Sample1,D4S2408,8.0,8_8_0,1050,real_allele,,,,,, +Sample1,D8S1179,14.0,14_12_1_0,869,real_allele,,,,,, +Sample1,D8S1179,13.0,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212 +Sample1,D8S1179,12.0,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201 +Sample1,D9S1122,13.0,13_11,948,real_allele,,,,,, +Sample1,D9S1122,12.0,12_10,108,-1_stutter,13_11,,948.0,,,0.114 +Sample1,D9S1122,11.0,11_11,991,real_allele,,,,,, +Sample1,D9S1122,10.0,10_10,87,-1_stutter,11_11,,991.0,,,0.088 +Sample1,FGA,23.0,23_15_3_0,1436,real_allele,,,,,, +Sample1,FGA,22.0,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182 +Sample1,FGA,21.0,21_13_3_0,48,BelowAT,,,,,0.013, +Sample1,FGA,20.0,20_12_3_0,1750,real_allele,,,,,, +Sample1,FGA,18.0,18_10_3_0,181,real_allele,,,,,, +Sample1,FGA,17.0,17_9_3_0,15,BelowAT,,,,,0.004, +Sample1,PENTA D,15.0,15_15,50,real_allele,,,,,, +Sample1,PENTA D,13.0,13_13,1000,real_allele,,,,,, +Sample1,PENTA E,7.0,7_7,505,real_allele,,,,,, +Sample1,TH01,7.0,7_7,2197,real_allele,,,,,, +Sample1,TH01,6.0,6_6,1632,real_allele,,,,,, +Sample1,TH01,5.0,5_5,66,BelowAT,,,,,0.017, +Sample1,TPOX,11.0,11_11,15,BelowAT,,,,,1.0, diff --git a/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv b/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv index b83fce36..7531c6f3 100644 --- a/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv +++ b/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv @@ -1,28 +1,28 @@ Locus,CE Allele,Allele Seq,Reads -D4S2408,8,ATCTATCTATCTATCTATCTATCTATCTATCT,1000 -D4S2408,9,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357 -D4S2408,10,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,900 -D8S1179,12,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,26 -D8S1179,12,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,11 -D8S1179,13,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,95 -D8S1179,13,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,89 -D8S1179,14,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,739 -D8S1179,14,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,130 -D9S1122,10,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,87 -D9S1122,11,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,991 -D9S1122,12,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,108 -D9S1122,13,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,948 -FGA,17,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,15 -FGA,18,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,181 -FGA,20,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1750 -FGA,21,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,48 -FGA,22,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,262 -FGA,23,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1436 -PentaD,13,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,1000 -PentaD,15,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,50 -PentaE,7,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505 -TH01,5,AATGAATGAATGAATGAATG,66 -TH01,6,AATGAATGAATGAATGAATGAATG,1632 -TH01,7,AATGAATGAATGAATGAATGAATGAATG,2197 -TPOX,11,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,15 -vWA,16,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,6 +D4S2408,8.0,ATCTATCTATCTATCTATCTATCTATCTATCT,1000 +D4S2408,9.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357 +D4S2408,10.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,900 +D8S1179,12.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,26 +D8S1179,12.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,11 +D8S1179,13.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,95 +D8S1179,13.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,89 +D8S1179,14.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,739 +D8S1179,14.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,130 +D9S1122,10.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,87 +D9S1122,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,991 +D9S1122,12.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,108 +D9S1122,13.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,948 +FGA,17.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,15 +FGA,18.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,181 +FGA,20.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1750 +FGA,21.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,48 +FGA,22.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,262 +FGA,23.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1436 +PentaD,13.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,1000 +PentaD,15.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,50 +PentaE,7.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505 +TH01,5.0,AATGAATGAATGAATGAATG,66 +TH01,6.0,AATGAATGAATGAATGAATGAATG,1632 +TH01,7.0,AATGAATGAATGAATGAATGAATGAATG,2197 +TPOX,11.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,15 +vWA,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,6 diff --git a/lusSTR/tests/test_suite.py b/lusSTR/tests/test_suite.py index cf1aaee0..25632b71 100644 --- a/lusSTR/tests/test_suite.py +++ b/lusSTR/tests/test_suite.py @@ -323,24 +323,15 @@ def test_snakemake(command, output, format_out, convert_out, all_out, tmp_path): assert filecmp.cmp(exp_output, obs_output) is True -@pytest.mark.parametrize( - "sex", - [True, False], -) -def test_marker_plots(sex, tmp_path): +def test_marker_plots(tmp_path): inputfile = data_file("UAS_bulk_input/Positive Control Sample Details Report 2315.xlsx") exp_output = str(tmp_path / "MarkerPlots/lusstr_output_Positive_Control_marker_plots.pdf") sex_exp = str(tmp_path / "MarkerPlots/lusstr_output_Positive_Control_sexchr_marker_plots.pdf") - if sex: - arglist = ["config", "-w", str(tmp_path), "--input", str(inputfile), "--sex"] - else: - arglist = ["config", "-w", str(tmp_path), "--input", str(inputfile)] + arglist = ["config", "-w", str(tmp_path), "--input", str(inputfile)] lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist)) - snakemake_arglist = ["strs", "convert", "-w", str(tmp_path)] + snakemake_arglist = ["strs", "all", "-w", str(tmp_path)] lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(snakemake_arglist)) assert os.path.exists(exp_output) is True - if sex: - assert os.path.exists(sex_exp) is True def test_genemarker(tmp_path): diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py index 652a70fb..beb950db 100644 --- a/lusSTR/wrappers/convert.py +++ b/lusSTR/wrappers/convert.py @@ -12,8 +12,6 @@ import csv import json -import math -import matplotlib.pyplot as plt import numpy as np import os import pandas as pd @@ -23,7 +21,6 @@ from lusSTR.scripts.repeat import collapse_all_repeats, collapse_repeats_by_length from lusSTR.scripts.repeat import sequence_to_bracketed_form, split_by_n from lusSTR.scripts.repeat import reverse_complement, reverse_complement_bracketed -from matplotlib.backends.backend_pdf import PdfPages from pathlib import Path @@ -101,9 +98,8 @@ def format_table(input, software, kit="forenseq"): ] flanks_list.append(flank_summary) continue - marker = STRMarkerObject(locus, sequence, software, kit=kit) - if locus == "D12S391" and kit == "powerseq": + if locus == "D12S391" and kit == "powerseq" and software == "straitrazor": if "." in str(marker.canonical): check_sr += 1 if check_sr > 10: @@ -185,48 +181,6 @@ def sort_table(table): return sorted_table -def marker_plots(df, output_name, sex=False): - Path("MarkerPlots").mkdir(parents=True, exist_ok=True) - df["CE_Allele"] = df["CE_Allele"].astype(float) - for id in df["SampleID"].unique(): - sample_id = f"{id}_sexchr" if sex else id - with PdfPages(f"MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf: - make_plot(df, id, sex, sameyaxis=False) - pdf.savefig() - make_plot(df, id, sex) - pdf.savefig() - - -def make_plot(df, id, sex=False, sameyaxis=True): - sample_df = df[df["SampleID"] == id] - sample_id = f"{id}_sexchr" if sex else id - max_reads = max(sample_df["Reads"]) - n = 100 if max_reads > 1000 else 10 - max_yvalue = int(math.ceil(max_reads / n)) * n - increase_value = int(math.ceil((max_yvalue / 5)) / n) * n - fig = plt.figure(figsize=(31, 31)) if sex is True else plt.figure(figsize=(30, 30)) - n = 0 - for marker in sample_df["Locus"].unique(): - n += 1 - marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele") - ax = fig.add_subplot(6, 6, n) if sex is True else fig.add_subplot(6, 5, n) - ax.bar(marker_df["CE_Allele"], marker_df["Reads"]) - if sameyaxis: - ax.set_yticks(np.arange(0, max_yvalue, increase_value)) - ax.set_xticks( - np.arange(min(marker_df["CE_Allele"]) - 1, max(marker_df["CE_Allele"]) + 2, 1.0) - ) - ax.title.set_text(marker) - if sameyaxis: - plt.text( - 0.4, 0.95, "Marker Plots With Same Y-Axis Scale", transform=fig.transFigure, size=24 - ) - else: - plt.text( - 0.4, 0.95, "Marker Plots With Custom Y-Axis Scale", transform=fig.transFigure, size=24 - ) - - def main(input, out, kit, software, sex, nocombine): input = str(input) out = str(out) @@ -248,7 +202,6 @@ def main(input, out, kit, software, sex, nocombine): sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False) else: sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False) - marker_plots(sex_final_table, output_name, sex=True) if software != "uas": if not autosomal_final_table.empty: autosomal_flank_table.to_csv(f"{output_name}_flanks.txt", sep="\t", index=False) @@ -260,7 +213,6 @@ def main(input, out, kit, software, sex, nocombine): autosomal_final_table.to_csv(out, sep="\t", index=False) else: autosomal_final_table.to_csv(out, sep="\t", index=False) - marker_plots(autosomal_final_table, output_name) if __name__ == "__main__": diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py index 42817c4e..cbcf8052 100644 --- a/lusSTR/wrappers/filter.py +++ b/lusSTR/wrappers/filter.py @@ -16,6 +16,9 @@ import json import lusSTR from lusSTR.scripts.filter_settings import filters, flags +import math +from matplotlib.backends.backend_pdf import PdfPages +import matplotlib.pyplot as plt import numpy as np import os import pandas as pd @@ -322,6 +325,98 @@ def format_ref_table(new_rows, sample_data, datatype): return sort_df +def marker_plots(df, output_name): + Path("MarkerPlots").mkdir(parents=True, exist_ok=True) + df["CE_Allele"] = df["CE_Allele"].astype(float) + filt_df = df[df["allele_type"] == "real_allele"] + for sample_id in df["SampleID"].unique(): + # sample_id = f"{id}_sexchr" if sex else id + with PdfPages(f"MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf: + make_plot(filt_df, sample_id, filters=True, at=False) + pdf.savefig() + make_plot(df, sample_id) + pdf.savefig() + make_plot(df, sample_id, sameyaxis=True) + pdf.savefig() + + +def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True): + sample_df = df[df["SampleID"] == sample_id].copy() + # sample_id = f"{id}_sexchr" if sex else id + conditions = [ + sample_df["allele_type"].str.contains("real"), + sample_df["allele_type"].str.contains("BelowAT"), + sample_df["allele_type"].str.contains("stutter"), + ] + values = ["Typed", "BelowAT", "Stutter"] + sample_df.loc[:, "Type"] = np.select(conditions, values) + max_reads = max(sample_df["Reads"]) + n = 100 if max_reads > 1000 else 10 + max_yvalue = int(math.ceil(max_reads / n)) * n + increase_value = int(math.ceil((max_yvalue / 5)) / n) * n + fig = plt.figure(figsize=(30, 30)) + n = 0 + for marker in sample_df["Locus"].unique(): + n += 1 + colors = {"Typed": "g", "Stutter": "b", "BelowAT": "r"} + marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele") + ax = fig.add_subplot(6, 5, n) + p = ax.bar( + marker_df["CE_Allele"], + marker_df["Reads"], + edgecolor="k", + color=[colors[i] for i in marker_df["Type"]], + ) + if at: + at = get_at(marker_df, marker) + ax.axhline(at, linestyle="--", color="k") + ax.text(round(min(marker_df["CE_Allele"])) - 0.9, at + (at * 0.1), f"AT", size=12) + labels = marker_df["Type"].unique() + handles = [plt.Rectangle((0, 0), 1, 1, color=colors[l]) for l in labels] + if not filters: + plt.legend(handles, labels, title="Allele Type") + else: + for i, row in marker_df.iterrows(): + marker_df.loc[i, "Label"] = ( + str(int(marker_df.loc[i, "CE_Allele"])) + if ".0" in str(marker_df.loc[i, "CE_Allele"]) + else str(marker_df.loc[i, "CE_Allele"]) + ) + ax.bar_label(p, labels=marker_df["Label"]) + if sameyaxis: + ax.set_yticks(np.arange(0, max_yvalue, increase_value)) + ax.set_xticks( + np.arange( + round(min(marker_df["CE_Allele"]) - 1), round(max(marker_df["CE_Allele"])) + 2, 1.0 + ) + ) + ax.title.set_text(marker) + if sameyaxis: + title = "Marker Plots for All Alleles With Same Y-Axis Scale" + elif filters: + title = "Marker Plots for Typed Alleles With Custom Y-Axis Scale" + else: + title = "Marker Plots for All Alleles With Custom Y-Axis Scale" + plt.text(0.4, 0.95, title, transform=fig.transFigure, size=24) + + +def get_at(df, locus): + metadata = filter_marker_data[locus] + thresh_use = metadata["AnalyticalThresholdUse"] + at_st = float(metadata["AnalyticalThresholdStaticCount"]) + at_dy = metadata["AnalyticalThresholdDynamicPercent"] + at_dy_num = df["Reads"].sum() * float(at_dy) + if thresh_use.lower() == "both": + at = at_st if at_st > at_dy_num else at_dy_num + elif thresh_use.lower() == "static": + at = at_st + elif thresh_use.lower() == "dynamic": + at = at_dy_num + else: + raise ValueError("Incorrect AT specified in filters.json. Please check and re-run.") + return at + + def main( input, output_type, profile_type, data_type, output_dir, info, separate, nofilters, strand ): @@ -343,6 +438,7 @@ def main( ) if nofilters: full_df["allele_type"] = "real_allele" + marker_plots(full_df, outpath) if output_type == "efm" or output_type == "mpsproto": EFM_output(full_df, outpath, profile_type, data_type, brack_col, separate) else: @@ -350,6 +446,7 @@ def main( else: dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])} final_df, flags_df = process_strs(dict_loc, data_type, seq_col) + marker_plots(final_df, outpath) if output_type == "efm" or output_type == "mpsproto": EFM_output(final_df, outpath, profile_type, data_type, brack_col, separate) else: