From 506690e1a67800dbcc639b6a366e1257e18525e4 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 10 Jan 2024 13:11:33 -0500
Subject: [PATCH 01/10] changed how marker plots are created [skip ci]

---
 lusSTR/scripts/marker.py   | 17 +++++++---
 lusSTR/scripts/repeat.py   | 19 +++++------
 lusSTR/wrappers/convert.py | 47 ---------------------------
 lusSTR/wrappers/filter.py  | 66 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 61 deletions(-)

diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py
index b95d7d61..898af645 100644
--- a/lusSTR/scripts/marker.py
+++ b/lusSTR/scripts/marker.py
@@ -386,14 +386,21 @@ def convert(self):
         if len(self.uas_sequence) < 110:
             bracketed_form = collapse_repeats_by_length(self.uas_sequence, 4)
         else:
+            print(self.uas_sequence)
             if "GGGCTGCCTA" in self.uas_sequence:
                 break_point = self.uas_sequence.index("GGGCTGCCTA") + 10
-            else:
+                bracketed_form = (
+                    f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} "
+                    f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}"
+                )
+            elif "TTTT" in self.uas_sequence:
                 break_point = self.uas_sequence.index("TTTT") + 14
-            bracketed_form = (
-                f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} "
-                f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}"
-            )
+                bracketed_form = (
+                    f"{collapse_repeats_by_length(self.uas_sequence[:break_point], 4)} "
+                    f"{collapse_repeats_by_length(self.uas_sequence[break_point:], 4)}"
+                )
+            else:
+                bracketed_form = ""
         return bracketed_form
 
 
diff --git a/lusSTR/scripts/repeat.py b/lusSTR/scripts/repeat.py
index 7175e56b..76ff49c4 100644
--- a/lusSTR/scripts/repeat.py
+++ b/lusSTR/scripts/repeat.py
@@ -163,15 +163,16 @@ def repeat_copy_number(bf, repeat):
     The input is a sequence string collapsed to bracketed sequence form.
     """
     longest = 0
-    for block in bf.split(" "):
-        if block == repeat:
-            if 1 > longest:
-                longest = 1
-        match = re.match(r"\[" + repeat + r"\](\d+)", block)
-        if match:
-            length = int(match.group(1))
-            if length > longest:
-                longest = length
+    if bf != "":
+        for block in bf.split(" "):
+            if block == repeat:
+                if 1 > longest:
+                    longest = 1
+            match = re.match(r"\[" + repeat + r"\](\d+)", block)
+            if match:
+                length = int(match.group(1))
+                if length > longest:
+                    longest = length
     return str(longest)
 
 
diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index 652a70fb..39ffb810 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -13,7 +13,6 @@
 import csv
 import json
 import math
-import matplotlib.pyplot as plt
 import numpy as np
 import os
 import pandas as pd
@@ -23,7 +22,6 @@
 from lusSTR.scripts.repeat import collapse_all_repeats, collapse_repeats_by_length
 from lusSTR.scripts.repeat import sequence_to_bracketed_form, split_by_n
 from lusSTR.scripts.repeat import reverse_complement, reverse_complement_bracketed
-from matplotlib.backends.backend_pdf import PdfPages
 from pathlib import Path
 
 
@@ -101,7 +99,6 @@ def format_table(input, software, kit="forenseq"):
             ]
             flanks_list.append(flank_summary)
             continue
-
         marker = STRMarkerObject(locus, sequence, software, kit=kit)
         if locus == "D12S391" and kit == "powerseq":
             if "." in str(marker.canonical):
@@ -185,48 +182,6 @@ def sort_table(table):
     return sorted_table
 
 
-def marker_plots(df, output_name, sex=False):
-    Path("MarkerPlots").mkdir(parents=True, exist_ok=True)
-    df["CE_Allele"] = df["CE_Allele"].astype(float)
-    for id in df["SampleID"].unique():
-        sample_id = f"{id}_sexchr" if sex else id
-        with PdfPages(f"MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
-            make_plot(df, id, sex, sameyaxis=False)
-            pdf.savefig()
-            make_plot(df, id, sex)
-            pdf.savefig()
-
-
-def make_plot(df, id, sex=False, sameyaxis=True):
-    sample_df = df[df["SampleID"] == id]
-    sample_id = f"{id}_sexchr" if sex else id
-    max_reads = max(sample_df["Reads"])
-    n = 100 if max_reads > 1000 else 10
-    max_yvalue = int(math.ceil(max_reads / n)) * n
-    increase_value = int(math.ceil((max_yvalue / 5)) / n) * n
-    fig = plt.figure(figsize=(31, 31)) if sex is True else plt.figure(figsize=(30, 30))
-    n = 0
-    for marker in sample_df["Locus"].unique():
-        n += 1
-        marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
-        ax = fig.add_subplot(6, 6, n) if sex is True else fig.add_subplot(6, 5, n)
-        ax.bar(marker_df["CE_Allele"], marker_df["Reads"])
-        if sameyaxis:
-            ax.set_yticks(np.arange(0, max_yvalue, increase_value))
-        ax.set_xticks(
-            np.arange(min(marker_df["CE_Allele"]) - 1, max(marker_df["CE_Allele"]) + 2, 1.0)
-        )
-        ax.title.set_text(marker)
-    if sameyaxis:
-        plt.text(
-            0.4, 0.95, "Marker Plots With Same Y-Axis Scale", transform=fig.transFigure, size=24
-        )
-    else:
-        plt.text(
-            0.4, 0.95, "Marker Plots With Custom Y-Axis Scale", transform=fig.transFigure, size=24
-        )
-
-
 def main(input, out, kit, software, sex, nocombine):
     input = str(input)
     out = str(out)
@@ -248,7 +203,6 @@ def main(input, out, kit, software, sex, nocombine):
                 sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False)
         else:
             sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False)
-        marker_plots(sex_final_table, output_name, sex=True)
     if software != "uas":
         if not autosomal_final_table.empty:
             autosomal_flank_table.to_csv(f"{output_name}_flanks.txt", sep="\t", index=False)
@@ -260,7 +214,6 @@ def main(input, out, kit, software, sex, nocombine):
             autosomal_final_table.to_csv(out, sep="\t", index=False)
     else:
         autosomal_final_table.to_csv(out, sep="\t", index=False)
-    marker_plots(autosomal_final_table, output_name)
 
 
 if __name__ == "__main__":
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 42817c4e..4d767abb 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -16,6 +16,9 @@
 import json
 import lusSTR
 from lusSTR.scripts.filter_settings import filters, flags
+import math
+from matplotlib.backends.backend_pdf import PdfPages
+import matplotlib.pyplot as plt
 import numpy as np
 import os
 import pandas as pd
@@ -322,6 +325,68 @@ def format_ref_table(new_rows, sample_data, datatype):
     return sort_df
 
 
+def marker_plots(df, output_name, sex=False):
+    Path("MarkerPlots").mkdir(parents=True, exist_ok=True)
+    df["CE_Allele"] = df["CE_Allele"].astype(float)
+    filt_df = df[df["allele_type"] == "real_allele"]
+    for id in df["SampleID"].unique():
+        sample_id = f"{id}_sexchr" if sex else id
+        with PdfPages(f"MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
+            make_plot(filt_df, id, sex, filters=True)
+            pdf.savefig()
+            make_plot(df, id, sex)
+            pdf.savefig()
+            make_plot(df, id, sex, sameyaxis=True)
+            pdf.savefig()
+
+
+def make_plot(df, id, sex=False, sameyaxis=False, filters=False):
+    sample_df = df[df["SampleID"] == id]
+    sample_id = f"{id}_sexchr" if sex else id
+    sample_df["Type"] = sample_df["allele_type"]
+    conditions = [
+        sample_df["allele_type"].str.contains("real"),
+        sample_df["allele_type"].str.contains("BelowAT"),
+        sample_df["allele_type"].str.contains("stutter"),
+    ]
+    values = ["Real", "BelowAT", "Stutter"]
+    sample_df["Type"] = np.select(conditions, values)
+    max_reads = max(sample_df["Reads"])
+    n = 100 if max_reads > 1000 else 10
+    max_yvalue = int(math.ceil(max_reads / n)) * n
+    increase_value = int(math.ceil((max_yvalue / 5)) / n) * n
+    fig = plt.figure(figsize=(31, 31)) if sex is True else plt.figure(figsize=(30, 30))
+    n = 0
+    for marker in sample_df["Locus"].unique():
+        n += 1
+        colors = {"Real": "g", "Stutter": "b", "BelowAT": "r"}
+        marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
+        ax = fig.add_subplot(6, 6, n) if sex is True else fig.add_subplot(6, 5, n)
+        ax.bar(
+            marker_df["CE_Allele"],
+            marker_df["Reads"],
+            edgecolor="k",
+            color=[colors[i] for i in marker_df["Type"]],
+        )
+        labels = marker_df["Type"].unique()
+        handles = [plt.Rectangle((0, 0), 1, 1, color=colors[l]) for l in labels]
+        if not filters:
+            plt.legend(handles, labels, title="Allele Type")
+        if sameyaxis:
+            ax.set_yticks(np.arange(0, max_yvalue, increase_value))
+        ax.set_xticks(
+            np.arange(min(marker_df["CE_Allele"]) - 1, max(marker_df["CE_Allele"]) + 2, 1.0)
+        )
+        ax.title.set_text(marker)
+    if sameyaxis:
+        title = "Marker Plots for All Alleles With Same Y-Axis Scale"
+    elif filters:
+        title = "Marker Plots for True Alleles With Custom Y-Axis Scale"
+    else:
+        title = "Marker Plots for All Alleles With Custom Y-Axis Scale"
+    plt.text(0.4, 0.95, title, transform=fig.transFigure, size=24)
+
+
 def main(
     input, output_type, profile_type, data_type, output_dir, info, separate, nofilters, strand
 ):
@@ -350,6 +415,7 @@ def main(
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col)
+        marker_plots(final_df, outpath, sex=False)
         if output_type == "efm" or output_type == "mpsproto":
             EFM_output(final_df, outpath, profile_type, data_type, brack_col, separate)
         else:

From 2a6fd83824daf8b9c28425a44c0475713d77381c Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Wed, 10 Jan 2024 13:15:25 -0500
Subject: [PATCH 02/10] remove print statement [skip ci]

---
 lusSTR/scripts/marker.py   | 1 -
 lusSTR/wrappers/convert.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/lusSTR/scripts/marker.py b/lusSTR/scripts/marker.py
index 898af645..1817faf5 100644
--- a/lusSTR/scripts/marker.py
+++ b/lusSTR/scripts/marker.py
@@ -386,7 +386,6 @@ def convert(self):
         if len(self.uas_sequence) < 110:
             bracketed_form = collapse_repeats_by_length(self.uas_sequence, 4)
         else:
-            print(self.uas_sequence)
             if "GGGCTGCCTA" in self.uas_sequence:
                 break_point = self.uas_sequence.index("GGGCTGCCTA") + 10
                 bracketed_form = (
diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index 39ffb810..fa8db047 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -12,7 +12,6 @@
 
 import csv
 import json
-import math
 import numpy as np
 import os
 import pandas as pd

From 504c142216421ab504d180288f7f7d423d8fd512 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 11 Jan 2024 06:40:54 -0500
Subject: [PATCH 03/10] added AT line [skip ci]

---
 lusSTR/wrappers/filter.py | 51 ++++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 4d767abb..6002cdd5 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -325,49 +325,54 @@ def format_ref_table(new_rows, sample_data, datatype):
     return sort_df
 
 
-def marker_plots(df, output_name, sex=False):
+def marker_plots(df, output_name):
     Path("MarkerPlots").mkdir(parents=True, exist_ok=True)
     df["CE_Allele"] = df["CE_Allele"].astype(float)
     filt_df = df[df["allele_type"] == "real_allele"]
-    for id in df["SampleID"].unique():
-        sample_id = f"{id}_sexchr" if sex else id
+    for sample_id in df["SampleID"].unique():
+        # sample_id = f"{id}_sexchr" if sex else id
         with PdfPages(f"MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
-            make_plot(filt_df, id, sex, filters=True)
+            make_plot(filt_df, sample_id, filters=True, at=False)
             pdf.savefig()
-            make_plot(df, id, sex)
+            make_plot(df, sample_id)
             pdf.savefig()
-            make_plot(df, id, sex, sameyaxis=True)
+            make_plot(df, sample_id, sameyaxis=True)
             pdf.savefig()
 
 
-def make_plot(df, id, sex=False, sameyaxis=False, filters=False):
-    sample_df = df[df["SampleID"] == id]
-    sample_id = f"{id}_sexchr" if sex else id
+def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
+    sample_df = df[df["SampleID"] == sample_id]
+    # sample_id = f"{id}_sexchr" if sex else id
     sample_df["Type"] = sample_df["allele_type"]
     conditions = [
         sample_df["allele_type"].str.contains("real"),
         sample_df["allele_type"].str.contains("BelowAT"),
         sample_df["allele_type"].str.contains("stutter"),
     ]
-    values = ["Real", "BelowAT", "Stutter"]
+    values = ["Typed", "BelowAT", "Stutter"]
     sample_df["Type"] = np.select(conditions, values)
     max_reads = max(sample_df["Reads"])
     n = 100 if max_reads > 1000 else 10
     max_yvalue = int(math.ceil(max_reads / n)) * n
     increase_value = int(math.ceil((max_yvalue / 5)) / n) * n
-    fig = plt.figure(figsize=(31, 31)) if sex is True else plt.figure(figsize=(30, 30))
+    fig = plt.figure(figsize=(30, 30))
     n = 0
     for marker in sample_df["Locus"].unique():
         n += 1
-        colors = {"Real": "g", "Stutter": "b", "BelowAT": "r"}
+        colors = {"Typed": "g", "Stutter": "b", "BelowAT": "r"}
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
-        ax = fig.add_subplot(6, 6, n) if sex is True else fig.add_subplot(6, 5, n)
+        # print(marker_df)
+        ax = fig.add_subplot(6, 5, n)
         ax.bar(
             marker_df["CE_Allele"],
             marker_df["Reads"],
             edgecolor="k",
             color=[colors[i] for i in marker_df["Type"]],
         )
+        if at:
+            at = get_at(marker_df, marker)
+            ax.axhline(at, linestyle="--", color="k")
+            ax.text(min(marker_df["CE_Allele"]) - 0.9, at + (at * 0.1), f"AT", size=12)
         labels = marker_df["Type"].unique()
         handles = [plt.Rectangle((0, 0), 1, 1, color=colors[l]) for l in labels]
         if not filters:
@@ -387,6 +392,23 @@ def make_plot(df, id, sex=False, sameyaxis=False, filters=False):
     plt.text(0.4, 0.95, title, transform=fig.transFigure, size=24)
 
 
+def get_at(df, locus):
+    metadata = filter_marker_data[locus]
+    thresh_use = metadata["AnalyticalThresholdUse"]
+    at_st = float(metadata["AnalyticalThresholdStaticCount"])
+    at_dy = metadata["AnalyticalThresholdDynamicPercent"]
+    at_dy_num = df["Reads"].sum() * float(at_dy)
+    if thresh_use.lower() == "both":
+        at = at_st if at_st > at_dy_num else at_dy_num
+    elif thresh_use.lower() == "static":
+        at = at_st
+    elif thresh_use.lower() == "dynamic":
+        at = at_dy_num
+    else:
+        raise ValueError("Incorrect AT specified in filters.json. Please check and re-run.")
+    return at
+
+
 def main(
     input, output_type, profile_type, data_type, output_dir, info, separate, nofilters, strand
 ):
@@ -408,6 +430,7 @@ def main(
     )
     if nofilters:
         full_df["allele_type"] = "real_allele"
+        marker_plots(full_df, outpath)
         if output_type == "efm" or output_type == "mpsproto":
             EFM_output(full_df, outpath, profile_type, data_type, brack_col, separate)
         else:
@@ -415,7 +438,7 @@ def main(
     else:
         dict_loc = {k: v for k, v in full_df.groupby(["SampleID", "Locus"])}
         final_df, flags_df = process_strs(dict_loc, data_type, seq_col)
-        marker_plots(final_df, outpath, sex=False)
+        marker_plots(final_df, outpath)
         if output_type == "efm" or output_type == "mpsproto":
             EFM_output(final_df, outpath, profile_type, data_type, brack_col, separate)
         else:

From 677f2c5329fbea7104aa8a90d46e2a3bbb7ceea7 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 11 Jan 2024 06:41:15 -0500
Subject: [PATCH 04/10] updated test_suite [skip ci]

---
 lusSTR/tests/test_suite.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/lusSTR/tests/test_suite.py b/lusSTR/tests/test_suite.py
index cf1aaee0..25632b71 100644
--- a/lusSTR/tests/test_suite.py
+++ b/lusSTR/tests/test_suite.py
@@ -323,24 +323,15 @@ def test_snakemake(command, output, format_out, convert_out, all_out, tmp_path):
     assert filecmp.cmp(exp_output, obs_output) is True
 
 
-@pytest.mark.parametrize(
-    "sex",
-    [True, False],
-)
-def test_marker_plots(sex, tmp_path):
+def test_marker_plots(tmp_path):
     inputfile = data_file("UAS_bulk_input/Positive Control Sample Details Report 2315.xlsx")
     exp_output = str(tmp_path / "MarkerPlots/lusstr_output_Positive_Control_marker_plots.pdf")
     sex_exp = str(tmp_path / "MarkerPlots/lusstr_output_Positive_Control_sexchr_marker_plots.pdf")
-    if sex:
-        arglist = ["config", "-w", str(tmp_path), "--input", str(inputfile), "--sex"]
-    else:
-        arglist = ["config", "-w", str(tmp_path), "--input", str(inputfile)]
+    arglist = ["config", "-w", str(tmp_path), "--input", str(inputfile)]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(arglist))
-    snakemake_arglist = ["strs", "convert", "-w", str(tmp_path)]
+    snakemake_arglist = ["strs", "all", "-w", str(tmp_path)]
     lusSTR.cli.main(lusSTR.cli.get_parser().parse_args(snakemake_arglist))
     assert os.path.exists(exp_output) is True
-    if sex:
-        assert os.path.exists(sex_exp) is True
 
 
 def test_genemarker(tmp_path):

From fdc00866274bc318d4700debbcebcc3766f2f47a Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 11 Jan 2024 09:52:49 -0500
Subject: [PATCH 05/10] updated tests

---
 lusSTR/scripts/filter_settings.py             |  4 +-
 .../LUSPlus_sequence_info.csv                 | 48 ++++++++---------
 .../NGS_stutter_test/Sample1_nofilter.csv     | 54 +++++++++----------
 lusSTR/wrappers/filter.py                     |  3 +-
 4 files changed, 54 insertions(+), 55 deletions(-)

diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py
index 1d6b5b5d..f3f60461 100644
--- a/lusSTR/scripts/filter_settings.py
+++ b/lusSTR/scripts/filter_settings.py
@@ -531,6 +531,6 @@ def same_size_filter(df, metadata, datatype):
         else:
             final_df = pd.concat([final_df, df_filt])
     final_df = final_df.reset_index(drop=True)
-    if datatype == "lusplus":
-        final_df = final_df.drop("CE_Allele", axis=1)
+    # if datatype == "lusplus":
+    #    final_df = final_df.drop("CE_Allele", axis=1)
     return final_df
diff --git a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
index a093c1fd..9bcfd9a0 100644
--- a/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
+++ b/lusSTR/tests/data/LUSPlus_stutter_test/LUSPlus_sequence_info.csv
@@ -1,24 +1,24 @@
-SampleID,Locus,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter,CE_Allele
-Sample1,D4S2408,10_10_0,1022,real_allele,,,,,,,
-Sample1,D4S2408,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,,,
-Sample1,D4S2408,8_8_0,1050,real_allele,,,,,,,
-Sample1,D8S1179,14_12_1_0,869,real_allele,,,,,,,
-Sample1,D8S1179,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212,
-Sample1,D8S1179,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201,
-Sample1,D9S1122,13_11,948,real_allele,,,,,,,
-Sample1,D9S1122,12_10,108,-1_stutter,13_11,,948.0,,,0.114,
-Sample1,D9S1122,11_11,991,real_allele,,,,,,,
-Sample1,D9S1122,10_10,87,-1_stutter,11_11,,991.0,,,0.088,
-Sample1,FGA,23_15_3_0,1436,real_allele,,,,,,,
-Sample1,FGA,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182,
-Sample1,FGA,21_13_3_0,48,BelowAT,,,,,0.013,,
-Sample1,FGA,20_12_3_0,1750,real_allele,,,,,,,
-Sample1,FGA,18_10_3_0,181,real_allele,,,,,,,
-Sample1,FGA,17_9_3_0,15,BelowAT,,,,,0.004,,
-Sample1,PENTA D,15_15,50,real_allele,,,,,,,
-Sample1,PENTA D,13_13,1000,real_allele,,,,,,,
-Sample1,PENTA E,7_7,505,real_allele,,,,,,,7.0
-Sample1,TH01,7_7,2197,real_allele,,,,,,,
-Sample1,TH01,6_6,1632,real_allele,,,,,,,
-Sample1,TH01,5_5,66,BelowAT,,,,,0.017,,
-Sample1,TPOX,11_11,15,BelowAT,,,,,1.0,,11.0
+SampleID,Locus,CE_Allele,LUS_Plus,Reads,allele_type,parent_allele1,parent_allele2,allele1_ref_reads,allele2_ref_reads,perc_noise,perc_stutter
+Sample1,D4S2408,10.0,10_10_0,1022,real_allele,,,,,,
+Sample1,D4S2408,9.0,9_9_0,116,-1_stutter/+1_stutter,10_10_0,8_8_0,1022.0,1050.0,,
+Sample1,D4S2408,8.0,8_8_0,1050,real_allele,,,,,,
+Sample1,D8S1179,14.0,14_12_1_0,869,real_allele,,,,,,
+Sample1,D8S1179,13.0,13_11_1_0,184,-1_stutter,14_12_1_0,,869.0,,,0.212
+Sample1,D8S1179,12.0,12_10_1_0,37,-2_stutter,14_12_1_0,,869.0,,,0.201
+Sample1,D9S1122,13.0,13_11,948,real_allele,,,,,,
+Sample1,D9S1122,12.0,12_10,108,-1_stutter,13_11,,948.0,,,0.114
+Sample1,D9S1122,11.0,11_11,991,real_allele,,,,,,
+Sample1,D9S1122,10.0,10_10,87,-1_stutter,11_11,,991.0,,,0.088
+Sample1,FGA,23.0,23_15_3_0,1436,real_allele,,,,,,
+Sample1,FGA,22.0,22_14_3_0,262,-1_stutter,23_15_3_0,,1436.0,,,0.182
+Sample1,FGA,21.0,21_13_3_0,48,BelowAT,,,,,0.013,
+Sample1,FGA,20.0,20_12_3_0,1750,real_allele,,,,,,
+Sample1,FGA,18.0,18_10_3_0,181,real_allele,,,,,,
+Sample1,FGA,17.0,17_9_3_0,15,BelowAT,,,,,0.004,
+Sample1,PENTA D,15.0,15_15,50,real_allele,,,,,,
+Sample1,PENTA D,13.0,13_13,1000,real_allele,,,,,,
+Sample1,PENTA E,7.0,7_7,505,real_allele,,,,,,
+Sample1,TH01,7.0,7_7,2197,real_allele,,,,,,
+Sample1,TH01,6.0,6_6,1632,real_allele,,,,,,
+Sample1,TH01,5.0,5_5,66,BelowAT,,,,,0.017,
+Sample1,TPOX,11.0,11_11,15,BelowAT,,,,,1.0,
diff --git a/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv b/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv
index b83fce36..7531c6f3 100644
--- a/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv
+++ b/lusSTR/tests/data/NGS_stutter_test/Sample1_nofilter.csv
@@ -1,28 +1,28 @@
 Locus,CE Allele,Allele Seq,Reads
-D4S2408,8,ATCTATCTATCTATCTATCTATCTATCTATCT,1000
-D4S2408,9,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357
-D4S2408,10,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,900
-D8S1179,12,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,26
-D8S1179,12,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,11
-D8S1179,13,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,95
-D8S1179,13,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,89
-D8S1179,14,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,739
-D8S1179,14,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,130
-D9S1122,10,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,87
-D9S1122,11,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,991
-D9S1122,12,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,108
-D9S1122,13,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,948
-FGA,17,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,15
-FGA,18,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,181
-FGA,20,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1750
-FGA,21,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,48
-FGA,22,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,262
-FGA,23,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1436
-PentaD,13,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,1000
-PentaD,15,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,50
-PentaE,7,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505
-TH01,5,AATGAATGAATGAATGAATG,66
-TH01,6,AATGAATGAATGAATGAATGAATG,1632
-TH01,7,AATGAATGAATGAATGAATGAATGAATG,2197
-TPOX,11,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,15
-vWA,16,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,6
+D4S2408,8.0,ATCTATCTATCTATCTATCTATCTATCTATCT,1000
+D4S2408,9.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCT,1357
+D4S2408,10.0,ATCTATCTATCTATCTATCTATCTATCTATCTATCTATCT,900
+D8S1179,12.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTA,26
+D8S1179,12.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,11
+D8S1179,13.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,95
+D8S1179,13.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,89
+D8S1179,14.0,TCTATCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,739
+D8S1179,14.0,TCTATCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTA,130
+D9S1122,10.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,87
+D9S1122,11.0,TAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,991
+D9S1122,12.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,108
+D9S1122,13.0,TAGATCGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGA,948
+FGA,17.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,15
+FGA,18.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,181
+FGA,20.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1750
+FGA,21.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,48
+FGA,22.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,262
+FGA,23.0,TTTCTTTCTTTCTTTTTTCTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTCCTTCCTTCC,1436
+PentaD,13.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,1000
+PentaD,15.0,AAAAGAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,50
+PentaE,7.0,AAAGAAAAGAAAAGAAAAGAAAAGAAAAGAAAAGA,505
+TH01,5.0,AATGAATGAATGAATGAATG,66
+TH01,6.0,AATGAATGAATGAATGAATGAATG,1632
+TH01,7.0,AATGAATGAATGAATGAATGAATGAATG,2197
+TPOX,11.0,AATGAATGAATGAATGAATGAATGAATGAATGAATGAATGAATG,15
+vWA,16.0,TCTATCTGTCTGTCTGTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCCATCTA,6
diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 6002cdd5..1d47d73e 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -361,7 +361,6 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
         n += 1
         colors = {"Typed": "g", "Stutter": "b", "BelowAT": "r"}
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
-        # print(marker_df)
         ax = fig.add_subplot(6, 5, n)
         ax.bar(
             marker_df["CE_Allele"],
@@ -386,7 +385,7 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
     if sameyaxis:
         title = "Marker Plots for All Alleles With Same Y-Axis Scale"
     elif filters:
-        title = "Marker Plots for True Alleles With Custom Y-Axis Scale"
+        title = "Marker Plots for Typed Alleles With Custom Y-Axis Scale"
     else:
         title = "Marker Plots for All Alleles With Custom Y-Axis Scale"
     plt.text(0.4, 0.95, title, transform=fig.transFigure, size=24)

From f0873dce325b3962a65bc52e873afe66e50bfd0e Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 11 Jan 2024 09:54:28 -0500
Subject: [PATCH 06/10] removed hashed code

---
 lusSTR/scripts/filter_settings.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lusSTR/scripts/filter_settings.py b/lusSTR/scripts/filter_settings.py
index f3f60461..859ce736 100644
--- a/lusSTR/scripts/filter_settings.py
+++ b/lusSTR/scripts/filter_settings.py
@@ -531,6 +531,4 @@ def same_size_filter(df, metadata, datatype):
         else:
             final_df = pd.concat([final_df, df_filt])
     final_df = final_df.reset_index(drop=True)
-    # if datatype == "lusplus":
-    #    final_df = final_df.drop("CE_Allele", axis=1)
     return final_df

From 3a24edf6b8547f2fa6a980ca8e3b5cb21c9a534a Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 11 Jan 2024 11:48:32 -0500
Subject: [PATCH 07/10] updated convert

---
 lusSTR/wrappers/convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lusSTR/wrappers/convert.py b/lusSTR/wrappers/convert.py
index fa8db047..beb950db 100644
--- a/lusSTR/wrappers/convert.py
+++ b/lusSTR/wrappers/convert.py
@@ -99,7 +99,7 @@ def format_table(input, software, kit="forenseq"):
             flanks_list.append(flank_summary)
             continue
         marker = STRMarkerObject(locus, sequence, software, kit=kit)
-        if locus == "D12S391" and kit == "powerseq":
+        if locus == "D12S391" and kit == "powerseq" and software == "straitrazor":
             if "." in str(marker.canonical):
                 check_sr += 1
                 if check_sr > 10:

From 922a539c3ccdfb1052395d532a6ba98516735e85 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 11 Jan 2024 13:06:18 -0500
Subject: [PATCH 08/10] fixed copy warning

---
 lusSTR/wrappers/filter.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 1d47d73e..57d394d9 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -341,16 +341,15 @@ def marker_plots(df, output_name):
 
 
 def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
-    sample_df = df[df["SampleID"] == sample_id]
+    sample_df = df[df["SampleID"] == sample_id].copy()
     # sample_id = f"{id}_sexchr" if sex else id
-    sample_df["Type"] = sample_df["allele_type"]
     conditions = [
         sample_df["allele_type"].str.contains("real"),
         sample_df["allele_type"].str.contains("BelowAT"),
         sample_df["allele_type"].str.contains("stutter"),
     ]
     values = ["Typed", "BelowAT", "Stutter"]
-    sample_df["Type"] = np.select(conditions, values)
+    sample_df.loc[:, "Type"] = np.select(conditions, values)
     max_reads = max(sample_df["Reads"])
     n = 100 if max_reads > 1000 else 10
     max_yvalue = int(math.ceil(max_reads / n)) * n

From e534317314332d22381bcb055e3ac1ff2e5781ed Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Thu, 11 Jan 2024 15:08:16 -0500
Subject: [PATCH 09/10] added allele name to typed alleles plot

---
 lusSTR/wrappers/filter.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/lusSTR/wrappers/filter.py b/lusSTR/wrappers/filter.py
index 57d394d9..cbcf8052 100644
--- a/lusSTR/wrappers/filter.py
+++ b/lusSTR/wrappers/filter.py
@@ -361,7 +361,7 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
         colors = {"Typed": "g", "Stutter": "b", "BelowAT": "r"}
         marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
         ax = fig.add_subplot(6, 5, n)
-        ax.bar(
+        p = ax.bar(
             marker_df["CE_Allele"],
             marker_df["Reads"],
             edgecolor="k",
@@ -370,15 +370,25 @@ def make_plot(df, sample_id, sameyaxis=False, filters=False, at=True):
         if at:
             at = get_at(marker_df, marker)
             ax.axhline(at, linestyle="--", color="k")
-            ax.text(min(marker_df["CE_Allele"]) - 0.9, at + (at * 0.1), f"AT", size=12)
+            ax.text(round(min(marker_df["CE_Allele"])) - 0.9, at + (at * 0.1), f"AT", size=12)
         labels = marker_df["Type"].unique()
         handles = [plt.Rectangle((0, 0), 1, 1, color=colors[l]) for l in labels]
         if not filters:
             plt.legend(handles, labels, title="Allele Type")
+        else:
+            for i, row in marker_df.iterrows():
+                marker_df.loc[i, "Label"] = (
+                    str(int(marker_df.loc[i, "CE_Allele"]))
+                    if ".0" in str(marker_df.loc[i, "CE_Allele"])
+                    else str(marker_df.loc[i, "CE_Allele"])
+                )
+            ax.bar_label(p, labels=marker_df["Label"])
         if sameyaxis:
             ax.set_yticks(np.arange(0, max_yvalue, increase_value))
         ax.set_xticks(
-            np.arange(min(marker_df["CE_Allele"]) - 1, max(marker_df["CE_Allele"]) + 2, 1.0)
+            np.arange(
+                round(min(marker_df["CE_Allele"]) - 1), round(max(marker_df["CE_Allele"])) + 2, 1.0
+            )
         )
         ax.title.set_text(marker)
     if sameyaxis:

From 3d478ac8bd2e8bdbdbb8e216f01d5ea4e3ff9e60 Mon Sep 17 00:00:00 2001
From: rnmitchell <rebecca.mitchell5987@gmail.com>
Date: Fri, 12 Jan 2024 06:17:46 -0500
Subject: [PATCH 10/10] updated empty bf code

---
 lusSTR/scripts/repeat.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lusSTR/scripts/repeat.py b/lusSTR/scripts/repeat.py
index 76ff49c4..bd34789b 100644
--- a/lusSTR/scripts/repeat.py
+++ b/lusSTR/scripts/repeat.py
@@ -163,16 +163,17 @@ def repeat_copy_number(bf, repeat):
     The input is a sequence string collapsed to bracketed sequence form.
     """
     longest = 0
-    if bf != "":
-        for block in bf.split(" "):
-            if block == repeat:
-                if 1 > longest:
-                    longest = 1
-            match = re.match(r"\[" + repeat + r"\](\d+)", block)
-            if match:
-                length = int(match.group(1))
-                if length > longest:
-                    longest = length
+    if bf == "":
+        return 0
+    for block in bf.split(" "):
+        if block == repeat:
+            if 1 > longest:
+                longest = 1
+        match = re.match(r"\[" + repeat + r"\](\d+)", block)
+        if match:
+            length = int(match.group(1))
+            if length > longest:
+                longest = length
     return str(longest)