From d8fedfb5342e6b017b068e744d653059ed9820e4 Mon Sep 17 00:00:00 2001
From: rnmitchell
Date: Mon, 8 Apr 2024 15:09:17 -0400
Subject: [PATCH] changing the way snp binned files are named

---
 lusSTR/data/snp_config.yaml     |  6 ++++--
 lusSTR/tests/test_snps.py       |  2 +-
 lusSTR/wrappers/snps_convert.py | 35 ++++++++++++++++++++++++++++-----
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/lusSTR/data/snp_config.yaml b/lusSTR/data/snp_config.yaml
index 38ed3c1..5f5879a 100644
--- a/lusSTR/data/snp_config.yaml
+++ b/lusSTR/data/snp_config.yaml
@@ -12,7 +12,9 @@ types: "all" ## choices are "all", "i" (identity SNPs only), "p" (phenotype only
 nofilter: False ## True/False if no filtering is desired; if False, will remove any allele designated as Not Typed
 
 ## convert settings
-strand: "forward" ## forward/uas; indicates which oritentation to report the alleles for the ForenSeq SNPs; uas indicates the orientation as reported by the UAS or the forward strand
+strand: "forward" ## forward/uas; strand orientation to report
 references: "" ## list IDs of the samples to be run as references in EFM
 separate: false ## True/False; if want to separate samples into individual files for use in EFM
-thresh: 0.03 ## Analytical threshold value
\ No newline at end of file
+thresh: 0.03 ## Analytical threshold value
+
+
diff --git a/lusSTR/tests/test_snps.py b/lusSTR/tests/test_snps.py
index 3d977d9..f8578f3 100644
--- a/lusSTR/tests/test_snps.py
+++ b/lusSTR/tests/test_snps.py
@@ -239,7 +239,7 @@ def test_snp_bins(tmp_path):
         print(fh.read(), end="")
     assert filecmp.cmp(exp_out, obs_out) is True
     for snp_set in range(0, 10):
-        path = tmp_path / f"evidence_samples/Kin_pos_1ng_snp_evidence_set{snp_set}.csv"
+        path = tmp_path / f"evidence_samples/Kin_pos_1ng_set{snp_set}.csv"
         assert path.is_file()
 
 
diff --git a/lusSTR/wrappers/snps_convert.py b/lusSTR/wrappers/snps_convert.py
index f55e52e..9776fc7 100644
--- a/lusSTR/wrappers/snps_convert.py
+++ b/lusSTR/wrappers/snps_convert.py
@@ -37,9 +37,7 @@ def create_output_table(sample_df, orientation, separate, output_type, software)
         allele_col = "Forward_Strand_Allele"
     all_samples_df = pd.DataFrame()
     for sample in sample_df["SampleID"].unique():
-        indiv_df = sample_df[
-            (sample_df["SampleID"] == sample) & (sample_df["Issues"] != "Contains untyped allele")
-        ]
+        indiv_df = sample_df[sample_df["SampleID"] == sample]
         compiled_table = create_sample_df(indiv_df, output_type, allele_col)
         if software != "uas":
             compiled_table = check_allele_calls(compiled_table, output_type)
@@ -80,7 +78,7 @@ def bin_snps(sample_file, output_type, sample):
         bin_df["Sample Name"] = bin_df["Sample Name"] + "_set" + str(snp_num)
         compiled_table = pd.concat([compiled_table, bin_df])
         bin_df.to_csv(
-            f"{output_type}_samples/{sample}_snp_{output_type}_set{snp_num}.csv",
+            f"{output_type}_samples/{sample}_set{snp_num}.csv",
             index=False,
             sep="\t",
         )
@@ -97,9 +95,36 @@ def create_sample_df(indiv_df, output_type, all_col):
     try:
         compiled_table.columns = ["Marker", "Allele 1", "Allele 2", "Height 1", "Height 2"]
     except ValueError:
-        print("Too many alleles!")
+        try:
+            compiled_table.columns = [
+                "Marker",
+                "Allele 1",
+                "Allele 2",
+                "Allele 3",
+                "Height 1",
+                "Height 2",
+                "Height 3",
+            ]
+        except ValueError:
+            compiled_table.columns = [
+                "Marker",
+                "Allele 1",
+                "Allele 2",
+                "Allele 3",
+                "Allele 4",
+                "Height 1",
+                "Height 2",
+                "Height 3",
+                "Height 4",
+            ]
+            if len(compiled_table[compiled_table["Allele 4"].notna()]) > 0:
+                compiled_table = compiled_table.drop(compiled_table.columns[[4, 8]], axis=1)
+        if len(compiled_table[compiled_table["Allele 3"].notna()]) > 0:
+            print(compiled_table)
+            compiled_table = compiled_table.drop(compiled_table.columns[[3, 6]], axis=1)
     if output_type == "reference":
         for i, row in compiled_table.iterrows():
+            print(compiled_table.loc[i, "Height 2"])
             if pd.isnull(compiled_table.loc[i, "Height 2"]):
                 compiled_table.loc[i, "Allele 2"] = compiled_table.loc[i, "Allele 1"]
             compiled_table = compiled_table[["Marker", "Allele 1", "Allele 2"]]
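
For context beyond the diff itself: after this change, bin_snps writes each binned SNP set to
{output_type}_samples/{sample}_set{snp_num}.csv rather than the longer
{sample}_snp_{output_type}_set{snp_num}.csv name, and the updated test checks for exactly that
pattern. Below is a minimal sketch of the new naming, not part of the patch; the sample ID is
taken from the updated test_snp_bins test, while the output type and number of sets are
illustrative assumptions.

    # Sketch only: illustrates the binned-file naming this patch adopts.
    from pathlib import Path

    output_type = "evidence"   # hypothetical; "reference" is the other output type in the config
    sample = "Kin_pos_1ng"     # sample ID used in the updated test_snp_bins test
    outdir = Path(f"{output_type}_samples")
    outdir.mkdir(exist_ok=True)

    for snp_num in range(10):  # test_snp_bins expects sets 0 through 9
        # old name: f"{sample}_snp_{output_type}_set{snp_num}.csv"
        path = outdir / f"{sample}_set{snp_num}.csv"
        print(path)            # e.g. evidence_samples/Kin_pos_1ng_set0.csv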