Skip to content

Commit

Permalink
changing the way snp binned files are named
Browse files Browse the repository at this point in the history
  • Loading branch information
rnmitchell committed Apr 8, 2024
1 parent 2ef8d92 commit d8fedfb
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 8 deletions.
6 changes: 4 additions & 2 deletions lusSTR/data/snp_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ types: "all" ## choices are "all", "i" (identity SNPs only), "p" (phenotype only
nofilter: False ## True/False if no filtering is desired; if False, will remove any allele designated as Not Typed

## convert settings
strand: "forward" ## forward/uas; indicates which orientation to report the alleles for the ForenSeq SNPs; uas indicates the orientation as reported by the UAS or the forward strand
strand: "forward" ## forward/uas; strand orientation to report
references: "" ## list IDs of the samples to be run as references in EFM
separate: false ## True/False; if want to separate samples into individual files for use in EFM
thresh: 0.03 ## Analytical threshold value
thresh: 0.03 ## Analytical threshold value


2 changes: 1 addition & 1 deletion lusSTR/tests/test_snps.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def test_snp_bins(tmp_path):
print(fh.read(), end="")
assert filecmp.cmp(exp_out, obs_out) is True
for snp_set in range(0, 10):
path = tmp_path / f"evidence_samples/Kin_pos_1ng_snp_evidence_set{snp_set}.csv"
path = tmp_path / f"evidence_samples/Kin_pos_1ng_set{snp_set}.csv"
assert path.is_file()


Expand Down
35 changes: 30 additions & 5 deletions lusSTR/wrappers/snps_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ def create_output_table(sample_df, orientation, separate, output_type, software)
allele_col = "Forward_Strand_Allele"
all_samples_df = pd.DataFrame()
for sample in sample_df["SampleID"].unique():
indiv_df = sample_df[
(sample_df["SampleID"] == sample) & (sample_df["Issues"] != "Contains untyped allele")
]
indiv_df = sample_df[sample_df["SampleID"] == sample]
compiled_table = create_sample_df(indiv_df, output_type, allele_col)
if software != "uas":
compiled_table = check_allele_calls(compiled_table, output_type)
Expand Down Expand Up @@ -80,7 +78,7 @@ def bin_snps(sample_file, output_type, sample):
bin_df["Sample Name"] = bin_df["Sample Name"] + "_set" + str(snp_num)
compiled_table = pd.concat([compiled_table, bin_df])
bin_df.to_csv(
f"{output_type}_samples/{sample}_snp_{output_type}_set{snp_num}.csv",
f"{output_type}_samples/{sample}_set{snp_num}.csv",
index=False,
sep="\t",
)
Expand All @@ -97,9 +95,36 @@ def create_sample_df(indiv_df, output_type, all_col):
try:
compiled_table.columns = ["Marker", "Allele 1", "Allele 2", "Height 1", "Height 2"]
except ValueError:
print("Too many alleles!")
try:
compiled_table.columns = [
"Marker",
"Allele 1",
"Allele 2",
"Allele 3",
"Height 1",
"Height 2",
"Height 3",
]
except ValueError:
compiled_table.columns = [
"Marker",
"Allele 1",
"Allele 2",
"Allele 3",
"Allele 4",
"Height 1",
"Height 2",
"Height 3",
"Height 4",
]
if len(compiled_table[compiled_table["Allele 4"].notna()]) > 0:
compiled_table = compiled_table.drop(compiled_table.columns[[4, 8]], axis=1)
if len(compiled_table[compiled_table["Allele 3"].notna()]) > 0:
print(compiled_table)
compiled_table = compiled_table.drop(compiled_table.columns[[3, 6]], axis=1)
if output_type == "reference":
for i, row in compiled_table.iterrows():
print(compiled_table.loc[i, "Height 2"])
if pd.isnull(compiled_table.loc[i, "Height 2"]):
compiled_table.loc[i, "Allele 2"] = compiled_table.loc[i, "Allele 1"]
compiled_table = compiled_table[["Marker", "Allele 1", "Allele 2"]]
Expand Down

0 comments on commit d8fedfb

Please sign in to comment.