-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Create Marker Plots #61
Changes from 10 commits
a23ec18
949b195
e4feae9
324c58b
fab6566
f34c69d
3ca3eed
b8a902b
18d52be
15f6f9f
0d99cd1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,9 @@ | |
|
||
import csv | ||
import json | ||
import math | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
import os | ||
import pandas as pd | ||
import re | ||
|
@@ -20,6 +23,8 @@ | |
from lusSTR.scripts.repeat import collapse_all_repeats, collapse_repeats_by_length | ||
from lusSTR.scripts.repeat import sequence_to_bracketed_form, split_by_n | ||
from lusSTR.scripts.repeat import reverse_complement, reverse_complement_bracketed | ||
from matplotlib.backends.backend_pdf import PdfPages | ||
from pathlib import Path | ||
|
||
|
||
with open(get_str_metadata_file(), "r") as fh: | ||
|
@@ -170,6 +175,48 @@ def sort_table(table): | |
return sorted_table | ||
|
||
|
||
def marker_plots(df, output_name, sex):
    """Write one two-page PDF of marker plots per sample into ``MarkerPlots/``.

    For every distinct "SampleID" in ``df`` a file named
    ``MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf`` is created.
    ``make_plot`` is called twice per sample, first with ``sameyaxis=False``
    and then with ``sameyaxis=True``, and each resulting figure becomes one
    page of the PDF.

    Args:
        df: Table with at least "SampleID" and "CE_Allele" columns (plus
            whatever ``make_plot`` reads). It is not modified.
        output_name: Prefix used in each PDF file name.
        sex: Truthy when plotting sex-chromosome loci; appends ``_sexchr`` to
            the sample ID in the file name and is forwarded to ``make_plot``.
    """
    Path("MarkerPlots").mkdir(parents=True, exist_ok=True)
    # Convert on a copy: the caller's table must not change type underneath it
    # just because plots were requested.
    plot_df = df.assign(CE_Allele=df["CE_Allele"].astype(float))
    for sample in plot_df["SampleID"].unique():
        sample_id = f"{sample}_sexchr" if sex else sample
        with PdfPages(f"MarkerPlots/{output_name}_{sample_id}_marker_plots.pdf") as pdf:
            for same_scale in (False, True):
                make_plot(plot_df, sample, sex, sameyaxis=same_scale)
                # savefig() with no argument writes pyplot's current figure,
                # i.e. the one make_plot just created.
                pdf.savefig()
                # pyplot keeps every figure alive until it is closed; without
                # this, two large figures per sample pile up in memory.
                plt.close()
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggestion: pass the boolean as a keyword argument. That way, when someone is messing with the code 18 months from now and they're trying to remember what that dang 4th argument to `make_plot` is, the call itself tells them: make_plot(df, id, sex, sameyaxis=False) |
||
|
||
|
||
def make_plot(df, id, sex, sameyaxis):
    """Draw one figure of per-marker read-count bar charts for a single sample.

    The figure is created through pyplot and left as the current figure;
    nothing is returned, so the caller is responsible for saving and closing
    it.

    Args:
        df: Table with "SampleID", "Locus", "CE_Allele" and "Reads" columns.
        id: "SampleID" value selecting the rows to plot. (The name shadows the
            builtin; it is kept so existing keyword callers keep working.)
        sex: Truthy selects the sex-chromosome layout (6 columns, 31x31
            figure); falsy selects the autosomal layout (5 columns, 30x30).
        sameyaxis: When truthy, every subplot gets the same y-axis tick values,
            derived from the sample's highest read count. Otherwise each
            subplot keeps matplotlib's automatic ticks.
    """
    sample_df = df[df["SampleID"] == id]

    # Produce nice round values (10, 20, 100, 200, ...) for the top of the
    # y-axis and for the tick increment: round to 100s when the highest read
    # count exceeds 1000, otherwise to 10s.
    max_reads = max(sample_df["Reads"])
    unit = 100 if max_reads > 1000 else 10
    max_yvalue = int(math.ceil(max_reads / unit)) * unit
    # The increment is about a fifth of the axis, truncated to a multiple of
    # `unit`. For samples with 40 reads or fewer that truncates to 0, and
    # np.arange rejects a zero step, so never go below one unit.
    tick_step = max(int(math.ceil(max_yvalue / 5) / unit) * unit, unit)

    markers = sample_df["Locus"].unique()
    ncols = 6 if sex else 5
    # Six rows hold the usual marker panels; grow the grid instead of letting
    # add_subplot fail when a sample carries more markers than that.
    nrows = max(6, math.ceil(len(markers) / ncols))
    fig = plt.figure(figsize=(31, 31) if sex else (30, 30))

    for position, marker in enumerate(markers, start=1):
        marker_df = sample_df[sample_df["Locus"] == marker].sort_values(by="CE_Allele")
        ax = fig.add_subplot(nrows, ncols, position)
        ax.bar(marker_df["CE_Allele"], marker_df["Reads"])
        if sameyaxis:
            ax.set_yticks(np.arange(0, max_yvalue, tick_step))
        # One tick per whole allele, padded by one on each side of the data.
        ax.set_xticks(
            np.arange(min(marker_df["CE_Allele"]) - 1, max(marker_df["CE_Allele"]) + 2, 1.0)
        )
        ax.title.set_text(marker)

    # Figure-level heading, positioned in figure coordinates.
    scale = "Same" if sameyaxis else "Custom"
    plt.text(
        0.4, 0.95, f"Marker Plots With {scale} Y-Axis Scale", transform=fig.transFigure, size=24
    )
|
||
|
||
def main(input, out, kit, uas, sex, nocombine): | ||
input = str(input) | ||
out = str(out) | ||
|
@@ -191,6 +238,7 @@ def main(input, out, kit, uas, sex, nocombine): | |
sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False) | ||
else: | ||
sex_final_table.to_csv(f"{output_name}_sexloci.txt", sep="\t", index=False) | ||
marker_plots(sex_final_table, output_name, True) | ||
if not uas: | ||
if not autosomal_final_table.empty: | ||
autosomal_flank_table.to_csv(f"{output_name}_flanks.txt", sep="\t", index=False) | ||
|
@@ -202,6 +250,7 @@ def main(input, out, kit, uas, sex, nocombine): | |
autosomal_final_table.to_csv(out, sep="\t", index=False) | ||
else: | ||
autosomal_final_table.to_csv(out, sep="\t", index=False) | ||
marker_plots(autosomal_final_table, output_name, False) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar suggestion with the boolean argument here: marker_plots(autosomal_final_table, output_name, sex_chr=False) |
||
|
||
|
||
if __name__ == "__main__": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo: "for its authenticity".