Skip to content

Commit

Permalink
paper cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Mar 2, 2023
1 parent e8cfa19 commit c8aee43
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
File renamed without changes.
2 changes: 1 addition & 1 deletion paper/combine_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
df = pd.read_csv(filename)
df = df[columns]
df_list.append(df)
df_combined.sort_values("date").to_csv("combo.csv", index=False)
df_combined.sort_values("date").to_csv("hgvs_searches_combined.csv", index=False)


29 changes: 29 additions & 0 deletions paper/investigate_fails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/env python3

import math
import re
import sys
import pandas as pd
from pysam.libcfaidx import FastaFile
import pyhgvs
from cdot.pyhgvs.pyhgvs_transcript import JSONPyHGVSTranscriptFactory


def main():
    """Load the combined HGVS search CSV and select the rows that failed to resolve.

    Expects exactly one command-line argument: the path to the CSV file
    (the usage text names it ``hgvs_searches_combined.csv``). If the argument
    count is wrong, a usage line is written to stderr and the process exits
    with status 1.

    The CSV must contain a ``can_resolve`` column; rows where it is ``False``
    are selected. A FASTA handle and a cdot transcript factory are then opened
    from hard-coded local paths.

    NOTE(review): ``hgvs_errors_df``, ``genome`` and ``factory`` are not used
    further in the code visible here -- presumably scaffolding for follow-up
    analysis; confirm before removing.
    """
    # sys.argv[0] is the script name, so one user-supplied argument means a
    # length of 2 (the previous check against 1 rejected every valid call).
    if len(sys.argv) != 2:
        sys.stderr.write(f"Usage {sys.argv[0]} hgvs_searches_combined.csv\n")
        sys.exit(1)

    filename = sys.argv[1]
    df = pd.read_csv(filename)

    # Element-wise comparison is required here: `series is False` tests the
    # identity of the Series object and yields a plain bool, which cannot be
    # used as a row mask. Missing values compare unequal and are excluded.
    non_resolve_mask = df["can_resolve"].eq(False)
    hgvs_errors_df = df[non_resolve_mask]

    genome = FastaFile("/data/annotation/fasta/GCF_000001405.25_GRCh37.p13_genomic.fna.gz")
    factory = JSONPyHGVSTranscriptFactory(
        [
            "/home/dlawrence/Downloads/cdot-0.2.12.refseq.grch37_grch38.json.gz",
            "/home/dlawrence/Downloads/cdot-0.2.12.ensembl.grch37_grch38.json.gz",
        ]
    )


# Run the investigation only when this file is executed as a script,
# so importing the module does not trigger file loading.
if __name__ == "__main__":
    main()

0 comments on commit c8aee43

Please sign in to comment.