From 5f2f4b08ce5957240baa31ee1d617ed018412ce1 Mon Sep 17 00:00:00 2001 From: Mark Cowley Date: Fri, 8 Nov 2019 00:44:40 +1100 Subject: [PATCH] major documentation update --- INSTALL.md | 30 +++++++------- README.md | 93 +++++++++++++++++++++++--------------------- TODO.md | 5 +-- mitylib/_version.py | 2 +- mitylib/normalise.py | 1 - mitylib/report.py | 1 + 6 files changed, 69 insertions(+), 63 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 669f32d..b921a9f 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,17 +1,17 @@ # Docker The simplest way to run mity is via docker: - docker run drmjc/mity:0.0.1b15 -h + docker run drmjc/mity:0.0.1b40 -h # pip -If you have freebayes >=1.2 and gsort installed, then pip should work well +If you have freebayes >=1.2 and Brent Pedersen's gsort installed, then pip should work well - VERSION=0.0.1b15 + VERSION=0.0.1b40 pip3 install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple mity==$VERSION # manual installation If you would prefer to install mity on a fresh Ubuntu installation, the following should work. -We have tested this on a fresh Ubuntu 14.04 image; We use `pyenv` to install python 3.7.4 and there +We have tested this on a fresh Ubuntu 14.04 image; we use `pyenv` to install python 3.7.4, though there are a number of alternatives. YMMV. # install dependencies @@ -39,7 +39,7 @@ are a number of alternatives. YMMV. 
# Python 3.7.4 pip install --upgrade pip export PATH=$PATH:.local/bin:$HOME/.pyenv/versions/3.7.4/bin - # merge DNAnexus' PYTHONPATH with this from PYTHON3 + # if running on a DNAnexus cloud instance, then merge DNAnexus' PYTHONPATH with the Python 3 one export PYTHONPATH=/home/linuxbrew/.linuxbrew/lib/python3.7/site-packages:/usr/share/dnanexus/lib/python2.7/site-packages @@ -60,13 +60,15 @@ Then install the system dependencies: freebayes (>=1.2.0), htslib (tabix+bgzip), Either install mity globally: - export PYTHONPATH=/usr/share/dnanexus/lib/python2.7/site-packages - export PYTHONPATH=/usr/local/lib/python3.5/dist-packages:/usr/lib/python3/dist-packages:/usr/share/dnanexus/lib/python2.7/site-packages + # for most users + export PYTHONPATH=/usr/local/lib/python3.7/dist-packages:/usr/lib/python3/dist-packages + # for those using a DNAnexus cloud instance + export PYTHONPATH=/usr/local/lib/python3.7/dist-packages:/usr/lib/python3/dist-packages:/usr/share/dnanexus/lib/python2.7/site-packages # fix a python version incompatibility bug in futures sudo perl -pi -e 's|raise exception_type, self._exception, self._traceback|raise Exception(self._exception).with_traceback(self._traceback)|' /usr/share/dnanexus/lib/python2.7/site-packages/concurrent/futures/_base.py - VERSION=0.0.1b15 + VERSION=0.0.1b40 pip3 install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple mity==$VERSION Or install mity using a virtualenv @@ -76,17 +78,17 @@ Or install mity using a virtualenv python3 -m venv . 
source bin/activate ./bin/pip install wheel - VERSION=0.0.1b15 + VERSION=0.0.1b40 ./bin/pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple mity==$VERSION -# test +# test on example data (These URLs valid until 26/7/2020) wget https://dl.dnanex.us/F/D/XJfjx2X139ZkzY7b29QQKBppzfj9p5V794Bfqf4G/A1.dedup.realigned.recalibrated.chrMT.bam wget https://dl.dnanex.us/F/D/qyV40Qgfj6Jgy3zZfJ07vkgXqZvJ6Fb2kXb24fyv/A1.dedup.realigned.recalibrated.chrMT.bam.bai - wget https://dl.dnanex.us/F/D/pVG7PjZy4qKBB6ZKbkkF0X6kB0kxf7ZzjpK7fXjY/hs37d5.fasta-index.tar.gz - tar -xzvf hs37d5.fasta-index.tar.gz; mv genome.dict hs37d5.dict; mv genome.fa hs37d5.fa; mv genome.fa.fai hs37d5.fa.fai - -# test post-docker + mity call --normalise A1.dedup.realigned.recalibrated.chrMT.bam + mity report A1.dedup.realigned.recalibrated.chrMT.mity.vcf.gz + +# test using docker wget https://dl.dnanex.us/F/D/XJfjx2X139ZkzY7b29QQKBppzfj9p5V794Bfqf4G/A1.dedup.realigned.recalibrated.chrMT.bam wget https://dl.dnanex.us/F/D/qyV40Qgfj6Jgy3zZfJ07vkgXqZvJ6Fb2kXb24fyv/A1.dedup.realigned.recalibrated.chrMT.bam.bai diff --git a/README.md b/README.md index ae1c0bf..9786a30 100644 --- a/README.md +++ b/README.md @@ -5,87 +5,92 @@ mity is a bioinformatic analysis pipeline designed to call mitochondrial SNV and * easily integrate with existing nuclear DNA analysis pipelines (mity merge) * provide an annotated report, designed for clinicians and researchers to interrogate - # Usage mity -h # Dependencies +* python3 (tested on 3.7.4) * freebayes >= 1.2.0 * bgzip + tabix * gsort (https://github.com/brentp/gsort) -* python3 (tested on 3.7.4) * pyvcf * xlsxwriter * pandas +# Installation +Installation instructions via Docker, pip, or manually are available in INSTALL.md + # Example Usage This is an example of calling variants in the Ashkenazim Trio. -First make sure mity is in your PATH variable. 
- -```bash -PATH="PATH_TO_MITY_FOLDER:${PATH}" -export PATH -``` - ## mity-call First run mity-call on three MT BAMs provided in mity/test_in -We can run it in normalised mode: +We can run it in normalised mode; we recommend always using `--normalise` (otherwise `mity report` won't work): ```bash mity call \ --prefix ashkenazim \ ---out-folder-path test_out/normalised \ ---min-alternate-fraction 0.5 \ +--out-folder-path test_out \ --region MT:1-500 \ --normalise \ ---p 0.001 \ test_in/HG002.hs37d5.2x250.small.MT.RG.bam \ test_in/HG003.hs37d5.2x250.small.MT.RG.bam \ test_in/HG004.hs37d5.2x250.small.MT.RG.bam ``` -This should create test_out/normalised/ashkenazim.mity.vcf.gz and test_out/normalised/ashkenazim.mity.vcf.gz.tbi - -We can run it without the normalisation: - -```bash -mity call \ ---prefix ashkenazim \ ---out-folder-path test_out/unnormalised \ ---min-alternate-fraction 0.5 \ ---region MT:1-500 \ ---p 0.001 \ -test_in/HG002.hs37d5.2x250.small.MT.RG.bam \ -test_in/HG003.hs37d5.2x250.small.MT.RG.bam \ -test_in/HG004.hs37d5.2x250.small.MT.RG.bam -``` - -This should create test_out/unnormalised/ashkenazim.mity.vcf.gz and test_out/unnormalised/ashkenazim.mity.vcf.gz.tbi +This will create `test_out/ashkenazim.mity.vcf.gz` (and its `.tbi` index file). ## mity-report -We can create a mity report on the normalised VCF: +We can create a `mity report` on the normalised VCF: ```bash mity report \ --prefix ashkenazim \ ---min_vaf 0.1 \ ---out-folder-path /Users/putticc/Projects/mity/test_out/normalised \ -test_out/normalised/ashkenazim.mity.vcf.gz +--min_vaf 0.01 \ +--out-folder-path test_out \ +test_out/ashkenazim.mity.vcf.gz ``` +This will create: `test_out/ashkenazim.annotated_variants.csv` and `test_out/ashkenazim.annotated_variants.xlsx`. 
+ +## mity-normalise +High-depth sequencing and sensitive variant calling can create many variants with more than 2 alleles, and in some +cases, join two nearby variants separated by shared REF sequence into a multi-nucleotide polymorphism +as discussed in the manuscript. Here, variant normalisation relates to decomposing the multi-allelic variants and +where possible, splitting multi-nucleotide polymorphisms into their cognate smaller variants. At the time of writing, +all variant decomposition tools we used failed to propagate the metadata in a multi-allelic variant to the split +variants, which caused problems when reporting the quality scores associated with each variant. + +Technically you can run `mity call` and `mity normalise` separately, but since `mity report` requires a normalised +vcf file, we recommend running `mity call --normalise`. -This should create: test_out/normalised/ashkenazim.annotated_variants.csv and test_out/normalised/ashkenazim.annotated_variants.xlsx +## mity-merge +You can merge a nuclear vcf.gz file and a mity.vcf.gz file thereby replacing the MT calls from the nuclear VCF +(presumably from a caller like HaplotypeCaller which is not able to sensitively call mitochondrial variants) with +the calls from mity. -On the unnormalised VCF - this doesn't work. ```bash -mity report \ +mity merge \ --prefix ashkenazim \ ---min_vaf 0.1 \ ---out-folder-path /Users/putticc/Projects/mity/test_out/unnormalised \ -test_out/unnormalised/ashkenazim.mity.vcf.gz +--mity_vcf test_out/ashkenazim.mity.vcf.gz \ +--nuclear_vcf todo-create-example-nuclear.vcf.gz ``` -## mity-merge + +# Recommendations for interpreting the report +Assuming that you are looking for a pathogenic variant in a patient with a rare genetic disorder potentially +caused by a mitochondrial mutation, then we recommend the following strategy: +1. tier 1 or 2 variants included in the 'commercial_panels' column +2. 
tier 1 or 2 variants that match the clinical presentation and the phenotype in 'disease_mitomap', preferably +those that are annotated with Confirmed evidence in the 'status_mitomap' column +3. exclude common variants: anything linked to a 'phylotree_haplotype', or with a high +'MGRB_frequency' or a high 'GenBank_frequency_mitomap'. +4. consider any remaining tier 1 or 2 variants that may have a predicted impact on tRNA +5. consider any remaining variants with high numbers of 'variant_references_mitomap' +6. if you have analysed multiple family members, consider variants whose level of 'variant_heteroplasmy' matches the +disease burden # Acknowledgements -We thank the Kinghorn Centre for Clinical Genomics and collaborators, who helped -with feedback for running mity. -We thank Eric Talevich who's CNVkit helped us structure mity as a package +We would like to thank +* The Kinghorn Centre for Clinical Genomics and collaborators, who helped with feedback for running mity. +* The Genome in a Bottle consortium for providing the test data used here +* Eric Talevich whose CNVkit helped us structure mity as a package +* Erik Garrison for developing FreeBayes and his early feedback in optimising FreeBayes for sensitive variant detection. +* Brent Pedersen for developing gsort \ No newline at end of file diff --git a/TODO.md b/TODO.md index 46339fe..5c23183 100644 --- a/TODO.md +++ b/TODO.md @@ -29,9 +29,8 @@ to support hg19 then? GRCh38 and GRCh37 are the same length. * update docker image once in main pip repo - PENDING # GitHub (pre-submission) -* CRITICAL: improve documentation -* CRITICAL: update INSTALL.md -* CRITICAL: ensure there is example usage +* merge branch back to master +* push to KCCG # DNAnexus * migrate app code to use the latest mity. either via an asset, or Docker image. 
diff --git a/mitylib/_version.py b/mitylib/_version.py index acd7cb7..2581159 100644 --- a/mitylib/_version.py +++ b/mitylib/_version.py @@ -1 +1 @@ -__version__ = "0.0.1b40" +__version__ = "0.0.1b41" diff --git a/mitylib/normalise.py b/mitylib/normalise.py index d7b87bd..1a09598 100755 --- a/mitylib/normalise.py +++ b/mitylib/normalise.py @@ -1278,7 +1278,6 @@ def do_normalise(vcf, out_file=None, p=0.002, SB_range=[0.1,0.9], min_MQMR=30, m :returns: Nothing. This creates a vcf.gz named out_file :rtype: None """ - print(p) if out_file is None: out_file = vcf.replace(".vcf.gz", ".norm.vcf.gz") diff --git a/mitylib/report.py b/mitylib/report.py index 0a5774b..854c4b0 100755 --- a/mitylib/report.py +++ b/mitylib/report.py @@ -3,6 +3,7 @@ import gzip import pandas import os.path +import xlsxwriter from .util import check_missing_file, create_prefix, make_hgvs, get_annot_file def make_table(variants, samples, vep_headers, impact_dict, min_vaf):