From 50daf00e029207bdec729ef525a7e8383f145b84 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 29 Sep 2022 14:45:24 +1000 Subject: [PATCH 01/10] add targeted option --- src/mykrobe/cmds/amr.py | 2 +- src/mykrobe/parser.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mykrobe/cmds/amr.py b/src/mykrobe/cmds/amr.py index b7351df7..5a048a69 100755 --- a/src/mykrobe/cmds/amr.py +++ b/src/mykrobe/cmds/amr.py @@ -293,7 +293,7 @@ def run(parser, args): cp.run() logger.debug("CoverageParser complete") - if ref_data["species_phylo_group"] is None: + if ref_data["species_phylo_group"] is None or args.targeted: phylogenetics = {} depths = [cp.estimate_depth()] else: diff --git a/src/mykrobe/parser.py b/src/mykrobe/parser.py index 1fde64b8..9ac30ec6 100644 --- a/src/mykrobe/parser.py +++ b/src/mykrobe/parser.py @@ -196,6 +196,12 @@ def __init__(self, *args, **kwargs): choices=["json", "csv", "json_and_csv"], default="csv", ) +parser_amr.add_argument( + "-T", + "--targeted", + action="store_true", + help=f"Sample is from targeted sequencing - not WGS", +) parser_amr.set_defaults(func=run_subtool) From 93500d569880dd5c3eb6aac2c86e8686ae386442 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 29 Sep 2022 14:45:30 +1000 Subject: [PATCH 02/10] Create combine.py --- scripts/combine.py | 163 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100755 scripts/combine.py diff --git a/scripts/combine.py b/scripts/combine.py new file mode 100755 index 00000000..a03f9be2 --- /dev/null +++ b/scripts/combine.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +"""This script can be used to combine multiple mykrobe CSV/JSON output files into a +a sheet for easy comparison. The columns are drugs, the rows are samples, and the cells +are the susceptibility prediction. + +USAGE: + +# combine two CSV reports into a file called samples.csv +combine.py -o samples.csv sample1.csv sample2.csv + +# combine a JSON report with a CSV report +combine.py -o samples.csv sample1.json sample2.csv + +# combine all JSON and CSV files in a directory +combine.py -o samples.csv reports/ +""" +import json +import sys +import argparse +from pathlib import Path +from typing import Dict + + +def extract_csv_data(path: Path) -> Dict[str, str]: + report = dict() + with open(path) as fp: + header_fields = [s.strip('"') for s in next(fp).split(",")] + sample_idx = header_fields.index("sample") + prediction_idx = header_fields.index("susceptibility") + drug_idx = header_fields.index("drug") + species_idx = header_fields.index("species") + lineage_idx = header_fields.index("lineage") + + samples = set() + for row in fp: + fields = [s.strip('"') for s in row.split(",")] + sample = fields[sample_idx] + samples.add(sample) + drug = fields[drug_idx] + pred = fields[prediction_idx] + report["species"] = fields[species_idx] + report["lineage"] = fields[lineage_idx] + report[drug] = pred + + if len(samples) > 1: + eprint(f"ERROR: Got more than one sample in {path}") + sys.exit(1) + + report["sample"] = sample + print(report) + return report + + +def extract_json_data(path: Path) -> Dict[str, str]: + report = dict() + with open(path) as fp: + data = json.load(fp) + + samples = list(data.keys()) + if len(samples) > 1: + eprint(f"ERROR: Got moe than one sample in {path}") + sys.exit(1) + + report["sample"] = samples[0] + + for drug, info in data[samples[0]]["susceptibility"].items(): + pred = info["predict"] + report[drug] = pred + + phylo = data[samples[0]].get("phylogenetics", {}) + report["species"] = ";".join(list(phylo.get("species", {}).keys())) + lineages = [ + s.replace("lineage", "") for s in phylo.get("lineage", {}).get("lineage", {}) + ] + report["lineage"] = ";".join(lineages) + + return report + + +def extract_data(path: Path) -> Dict[str, str]: + if path.suffix == ".csv": + return extract_csv_data(path) + elif path.suffix == ".json": + return extract_json_data(path) + else: + eprint(f"ERROR: {path} is an unknown file type") + sys.exit(1) + + +def eprint(msg: str): + print(msg, file=sys.stderr) + + +def main(): + if sys.version_info < (3, 6, 0): + eprint("You need python 3.6 or later to run this script") + sys.exit(1) + + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + "src", + type=Path, + metavar="DIR/FILES", + nargs="+", + help="A directory containing mykrobe reports, or a list of mykrobe report files", + ) + parser.add_argument( + "-d", + "--delim", + help="Delimiting character to use in the output file [default: %(default)s]", + default=",", + ) + parser.add_argument( + "-o", + "--output", + help="File path to save output to [default: stdout]", + type=Path, + ) + + args = parser.parse_args() + + delim = args.delim + input_files = [] + for p in args.src: + if p.is_dir(): + input_files.append(list(p.glob("*.csv"))) + input_files.append(list(p.glob("*.json"))) + else: + input_files.append(p) + + eprint(f"Found {len(input_files)} mykrobe reports") + + drugs = set() + reports = dict() + for p in input_files: + data = extract_data(p) + reports[p] = data + for k in data: + if k in ("sample", "species", "lineage"): + continue + drugs.add(k) + + fp_out = args.output.open("w") if args.output is not None else sys.stdout + print(delim.join(["sample", "species", "lineage", *sorted(drugs)]), file=fp_out) + + for p, data in reports.items(): + row = [] + for k in ["sample", "species", "lineage"]: + row.append(data[k]) + + for drug in sorted(drugs): + row.append(data.get(drug, "")) + + print(delim.join(row), file=fp_out) + + fp_out.close() + + +if __name__ == "__main__": + main() From 6a880903d52396f52cd4f76e8d64a55c0866eca6 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Thu, 29 Sep 2022 14:56:11 +1000 Subject: [PATCH 03/10] add combine.py to PATH --- Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index bf69d36b..e3d9f4a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,4 +45,8 @@ RUN python -m pip install requests && python -m pip install . -vv # download panels RUN mykrobe panels update_metadata \ - && mykrobe panels update_species all \ No newline at end of file + && mykrobe panels update_species all + +# add combine script to path \ +COPY --chmod=755 ./scripts/combine.py "/${PROJECT}/scripts/combine.py" +ENV PATH="/${PROJECT}/scripts:$PATH" \ No newline at end of file From 87b6c96126c58cccc157a9c21a81720ab7fed925 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 30 Sep 2022 09:49:54 +1000 Subject: [PATCH 04/10] add panel name to ref data and logging --- src/mykrobe/cmds/amr.py | 13 +++++++++---- src/mykrobe/species_data/species_dir.py | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mykrobe/cmds/amr.py b/src/mykrobe/cmds/amr.py index 5a048a69..211387af 100755 --- a/src/mykrobe/cmds/amr.py +++ b/src/mykrobe/cmds/amr.py @@ -156,6 +156,7 @@ def ref_data_from_args(args): "kmer": args.kmer, "version": "custom", "species_phylo_group": None, + "panel_name": "custom", } else: data_dir = DataDir(args.panels_dir) @@ -170,6 +171,7 @@ def ref_data_from_args(args): "kmer": species_dir.kmer(), "version": species_dir.version(), "species_phylo_group": species_dir.species_phylo_group(), + "panel_name": species_dir.panel_name, } if ref_data["lineage_json"] is None: @@ -259,11 +261,13 @@ def run(parser, args): and ref_data["lineage_json"] is None ): logger.info( - "Forcing --report_all_calls because species is 'custom' and options --custom_variant_to_resistance_json,--custom_lineage_json were not used" + "Forcing --report_all_calls because species is 'custom' and options " + "--custom_variant_to_resistance_json,--custom_lineage_json were not used" ) args.report_all_calls = True logger.info( - f"Running mykrobe predict using species {args.species}, and panel version {ref_data['version']}" + f"Running mykrobe predict using species {args.species}, panel version " + f"{ref_data['version']}, and panel {ref_data['panel_name']}" ) if args.tmp is None: @@ -272,10 +276,11 @@ def run(parser, args): if args.ont: args.expected_error_rate = ONT_E_RATE logger.info( - f"Set expected error rate to {args.expected_error_rate} because --ont flag was used" + f"Set expected error rate to {args.expected_error_rate} because --ont flag " + f"was used" ) args.ploidy = ONT_PLOIDY - logger.info(f"Set ploidy to {args.ploidy} because --ont flag used") + logger.info(f"Set ploidy to {args.ploidy} because --ont flag was used") # Run Cortex cp = CoverageParser( diff --git a/src/mykrobe/species_data/species_dir.py b/src/mykrobe/species_data/species_dir.py index caf0ea41..edc2c46f 100644 --- a/src/mykrobe/species_data/species_dir.py +++ b/src/mykrobe/species_data/species_dir.py @@ -14,6 +14,7 @@ def __init__(self, root_dir): if not os.path.exists(self.manifest_json): raise FileNotFoundError(f"Manifest file not found in species directory {self.root_dir}. Expected to find {self.manifest_json}.") self.panel = None + self.panel_name = None self.manifest = load_json(self.manifest_json) self.set_panel(self.default_panel()) From 8817134cfcc0a08307d6e1bde3aaac7cef138aa5 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 30 Sep 2022 09:50:10 +1000 Subject: [PATCH 05/10] add option to compress json --- src/mykrobe/cmds/amr.py | 21 +++++++++++---------- src/mykrobe/parser.py | 6 ++++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/mykrobe/cmds/amr.py b/src/mykrobe/cmds/amr.py index 211387af..7521aa6d 100755 --- a/src/mykrobe/cmds/amr.py +++ b/src/mykrobe/cmds/amr.py @@ -1,6 +1,7 @@ from __future__ import print_function import copy +import gzip import json import logging import random @@ -220,14 +221,6 @@ def write_outputs(args, base_json): del base_json[args.sample]["lineage_calls"] outputs["json"] = json.dumps(base_json, indent=4) - if len(outputs) == 0: - raise ValueError( - ( - f"Output format must be one of: csv,json,json_and_csv. Got " - f"'{args.output_format}'" - ) - ) - for output_type, output in outputs.items(): # write to file is specified by user, otherwise send to stdout if args.output: @@ -235,9 +228,17 @@ def write_outputs(args, base_json): outfile = args.output + "." + output_type else: outfile = args.output - with open(outfile, "w") as f: - f.write(output) + + if args.compress and output_type == "json": + p = outfile if outfile.split(".")[-1] == "gz" else f"{outfile}.gz" + with gzip.open(p, "w") as fout: + fout.write(output.encode("utf-8")) + else: + with open(outfile, "w") as f: + f.write(output) else: + if args.compress and output_type == "json": + output = gzip.compress(output) print(output) diff --git a/src/mykrobe/parser.py b/src/mykrobe/parser.py index 9ac30ec6..3e66214b 100644 --- a/src/mykrobe/parser.py +++ b/src/mykrobe/parser.py @@ -202,6 +202,12 @@ def __init__(self, *args, **kwargs): action="store_true", help=f"Sample is from targeted sequencing - not WGS", ) +parser_amr.add_argument( + "-z", + "--compress", + action="store_true", + help="Compress JSON output with gzip", +) parser_amr.set_defaults(func=run_subtool) From 856f09bc7736a2e4c5caace44d1974e780fd0bfa Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 5 Oct 2022 11:49:05 +1000 Subject: [PATCH 06/10] add targeted bed file option --- CHANGELOG.md | 4 ++ requirements.txt | 1 + setup.py | 1 + src/mykrobe/cmds/amr.py | 1 + src/mykrobe/parser.py | 4 +- src/mykrobe/typing/typer/genotyper.py | 69 +++++++++++++++++---------- src/mykrobe/utils.py | 34 +++++++++++++ 7 files changed, 88 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adbc3a56..1f11cba3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] +### Added + +- option to compress JSON output (`-z`/`--compress`) + ## 0.12.1 ### Fixed diff --git a/requirements.txt b/requirements.txt index effb63e5..b620ba23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ requests>=2.27.1 mongoengine>=0.24.1 cython>=0.29.28 numpy>=1.22.0 +intervaltree~=3.1 ## Testing tox pytest diff --git a/setup.py b/setup.py index 52f0aeec..395ded94 100644 --- a/setup.py +++ b/setup.py @@ -132,6 +132,7 @@ def read(*names, **kwargs): "mongoengine", "requests", "numpy", + "intervaltree~=3.1" ], entry_points={ "console_scripts": [ diff --git a/src/mykrobe/cmds/amr.py b/src/mykrobe/cmds/amr.py index 7521aa6d..081265e3 100755 --- a/src/mykrobe/cmds/amr.py +++ b/src/mykrobe/cmds/amr.py @@ -295,6 +295,7 @@ def run(parser, args): tmp_dir=args.tmp, skeleton_dir=args.skeleton_dir, memory=args.memory, + targeted=args.targeted, ) cp.run() logger.debug("CoverageParser complete") diff --git a/src/mykrobe/parser.py b/src/mykrobe/parser.py index 3e66214b..cec30bbc 100644 --- a/src/mykrobe/parser.py +++ b/src/mykrobe/parser.py @@ -199,8 +199,8 @@ def __init__(self, *args, **kwargs): parser_amr.add_argument( "-T", "--targeted", - action="store_true", - help=f"Sample is from targeted sequencing - not WGS", + help=f"BED file of regions that sequencing targeted", + type=str, ) parser_amr.add_argument( "-z", diff --git a/src/mykrobe/typing/typer/genotyper.py b/src/mykrobe/typing/typer/genotyper.py index 4ae1ac7b..8f49ca64 100755 --- a/src/mykrobe/typing/typer/genotyper.py +++ b/src/mykrobe/typing/typer/genotyper.py @@ -1,37 +1,34 @@ from __future__ import print_function -import os -import json + import csv -import glob import logging -import subprocess -from copy import copy +import re -from mykrobe import K +from mykrobe.cortex import McCortexGenoRunner from mykrobe.metagenomics import LineagePredictor - -from mykrobe.typing import SequenceProbeCoverage -from mykrobe.typing import VariantProbeCoverage -from mykrobe.typing import ProbeCoverage -from mykrobe.typing import Panel - +from mykrobe.typing import ( + Panel, + ProbeCoverage, + SequenceProbeCoverage, + VariantProbeCoverage, +) +from mykrobe.typing.typer.base import DEFAULT_ERROR_RATE, DEFAULT_MINOR_FREQ from mykrobe.typing.typer.presence import GeneCollectionTyper from mykrobe.typing.typer.variant import VariantTyper -from mykrobe.typing.typer.base import DEFAULT_MINOR_FREQ -from mykrobe.typing.typer.base import DEFAULT_ERROR_RATE - -from mykrobe.variants.schema.models import VariantCallSet +from mykrobe.utils import get_params, load_bed, median, split_var_name from mykrobe.variants.schema.models import Variant -from mykrobe.cortex import McCortexGenoRunner +from mykrobe import K -from mykrobe.utils import get_params -from mykrobe.utils import split_var_name -from mykrobe.utils import median +POS_REGEX = re.compile(r"var_name=[ACGT]+(?P\d+)[ACGT]+") logger = logging.getLogger(__name__) +class EmptyBedFile(Exception): + pass + + class CoverageParser(object): def __init__( self, @@ -47,6 +44,7 @@ def __init__( verbose=True, tmp_dir="tmp/", skeleton_dir="atlas/data/skeletons/", + targeted=None, ): self.sample = sample self.seq = seq @@ -64,6 +62,13 @@ def __init__( self.panels = [] self.threads = threads self.memory = memory + if targeted: + self.targeted = load_bed(targeted) + if self.targeted.is_empty(): + raise EmptyBedFile("Targeted BED file has no regions") + logging.info(f"Loaded {len(self.targeted)} regions from targeted BED file") + else: + self.targeted = None for panel_file_path in self.panel_file_paths: panel = Panel(panel_file_path) self.panels.append(panel) @@ -95,10 +100,11 @@ def estimate_depth(self): for variant_covg in self.variant_covgs.values(): if variant_covg.reference_coverage.median_depth > 0: depth.append(variant_covg.reference_coverage.median_depth) - for spcs in self.gene_presence_covgs.values(): - __median_depth = median([spc.median_depth for spc in spcs.values()]) - if __median_depth > 0: - depth.append(__median_depth) + if not self.targeted: + for spcs in self.gene_presence_covgs.values(): + __median_depth = median([spc.median_depth for spc in spcs.values()]) + if __median_depth > 0: + depth.append(__median_depth) _median = median(depth) if _median < 1: return 1 @@ -139,6 +145,10 @@ def _parse_covgs(self): klen, ) = self._parse_summary_covgs_row(row) allele_name = allele.split("?")[0] + + if not self._allele_is_in_target_region(allele): + continue + if self._is_variant_panel(allele_name): self._parse_variant_panel(row) else: @@ -262,6 +272,17 @@ def _parse_variant_panel(self, row): else: raise ValueError("probe_type must be ref or alt") + def _allele_is_in_target_region(self, allele: str) -> bool: + """Checks whether an allele falls within the targeted BED regions""" + if not self.targeted: + return True + + match = POS_REGEX.search(allele) + if not match: # no genome position, so it isn't a variant probe + return False + pos = int(match.group("pos")) - 1 # mykrobe panel is 1-based + return self.targeted.overlaps(pos) + class Genotyper(object): diff --git a/src/mykrobe/utils.py b/src/mykrobe/utils.py index cc89dbb0..095f75a4 100644 --- a/src/mykrobe/utils.py +++ b/src/mykrobe/utils.py @@ -1,12 +1,14 @@ import gzip import hashlib import json +import logging import os import re from pathlib import Path from typing import TextIO, Union from Bio.Seq import Seq +from intervaltree import Interval, IntervalTree def check_args(args): @@ -190,3 +192,35 @@ def get_first_chrom_name(fp: TextIO) -> str: if not header.startswith(">"): raise ValueError(f"Expected fasta file, but it did not start with '>'") return header[1:].split()[0] + + +def load_bed(fpath: str) -> IntervalTree: + tree = IntervalTree() + with open(fpath) as fp: + for line in map(str.rstrip, fp): + # skip headers https://en.wikipedia.org/wiki/BED_(file_format)#Header + if line.startswith(("#", "browser", "track")): + continue + try: + start, end = line.split("\t")[1:3] + except IndexError as err: + logging.error( + "BED file must contain at least three tab-separated columns: " + "chromosome, start (zero-based inclusive), end (zero-based " + "exclusive). There are less than three columns in this BED file" + ) + raise err + try: + start = int(start) + end = int(end) + except ValueError as err: + logging.error( + "BED file must contain at least three tab-separated columns: " + "chromosome, start (zero-based inclusive), end (zero-based " + "exclusive). We could not parse the start/end column into integers" + ) + raise err + iv = Interval(start, end) + tree.add(iv) + + return tree From 5791df7422865a27bb0e82562f3586aa392e164e Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 5 Oct 2022 13:13:35 +1000 Subject: [PATCH 07/10] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f11cba3..104770eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Added - option to compress JSON output (`-z`/`--compress`) +- specific support for targeted sequencing (`-T`/`--targeted`). Takes a BED file of the regions targeted to aid in the read depth estimation [[#28][28]] +- The script `scripts/combine.py` to combine mykrobe reports into a single CSV file ## 0.12.1 @@ -93,6 +95,8 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm eg `katG_S315X`. If gene is on reverse strand, then the wrong new amino acid change was being reported [[#127][127]]. +[28]: https://github.com/Mykrobe-tools/mykrobe/issues/28 + [113]: https://github.com/Mykrobe-tools/mykrobe/issues/113 [114]: https://github.com/Mykrobe-tools/mykrobe/issues/114 From d50535d4a7097ef1038317259c2de4a1b356eb41 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 5 Oct 2022 13:32:10 +1000 Subject: [PATCH 08/10] Update Dockerfile --- Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index e3d9f4a0..89aa6aec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,12 +41,13 @@ RUN git clone --recursive -b geno_kmer_count https://github.com/phelimb/mccortex # install mykrobe WORKDIR "/${PROJECT}" -RUN python -m pip install requests && python -m pip install . -vv +RUN python -m pip install requests \ + && python -m pip install . -vv \ + && chmod +x /${PROJECT}/scripts/combine.py # download panels RUN mykrobe panels update_metadata \ && mykrobe panels update_species all # add combine script to path \ -COPY --chmod=755 ./scripts/combine.py "/${PROJECT}/scripts/combine.py" ENV PATH="/${PROJECT}/scripts:$PATH" \ No newline at end of file From aedded827cf0ed2c82250dc74aa1f13360ea8afe Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 7 Oct 2022 12:25:57 +1000 Subject: [PATCH 09/10] skip adding covg to estimate depths --- src/mykrobe/typing/typer/genotyper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mykrobe/typing/typer/genotyper.py b/src/mykrobe/typing/typer/genotyper.py index 8f49ca64..e723176c 100755 --- a/src/mykrobe/typing/typer/genotyper.py +++ b/src/mykrobe/typing/typer/genotyper.py @@ -97,7 +97,10 @@ def _run_cortex(self): def estimate_depth(self): depth = [] - for variant_covg in self.variant_covgs.values(): + + for variant_name, variant_covg in self.variant_covgs.items(): + if not self._allele_is_in_target_region(variant_name): + continue if variant_covg.reference_coverage.median_depth > 0: depth.append(variant_covg.reference_coverage.median_depth) if not self.targeted: @@ -146,9 +149,6 @@ def _parse_covgs(self): ) = self._parse_summary_covgs_row(row) allele_name = allele.split("?")[0] - if not self._allele_is_in_target_region(allele): - continue - if self._is_variant_panel(allele_name): self._parse_variant_panel(row) else: From 4baeebdb23e2028570e905f0806cb7b612c6fb7f Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 7 Oct 2022 15:39:44 +1000 Subject: [PATCH 10/10] don't exclude variants not in target regions --- src/mykrobe/typing/typer/genotyper.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/mykrobe/typing/typer/genotyper.py b/src/mykrobe/typing/typer/genotyper.py index e723176c..bab171f1 100755 --- a/src/mykrobe/typing/typer/genotyper.py +++ b/src/mykrobe/typing/typer/genotyper.py @@ -20,7 +20,7 @@ from mykrobe import K -POS_REGEX = re.compile(r"var_name=[ACGT]+(?P\d+)[ACGT]+") +POS_REGEX = re.compile(r"[ACGT]+(?P\d+)[ACGT]+") logger = logging.getLogger(__name__) @@ -99,7 +99,8 @@ def estimate_depth(self): depth = [] for variant_name, variant_covg in self.variant_covgs.items(): - if not self._allele_is_in_target_region(variant_name): + var = variant_name.split("-")[-1] + if not self._variant_is_in_target_region(var): continue if variant_covg.reference_coverage.median_depth > 0: depth.append(variant_covg.reference_coverage.median_depth) @@ -109,10 +110,7 @@ def estimate_depth(self): if __median_depth > 0: depth.append(__median_depth) _median = median(depth) - if _median < 1: - return 1 - else: - return _median + return max(1, _median) def remove_temporary_files(self): self.mc_cortex_runner.remove_temporary_files() @@ -272,12 +270,12 @@ def _parse_variant_panel(self, row): else: raise ValueError("probe_type must be ref or alt") - def _allele_is_in_target_region(self, allele: str) -> bool: + def _variant_is_in_target_region(self, variant: str) -> bool: """Checks whether an allele falls within the targeted BED regions""" if not self.targeted: return True - match = POS_REGEX.search(allele) + match = POS_REGEX.search(variant) if not match: # no genome position, so it isn't a variant probe return False pos = int(match.group("pos")) - 1 # mykrobe panel is 1-based