From 7b009b5ae0077632d59fe35e0f1faa2a7effd0a3 Mon Sep 17 00:00:00 2001 From: Dave Lawrence Date: Mon, 13 Nov 2023 15:49:59 +1030 Subject: [PATCH] Add a separate data version, separate client/data generation as much as possible. Add new GFFs --- CHANGELOG.md | 9 ++++ cdot/__init__.py | 4 +- cdot/assembly_helper.py | 52 ------------------- cdot/hgvs/dataproviders/json_data_provider.py | 21 +++++++- generate_transcript_data/all_transcripts.sh | 25 ++++----- generate_transcript_data/cdot_gene_info.py | 7 +-- generate_transcript_data/cdot_json.py | 14 ++--- .../ensembl_transcripts_chm13v2.sh | 9 ++-- .../ensembl_transcripts_grch37.sh | 2 +- .../ensembl_transcripts_grch38.sh | 2 +- .../json_encoders.py | 0 .../json_schema_version.py | 3 ++ .../refseq_transcripts_chm13v2.sh | 22 ++++++-- .../refseq_transcripts_grch38.sh | 18 ++++++- setup.cfg | 2 +- 15 files changed, 98 insertions(+), 92 deletions(-) delete mode 100644 cdot/assembly_helper.py rename {cdot => generate_transcript_data}/json_encoders.py (100%) create mode 100644 generate_transcript_data/json_schema_version.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7099f03..a8ce5f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,18 @@ ## [unreleased] +### Added + +- New GFFs: RefSeq RS_2023_10, Ensembl VEP110 +- #66 - We now store 'Note' field (thanks holtgrewe for suggestion) +- Added requirements.txt for 'generate_transcript_data' sections +- client / JSON data schema version compatibility check + ### Changed - #57 - Correctly handle retrieving genomic position and dealing w/indels in GFF (thanks ltnetcase for reporting) - #60 - Fix for missing protein IDs due to Genbank / GenBank (thanks holtgrewe) +- #64 - Split code/data versions. 
json.gz are now labelled according to data schema version (thanks holtgrewe) +- Renamed 'CHM13v2.0' to 'T2T-CHM13v2.0' so it could work with biocommons bioutils ## [0.2.21] - 2023-08-14 diff --git a/cdot/__init__.py b/cdot/__init__.py index 6d1a5b7..273459c 100644 --- a/cdot/__init__.py +++ b/cdot/__init__.py @@ -1,7 +1,7 @@ __version__ = "0.2.21" -def get_json_schema_version(): +def get_data_schema_int(version: str) -> int: """ Return an int which increments upon breaking changes - ie anything other than patch """ - major, minor, patch = __version__.split(".") + major, minor, patch = version.split(".") return 1000 * int(major) + int(minor) diff --git a/cdot/assembly_helper.py b/cdot/assembly_helper.py deleted file mode 100644 index f0ba6cc..0000000 --- a/cdot/assembly_helper.py +++ /dev/null @@ -1,52 +0,0 @@ -from bioutils.assemblies import make_ac_name_map, make_name_ac_map - -# Generated via: -# import pandas as pd -# assembly_report = "./GCF_009914755.1_T2T-CHM13v2.0_assembly_report.txt" -# names = ["Sequence-Name", "Sequence-Role", "Assigned-Molecule", "Assigned-Molecule-Location/Type", "GenBank-Accn", "Relationship", "RefSeq-Accn", "Assembly-Unit", "Sequence-Length", "UCSC-style-name"] -# df = pd.read_csv(assembly_report, comment='#', sep='\t', header=None, names=names) -# equals_mask = df["Relationship"] == '=' # MT not there -# df = df[equals_mask] -# ac_name_map = dict(df[["RefSeq-Accn", "Sequence-Name"]].values) - -T2T_CHM13v2 = { - 'NC_060925.1': '1', - 'NC_060926.1': '2', - 'NC_060927.1': '3', - 'NC_060928.1': '4', - 'NC_060929.1': '5', - 'NC_060930.1': '6', - 'NC_060931.1': '7', - 'NC_060932.1': '8', - 'NC_060933.1': '9', - 'NC_060934.1': '10', - 'NC_060935.1': '11', - 'NC_060936.1': '12', - 'NC_060937.1': '13', - 'NC_060938.1': '14', - 'NC_060939.1': '15', - 'NC_060940.1': '16', - 'NC_060941.1': '17', - 'NC_060942.1': '18', - 'NC_060943.1': '19', - 'NC_060944.1': '20', - 'NC_060945.1': '21', - 'NC_060946.1': '22', - 'NC_060947.1': 'X', - 
'NC_060948.1': 'Y' -} - -def get_ac_name_map(assembly_name): - if assembly_name == "CHM13v2.0": - return T2T_CHM13v2 - elif assembly_name == "GRCh37": - assembly_name = 'GRCh37.p13' # Original build didn't have MT - return make_ac_name_map(assembly_name) - - -def get_name_ac_map(assembly_name): - if assembly_name == "CHM13v2.0": - return {name: ac for ac, name in T2T_CHM13v2.items()} - elif assembly_name == "GRCh37": - assembly_name = 'GRCh37.p13' # Original build didn't have MT - return make_name_ac_map(assembly_name) diff --git a/cdot/hgvs/dataproviders/json_data_provider.py b/cdot/hgvs/dataproviders/json_data_provider.py index 0306aed..90428d7 100644 --- a/cdot/hgvs/dataproviders/json_data_provider.py +++ b/cdot/hgvs/dataproviders/json_data_provider.py @@ -12,7 +12,15 @@ from intervaltree import IntervalTree from typing import List -from cdot.assembly_helper import get_ac_name_map +from bioutils.assemblies import make_ac_name_map, make_name_ac_map + +from cdot import get_data_schema_int, __version__ + +def get_ac_name_map(assembly_name): + if assembly_name == "GRCh37": + assembly_name = 'GRCh37.p13' # Original build didn't have MT + return make_ac_name_map(assembly_name) + class AbstractJSONDataProvider(Interface): # All cdot data is 'splign', it's the method used in NCBI/Ensembl GTFs, and we also only pull out 'splign' from UTA @@ -42,6 +50,7 @@ def __init__(self, assemblies: List[str] = None, mode=None, cache=None, seqfetch for assembly_name, contig_map in self.assembly_maps.items(): self.assembly_by_contig.update({contig: assembly_name for contig in contig_map.keys()}) + @abc.abstractmethod def _get_transcript(self, tx_ac): pass @@ -260,6 +269,12 @@ def get_tx_for_gene(self, gene): def get_tx_for_region(self, alt_ac, alt_aln_method, start_i, end_i): pass + def _validate_schema_compatability(self, json_schema_version: str): + """ Raise an error if versions out of sync """ + cdot_client_data_schema_int = get_data_schema_int(__version__) + 
cdot_data_schema_version = get_data_schema_int(json_schema_version) + if cdot_client_data_schema_int < cdot_data_schema_version: + raise ValueError(f"This cdot client ({__version__}) cannot read {json_schema_version=} - please upgrade.") class LocalDataProvider(AbstractJSONDataProvider): """ For JSON and Redis providers (implemented in cdot_rest) @@ -359,7 +374,9 @@ def __init__(self, file_or_filename_list, mode=None, cache=None, seqfetcher=None for g in genes.values(): if gene_symbol := g.get("gene_symbol"): self.genes[gene_symbol] = g - self.cdot_data_version = tuple(int(v) for v in data["cdot_version"].split(".")) + cdot_data_version_str = data["cdot_version"] + self._validate_schema_compatability(cdot_data_version_str) + self.cdot_data_version = tuple(int(v) for v in cdot_data_version_str.split(".")) super().__init__(assemblies=assemblies, mode=mode, cache=cache, seqfetcher=seqfetcher) diff --git a/generate_transcript_data/all_transcripts.sh b/generate_transcript_data/all_transcripts.sh index f7b39bd..6029004 100755 --- a/generate_transcript_data/all_transcripts.sh +++ b/generate_transcript_data/all_transcripts.sh @@ -8,11 +8,12 @@ BASE_DIR=$(dirname ${FULL_PATH_TO_SCRIPT}) # Python scripts will import via generate_transcript_data export PYTHONPATH=${BASE_DIR}/.. -CDOT_VERSION=$(${BASE_DIR}/cdot_json.py --version) +CDOT_DATA_VERSION=$(${BASE_DIR}/cdot_json.py --version) +echo "Generating all transcripts for cdot data version ${CDOT_DATA_VERSION}" # This needs to be passed to called bash scripts, so they are invoked with "." to use these variables -export GENE_INFO_JSON=$(pwd)/Homo_sapiens.gene-info-${CDOT_VERSION}.json.gz +export GENE_INFO_JSON=$(pwd)/Homo_sapiens.gene-info-${CDOT_DATA_VERSION}.json.gz if [[ ! -e ${GENE_INFO_JSON} ]]; then ${BASE_DIR}/gene_info.sh @@ -34,17 +35,17 @@ cd GRCh38 ${BASE_DIR}/refseq_transcripts_grch38.sh cd .. -mkdir -p CHM13v2.0 -cd CHM13v2.0 +mkdir -p T2T-CHM13v2.0 +cd T2T-CHM13v2.0 ${BASE_DIR}/refseq_transcripts_chm13v2.sh cd .. 
# Combine genome builds (we're in refseq dir) -REFSEQ_COMBO=cdot-${CDOT_VERSION}.refseq.grch37_grch38.json.gz +REFSEQ_COMBO=cdot-${CDOT_DATA_VERSION}.refseq.grch37_grch38.json.gz if [[ ! -e ${REFSEQ_COMBO} ]]; then ${BASE_DIR}/cdot_json.py combine_builds \ - --grch37 GRCh37/cdot-${CDOT_VERSION}.refseq.grch37.json.gz \ - --grch38 GRCh38/cdot-${CDOT_VERSION}.refseq.grch38.json.gz \ + --grch37 GRCh37/cdot-${CDOT_DATA_VERSION}.refseq.grch37.json.gz \ + --grch38 GRCh38/cdot-${CDOT_DATA_VERSION}.refseq.grch38.json.gz \ --output ${REFSEQ_COMBO} fi @@ -64,18 +65,18 @@ cd GRCh38 ${BASE_DIR}/ensembl_transcripts_grch38.sh cd .. -mkdir -p CHM13v2.0 -cd CHM13v2.0 +mkdir -p T2T-CHM13v2.0 +cd T2T-CHM13v2.0 ${BASE_DIR}/ensembl_transcripts_chm13v2.sh cd .. # Combine genome builds (we're in ensembl dir) -ENSEMBL_COMBO=cdot-${CDOT_VERSION}.ensembl.grch37_grch38.json.gz +ENSEMBL_COMBO=cdot-${CDOT_DATA_VERSION}.ensembl.grch37_grch38.json.gz if [[ ! -e ${ENSEMBL_COMBO} ]]; then ${BASE_DIR}/cdot_json.py combine_builds \ - --grch37 GRCh37/cdot-${CDOT_VERSION}.ensembl.grch37.json.gz \ - --grch38 GRCh38/cdot-${CDOT_VERSION}.ensembl.grch38.json.gz \ + --grch37 GRCh37/cdot-${CDOT_DATA_VERSION}.ensembl.grch37.json.gz \ + --grch38 GRCh38/cdot-${CDOT_DATA_VERSION}.ensembl.grch38.json.gz \ --output ${ENSEMBL_COMBO} fi diff --git a/generate_transcript_data/cdot_gene_info.py b/generate_transcript_data/cdot_gene_info.py index b435eb9..2bc3429 100755 --- a/generate_transcript_data/cdot_gene_info.py +++ b/generate_transcript_data/cdot_gene_info.py @@ -5,15 +5,12 @@ import json import os from argparse import ArgumentParser -from csv import DictReader from datetime import datetime from typing import Iterable, Iterator, List, TypeVar import cdot from Bio import Entrez -from cdot.json_encoders import SortedSetEncoder -from io import BytesIO -from lxml import etree +from json_encoders import SortedSetEncoder T = TypeVar("T") @@ -45,7 +42,7 @@ def _get_entrez_gene_summary(id_list): web_env = 
result["WebEnv"] query_key = result["QueryKey"] data = Entrez.esummary(db="gene", webenv=web_env, query_key=query_key) - document = Entrez.read(data, ignore_errors=True) # Need recent BioPython + document = Entrez.read(data, ignore_errors=True, validate=False) # Need recent BioPython return document["DocumentSummarySet"]["DocumentSummary"] diff --git a/generate_transcript_data/cdot_json.py b/generate_transcript_data/cdot_json.py index 36a9c35..09ef354 100755 --- a/generate_transcript_data/cdot_json.py +++ b/generate_transcript_data/cdot_json.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import gzip +import ijson import json import logging import re @@ -8,13 +9,12 @@ from argparse import ArgumentParser from collections import defaultdict, Counter from csv import DictReader +from pyhgvs import CDNACoord -import cdot -import ijson from cdot.pyhgvs.pyhgvs_transcript import PyHGVSTranscriptFactory from generate_transcript_data.gff_parser import GTFParser, GFF3Parser -from cdot.json_encoders import SortedSetEncoder -from pyhgvs import CDNACoord +from generate_transcript_data.json_encoders import SortedSetEncoder +from generate_transcript_data.json_schema_version import JSON_SCHEMA_VERSION def _setup_arg_parser(): @@ -270,7 +270,7 @@ def _cigar_to_gap_and_length(cigar): def write_cdot_json(filename, genes, transcript_versions, genome_builds, refseq_gene_summary_api_retrieval_date=None): print(f"Writing cdot file: '{filename}'") data = { - "cdot_version": cdot.__version__, + "cdot_version": JSON_SCHEMA_VERSION, "genome_builds": genome_builds, "transcripts": transcript_versions, } @@ -382,7 +382,7 @@ def combine_builds(args): with gzip.open(args.output, 'wt') as outfile: data = { "transcripts": transcripts, - "cdot_version": cdot.__version__, + "cdot_version": JSON_SCHEMA_VERSION, "genome_builds": list(genome_build_file.keys()), } if genes: @@ -399,7 +399,7 @@ def main(): parser = _setup_arg_parser() args = parser.parse_args() if args.version: - print(cdot.__version__) + 
print(JSON_SCHEMA_VERSION) sys.exit(0) if args.subcommand is None: diff --git a/generate_transcript_data/ensembl_transcripts_chm13v2.sh b/generate_transcript_data/ensembl_transcripts_chm13v2.sh index d22d70a..3a833e0 100755 --- a/generate_transcript_data/ensembl_transcripts_chm13v2.sh +++ b/generate_transcript_data/ensembl_transcripts_chm13v2.sh @@ -4,6 +4,7 @@ set -e BASE_DIR=$(dirname ${BASH_SOURCE[0]}) CDOT_VERSION=$(${BASE_DIR}/cdot_json.py --version) +GENOME_BUILD=T2T-CHM13v2.0 if [[ -z ${GENE_INFO_JSON} ]]; then echo "You need to set environment variable GENE_INFO_JSON, pointing to the filename produced by cdot_gene_info.py" @@ -14,18 +15,18 @@ merge_args=() for release in 2022_06 2022_07; do filename=Homo_sapiens-GCA_009914755.4-${release}-genes.gff3.gz url=https://ftp.ensembl.org/pub/rapid-release/species/Homo_sapiens/GCA_009914755.4/ensembl/geneset/${release}/${filename} - cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz + cdot_file=cdot-${CDOT_VERSION}.ensembl.$(basename $filename .gz).json.gz if [[ ! -e ${filename} ]]; then wget ${url} fi if [[ ! -e ${cdot_file} ]]; then - ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=CHM13v2.0 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" + ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=${GENOME_BUILD} --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" fi merge_args+=(${cdot_file}) done -merged_file="cdot-${CDOT_VERSION}.ensembl.CHM13v2.0.json.gz" +merged_file="cdot-${CDOT_VERSION}.ensembl.${GENOME_BUILD}.json.gz" if [[ ! 
-e ${merged_file} ]]; then - ${BASE_DIR}/cdot_json.py merge_historical ${merge_args[@]} --genome-build=CHM13v2.0 --output "${merged_file}" + ${BASE_DIR}/cdot_json.py merge_historical ${merge_args[@]} --genome-build=${GENOME_BUILD} --output "${merged_file}" fi diff --git a/generate_transcript_data/ensembl_transcripts_grch37.sh b/generate_transcript_data/ensembl_transcripts_grch37.sh index cba17a1..0d7db04 100755 --- a/generate_transcript_data/ensembl_transcripts_grch37.sh +++ b/generate_transcript_data/ensembl_transcripts_grch37.sh @@ -21,7 +21,7 @@ for release in 82 85 87; do # Switched to using GTFs as they contain protein version filename=Homo_sapiens.GRCh37.${release}.gff3.gz url=ftp://ftp.ensembl.org/pub/grch37/release-${release}/gff3/homo_sapiens/${filename} - cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz + cdot_file=cdot-${CDOT_VERSION}.ensembl.$(basename $filename .gz).json.gz if [[ ! -e ${filename} ]]; then wget ${url} fi diff --git a/generate_transcript_data/ensembl_transcripts_grch38.sh b/generate_transcript_data/ensembl_transcripts_grch38.sh index bf621d6..cccea40 100755 --- a/generate_transcript_data/ensembl_transcripts_grch38.sh +++ b/generate_transcript_data/ensembl_transcripts_grch38.sh @@ -29,7 +29,7 @@ for release in 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 # Switched to using GTFs as they contain protein version filename=Homo_sapiens.GRCh38.${release}.gff3.gz url=ftp://ftp.ensembl.org/pub/release-${release}/gff3/homo_sapiens/${filename} - cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz + cdot_file=cdot-${CDOT_VERSION}.ensembl.$(basename $filename .gz).json.gz if [[ ! 
-e ${filename} ]]; then wget ${url} diff --git a/cdot/json_encoders.py b/generate_transcript_data/json_encoders.py similarity index 100% rename from cdot/json_encoders.py rename to generate_transcript_data/json_encoders.py diff --git a/generate_transcript_data/json_schema_version.py b/generate_transcript_data/json_schema_version.py new file mode 100644 index 0000000..0fd4ae4 --- /dev/null +++ b/generate_transcript_data/json_schema_version.py @@ -0,0 +1,3 @@ +# After 0.2.22 we split version into separate code (pip) and data schema versions +# The cdot client will use its own major/minor to determine whether it can read these data files +JSON_SCHEMA_VERSION = "0.2.22" diff --git a/generate_transcript_data/refseq_transcripts_chm13v2.sh b/generate_transcript_data/refseq_transcripts_chm13v2.sh index 5e73a37..82f2b9f 100755 --- a/generate_transcript_data/refseq_transcripts_chm13v2.sh +++ b/generate_transcript_data/refseq_transcripts_chm13v2.sh @@ -4,6 +4,7 @@ set -e BASE_DIR=$(dirname ${BASH_SOURCE[0]}) CDOT_VERSION=$(${BASE_DIR}/cdot_json.py --version) +GENOME_BUILD=T2T-CHM13v2.0 if [[ -z ${GENE_INFO_JSON} ]]; then echo "You need to set environment variable GENE_INFO_JSON, pointing to the filename produced by cdot_gene_info.py" @@ -20,7 +21,7 @@ if [[ ! -e ${filename} ]]; then wget ${url} --output-document=${filename} fi if [[ ! -e ${cdot_file} ]]; then - ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=CHM13v2.0 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" + ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=${GENOME_BUILD} --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" fi merge_args+=(${cdot_file}) @@ -33,12 +34,25 @@ if [[ ! -e ${filename} ]]; then wget ${url} --output-document=${filename} fi if [[ ! 
-e ${cdot_file} ]]; then - ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=CHM13v2.0 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" + ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=${GENOME_BUILD} --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" fi merge_args+=(${cdot_file}) -merged_file="cdot-${CDOT_VERSION}.refseq.CHM13v2.0.json.gz" +filename=GCF_009914755.1_T2T-CHM13v2.0_genomic.RS_2023_10.gff.gz +url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_009914755.1-RS_2023_10/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz +cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz + +if [[ ! -e ${filename} ]]; then + wget ${url} --output-document=${filename} +fi +if [[ ! -e ${cdot_file} ]]; then + ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=${GENOME_BUILD} --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" +fi +merge_args+=(${cdot_file}) + + +merged_file="cdot-${CDOT_VERSION}.refseq.${GENOME_BUILD}.json.gz" if [[ ! -e ${merged_file} ]]; then echo "Creating ${merged_file}" - ${BASE_DIR}/cdot_json.py merge_historical ${merge_args[@]} --genome-build=CHM13v2.0 --output "${merged_file}" + ${BASE_DIR}/cdot_json.py merge_historical ${merge_args[@]} --genome-build=${GENOME_BUILD} --output "${merged_file}" fi \ No newline at end of file diff --git a/generate_transcript_data/refseq_transcripts_grch38.sh b/generate_transcript_data/refseq_transcripts_grch38.sh index 213fdab..6b54ea6 100755 --- a/generate_transcript_data/refseq_transcripts_grch38.sh +++ b/generate_transcript_data/refseq_transcripts_grch38.sh @@ -153,10 +153,23 @@ if [[ ! 
-e ${cdot_file} ]]; then fi merge_args+=(${cdot_file}) -## Latest +## Dated versions filename=GCF_000001405.40_GRCh38.p14_genomic.RS_2023_03.gff.gz url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_03/GCF_000001405.40_GRCh38.p14_genomic.gff.gz +cdot_file=cdot-${CDOT_VERSION}.ensembl.$(basename $filename .gz).json.gz + +if [[ ! -e ${filename} ]]; then + wget ${url} --output-document=${filename} +fi +if [[ ! -e ${cdot_file} ]]; then + ${BASE_DIR}/cdot_json.py gff3_to_json "${filename}" --url "${url}" --genome-build=GRCh38 --output "${cdot_file}" --gene-info-json="${GENE_INFO_JSON}" +fi +merge_args+=(${cdot_file}) + + +filename=GCF_000001405.40_GRCh38.p14_genomic.RS_2023_10.gff.gz +url=https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/GCF_000001405.40-RS_2023_10/GCF_000001405.40_GRCh38.p14_genomic.gff.gz cdot_file=cdot-${CDOT_VERSION}.$(basename $filename .gz).json.gz if [[ ! -e ${filename} ]]; then @@ -168,6 +181,9 @@ fi merge_args+=(${cdot_file}) + + + merged_file="cdot-${CDOT_VERSION}.refseq.grch38.json.gz" if [[ ! -e ${merged_file} ]]; then echo "Creating ${merged_file}" diff --git a/setup.cfg b/setup.cfg index 22c4142..02f5d5d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,7 +21,7 @@ python_requires = >=3.8 install_requires = requests intervaltree - bioutils + bioutils>=0.5.8 lazy [options.packages.find]