Skip to content

Commit

Permalink
and a bunch of pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
MattWellie committed Jan 10, 2024
1 parent f11d597 commit dad420b
Show file tree
Hide file tree
Showing 39 changed files with 187 additions and 183 deletions.
14 changes: 10 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,30 @@ repos:
- id: check-added-large-files

- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.37.0
rev: v0.38.0
hooks:
- id: markdownlint
args: ["--config", ".markdownlint.json"]

- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
name: isort (python)

- repo: https://github.com/ambv/black
rev: 22.10.0
rev: 23.12.1
hooks:
- id: black

- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: "v0.1.0"
rev: v0.1.11
hooks:
- id: ruff

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.6.1
rev: v1.8.0
hooks:
- id: mypy
args:
Expand Down
25 changes: 10 additions & 15 deletions comparison/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,37 @@
"""
# mypy: ignore-errors
import json
import logging
import os
import re
import sys
from argparse import ArgumentParser
from collections import defaultdict
from csv import DictReader
from enum import Enum
import logging
import re
import sys
from typing import Any

from argparse import ArgumentParser
from cloudpathlib import AnyPath
from cyvcf2 import VCFReader
import hail as hl
from peddy import Ped

import hail as hl

from cpg_utils.config import get_config
from cpg_utils.hail_batch import init_batch

from reanalysis.hail_filter_and_label import (
CONFLICTING,
LOFTEE_HC,
PATHOGENIC,
extract_annotations,
filter_matrix_by_ac,
filter_on_quality_flags,
filter_to_population_rare,
filter_to_well_normalised,
green_and_new_from_panelapp,
CONFLICTING,
LOFTEE_HC,
PATHOGENIC,
)

from reanalysis.utils import read_json_from_path, canonical_contigs_from_vcf
from reanalysis.utils import canonical_contigs_from_vcf, read_json_from_path

SAMPLE_NUM_RE = re.compile(r'sample_[0-9]+')
SAMPLE_ALT_TEMPLATE = 'num_alt_alleles_{}'
Expand Down Expand Up @@ -167,7 +167,6 @@ def common_format_aip(results_dict: dict[str, Any]) -> CommonDict:

# collect all per-sample results into a separate index
for sample, variants in results_dict.items():

for var in variants:
coords = var['var_data']['coords']
sample_dict[sample].append(
Expand Down Expand Up @@ -207,7 +206,6 @@ def common_format_seqr(seqr: str, affected: list[str]) -> CommonDict:
]

for entry in seqr_parser:

# get all valid tags
tags = [
Confidence(tag)
Expand Down Expand Up @@ -324,7 +322,6 @@ def find_missing(aip_results: CommonDict, seqr_results: CommonDict) -> CommonDic
)

for sample in common_samples:

# only finds discrepancies, not Matched results - revise
sample_discrepancies = [
variant
Expand Down Expand Up @@ -388,12 +385,10 @@ def check_in_vcf(vcf_path: str, variants: CommonDict) -> tuple[CommonDict, Commo
# iterate over all samples, and corresponding lists
for sample, var_list in variants.items():
for var in var_list:

# assume missing until confirmed otherwise
found = False
normalised_chrom, coordinates = var.get_cyvcf2_pos(vcf_contigs)
for vcf_var in vcf_handler(coordinates):

# check position and alleles
if (
vcf_var.CHROM == normalised_chrom
Expand Down
8 changes: 3 additions & 5 deletions comparison/comparison_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,24 @@
import logging
import os
import sys

from argparse import ArgumentParser

import hailtop.batch as hb

from cpg_utils.config import get_config
from cpg_utils.git import (
prepare_git_job,
get_git_commit_ref_of_current_repository,
get_organisation_name_from_current_directory,
get_repo_name_from_current_directory,
prepare_git_job,
)
from cpg_utils.hail_batch import (
authenticate_cloud_credentials_in_job,
copy_common_env,
image_path,
remote_tmpdir,
output_path,
remote_tmpdir,
)
from cpg_utils.config import get_config


# local script references
COMPARISON_SCRIPT = os.path.join(os.path.dirname(__file__), 'comparison.py')
Expand Down
2 changes: 0 additions & 2 deletions helpers/forbidden_gene_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ def find_version(panel_id: int, all_dates: list[str]) -> dict[str, str | None]:

# iterate through all activities on this panel
for activity in activities:

# cast the activity datetime to day-resolution
activity_date = datetime.strptime(
activity['created'].split('T')[0], '%Y-%m-%d'
Expand Down Expand Up @@ -200,7 +199,6 @@ def main(panels: str | None, out_path: str, dates: list[str]):

# check over all dates
for date in dates:

logging.info(f'Running the date {date}')

date_genes = set()
Expand Down
13 changes: 7 additions & 6 deletions helpers/prepare_aip_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@
- generates the cohort-specific TOML file
- tweaks for making singleton versions of the given cohort
"""
# mypy: ignore-errors
from argparse import ArgumentParser
from itertools import product
import json
import logging
import os
import toml

from cpg_utils import to_path, Path
# mypy: ignore-errors
from argparse import ArgumentParser
from itertools import product

import toml

from cpg_utils import Path, to_path
from metamist.graphql import gql, query

from reanalysis.utils import read_json_from_path, get_cohort_config
from reanalysis.hpo_panel_match import main as hpo_match
from reanalysis.utils import get_cohort_config, read_json_from_path

BUCKET_TEMPLATE = 'gs://cpg-{dataset}-test-analysis/reanalysis'
LOCAL_TEMPLATE = 'inputs/{dataset}'
Expand Down
24 changes: 24 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,29 @@
line-length = 88
skip-string-normalization = true

[tool.isort]
py_version = 311
profile = "black"
line_length = 88
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "HAIL", "CPG", "FIRSTPARTY", "LOCALFOLDER"]
known_hail = [
"hail",
"hailtop",
]
# Adjust these for each repository, e.g., removing those that should be
# local rather than CPG. Also fill in extend_skip below if there are any
# subdirectories that should be ignored.
known_cpg = [
"analysis_runner",
"cpg_infra",
"cpg_utils",
"cpg_workflows",
"gnomad",
"hail_scripts",
"metamist",
]
# extend_skip = ["list", "submodules", "etc", "here"]

[tool.ruff]
line-length = 88
extend-select = ["T201"]
2 changes: 2 additions & 0 deletions reanalysis/clinvar_by_codon.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
"""

import click

import hail as hl

from cpg_utils.hail_batch import init_batch


Expand Down
8 changes: 5 additions & 3 deletions reanalysis/clinvar_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,18 @@
from os.path import join

import click
from cpg_utils import to_path, Path

from hailtop.batch.job import Job

from cpg_utils import Path, to_path
from cpg_utils.config import get_config
from cpg_utils.hail_batch import (
authenticate_cloud_credentials_in_job,
get_batch,
query_command,
)
from hailtop.batch.job import Job

from reanalysis import clinvar_by_codon, summarise_clinvar_entries, seqr_loader
from reanalysis import clinvar_by_codon, seqr_loader, summarise_clinvar_entries
from reanalysis.vep_jobs import add_vep_jobs


Expand Down
3 changes: 2 additions & 1 deletion reanalysis/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
from enum import Enum
from os.path import join

import hail as hl
from cloudpathlib import AnyPath

import hail as hl


class CustomEncoder(json.JSONEncoder):
"""
Expand Down
9 changes: 5 additions & 4 deletions reanalysis/hail_filter_and_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,21 @@
from argparse import ArgumentParser
from datetime import datetime

from peddy import Ped

import hail as hl

from cpg_utils import to_path
from cpg_utils.config import get_config
from cpg_utils.hail_batch import init_batch, output_path
from peddy import Ped

from reanalysis.hail_audit import (
fields_audit,
vep_audit,
BASE_FIELDS_REQUIRED,
FIELDS_REQUIRED,
USELESS_FIELDS,
VEP_TX_FIELDS_REQUIRED,
fields_audit,
vep_audit,
)
from reanalysis.utils import read_json_from_path

Expand Down Expand Up @@ -62,7 +64,6 @@ def get_clinvar_table(key: str = 'clinvar_decisions') -> str | None:

clinvar_table = get_config()['workflow'].get(key)
if clinvar_table is not None:

if to_path(clinvar_table).exists():
logging.info(f'Using clinvar table {clinvar_table}')
return clinvar_table
Expand Down
8 changes: 4 additions & 4 deletions reanalysis/hail_filter_sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,16 @@
from argparse import ArgumentParser

import hail as hl

from cpg_utils import to_path
from cpg_utils.config import get_config
from cpg_utils.hail_batch import init_batch, genome_build
from cpg_utils.hail_batch import genome_build, init_batch

from reanalysis.hail_filter_and_label import (
MISSING_INT,
ONE_INT,
green_and_new_from_panelapp,
subselect_mt_to_pedigree,
ONE_INT,
MISSING_INT,
)
from reanalysis.static_values import get_logger
from reanalysis.utils import read_json_from_path
Expand Down Expand Up @@ -190,7 +191,6 @@ def main(


if __name__ == '__main__':

# general CLI identical to the small variant version
parser = ArgumentParser()
parser.add_argument('--mt', required=True, help='path to input MT')
Expand Down
6 changes: 3 additions & 3 deletions reanalysis/hpo_panel_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@

import networkx
import requests
from obonet import read_obo

from cpg_utils import to_path
from metamist.graphql import gql, query
from obonet import read_obo

from reanalysis.models import PhenotypeMatchedPanels, ParticipantHPOPanels
from reanalysis.models import ParticipantHPOPanels, PhenotypeMatchedPanels

HPO_KEY = 'HPO Terms (present)'
HPO_RE = re.compile(r'HP:[0-9]+')
Expand Down Expand Up @@ -81,7 +82,6 @@ def get_panels(endpoint: str = PANELS_ENDPOINT) -> dict[str, set[int]]:
while True:
endpoint_data = get_json_response(endpoint)
for panel in endpoint_data['results']:

# can be split over multiple strings
relevant_disorders = ' '.join(panel['relevant_disorders'] or [])
for match in re.findall(HPO_RE, relevant_disorders):
Expand Down
7 changes: 3 additions & 4 deletions reanalysis/html_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@

import jinja2
import pandas as pd
from cpg_utils import to_path
from peddy.peddy import Ped

from cpg_utils import to_path

from reanalysis.models import (
PanelApp,
PanelDetail,
Expand All @@ -22,9 +23,9 @@
StructuralVariant,
)
from reanalysis.utils import (
get_config,
get_cohort_config,
get_cohort_seq_type_conf,
get_config,
get_logger,
read_json_from_path,
)
Expand Down Expand Up @@ -174,7 +175,6 @@ def get_summary_stats(
ext_label_map: dict = self.ext_labels.copy() if self.ext_labels else {}

for sample in self.samples:

if len(sample.variants) == 0:
samples_with_no_variants.append(sample.ext_id)

Expand All @@ -184,7 +184,6 @@ def get_summary_stats(

# iterate over the list of variants
for variant in sample.variants:

var_string = variant.var_data.coordinates.string_format
unique_variants['any'].add(var_string)
sample_variants['any'].add(var_string)
Expand Down
Loading

0 comments on commit dad420b

Please sign in to comment.