From 5c66df4c458bb2b11eef68853f4b16c6044e7e5a Mon Sep 17 00:00:00 2001 From: Gargi Rajadhyaksha Date: Mon, 10 Oct 2016 15:22:27 -0400 Subject: [PATCH] Add python3 support --- run_index.py | 4 +++- veppy/__init__.py | 3 ++- veppy/consequences.py | 3 ++- veppy/feature_file.py | 6 +++--- veppy/features.py | 7 +++++-- veppy/sequence.py | 31 ++++++++++++++++--------------- veppy/splice_model.py | 1 + veppy/tests/test_veppy.py | 23 +++++++++++++---------- veppy/utils/codons.py | 2 ++ veppy/utils/data/__init__.py | 1 + veppy/utils/fasta.py | 12 ++++++------ veppy/utils/helpers.py | 3 ++- veppy/utils/md5.py | 1 + veppy/utils/readers.py | 10 ++++++---- veppy/veppy.py | 2 ++ 15 files changed, 65 insertions(+), 44 deletions(-) diff --git a/run_index.py b/run_index.py index 8ba8023..91dfac6 100644 --- a/run_index.py +++ b/run_index.py @@ -1,8 +1,10 @@ +from __future__ import absolute_import +from __future__ import print_function from veppy.utils.fasta import GenbankFastaFile from veppy.feature_file import GencodeGtfFile from veppy.feature_file import NcbiMapViewFile -print "Indexing FASTA and feature files, this may take 10 minutes or more..." +print("Indexing FASTA and feature files, this may take 10 minutes or more...") fasta_file = GenbankFastaFile.for_build("GRCh37") feature_file_gencode = GencodeGtfFile.for_build('GRCh37') diff --git a/veppy/__init__.py b/veppy/__init__.py index ed5b7e1..bdfc3b6 100644 --- a/veppy/__init__.py +++ b/veppy/__init__.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import os import sys import logging @@ -51,7 +52,7 @@ SUPPORTED_BUILDS = os.environ.get( 'SUPPORTED_BUILDS', 'GRCh37').split(',') -SUPPORTED_GENE_MODELS = SOURCE_DATA['GRCh37']['feature'].keys() +SUPPORTED_GENE_MODELS = list(SOURCE_DATA['GRCh37']['feature'].keys()) FEATURE_LIMIT = int(os.getenv('FEATURE_LIMIT', sys.maxint)) diff --git a/veppy/consequences.py b/veppy/consequences.py index f763de2..061fe3a 100644 --- a/veppy/consequences.py +++ b/veppy/consequences.py @@ -1,4 +1,5 @@ -import functions +from __future__ import absolute_import +from . import functions def to_dict(csq): diff --git a/veppy/feature_file.py b/veppy/feature_file.py index 4449c05..0c7eb24 100644 --- a/veppy/feature_file.py +++ b/veppy/feature_file.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import os import logging from glob import glob @@ -20,14 +21,13 @@ from . import errors from . import SUPPORTED_BUILDS from . import FEATURE_LIMIT +import six logger = logging.getLogger('veppy') -class FeatureFile(object): - __metaclass__ = ABCMeta - +class FeatureFile(six.with_metaclass(ABCMeta, object)): class BoundedCache(object): def __init__(self, size=1024): self._cache = [None] * size diff --git a/veppy/features.py b/veppy/features.py index 4a1f60f..5bebb92 100644 --- a/veppy/features.py +++ b/veppy/features.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import sys import re import logging @@ -7,6 +8,8 @@ from collections import defaultdict, namedtuple from . import errors +from six.moves import map +from six.moves import range logger = logging.getLogger('veppy') @@ -265,7 +268,7 @@ def build(self, data, force=False): # start counting at 1! feature.number = i + 1 feature.total = len(features) - except Exception, e: + except Exception as e: logger.warn('Invalid feature %s' % self) logger.warn(e) self.errors = True @@ -629,7 +632,7 @@ def to_genomic_ranges(self, coding_start, coding_stop): return genomic_ranges def __str__(self): - return 'coding sequences: %s' % map(str, self._tree) + return 'coding sequences: %s' % list(map(str, self._tree)) class EnsemblTranscript(Transcript): diff --git a/veppy/sequence.py b/veppy/sequence.py index 3f50a2c..e4d6afd 100644 --- a/veppy/sequence.py +++ b/veppy/sequence.py @@ -1,9 +1,13 @@ +from __future__ import absolute_import import sys from collections import namedtuple from .features import EffectiveVariant from .utils import codons from .utils import fasta +from six.moves import map +from six.moves import range +from six.moves import zip Coordinate = \ @@ -207,7 +211,7 @@ def get_amino_acids(coding_coordinates, coding_sequence): (ref_seq, ref_genomic_coordinates) = self.build( variant.chromosome, ref_coding_start, ref_coding_stop) ref_coding_coordinates = \ - range(ref_coding_start, ref_coding_stop + 1) + list(range(ref_coding_start, ref_coding_stop + 1)) # build symmetric upstream/downstream codon buffer around ref # TODO: need to bound upstream_seq and downstream_seq @@ -224,7 +228,7 @@ def get_amino_acids(coding_coordinates, coding_sequence): variant.chromosome, upstream_coding_start, upstream_coding_stop) upstream_coding_coordinates = \ - range(upstream_coding_start, upstream_coding_stop + 1) + list(range(upstream_coding_start, upstream_coding_stop + 1)) # downstream (downstream_coding_start, downstream_coding_stop) = ( @@ -236,28 +240,25 @@ def get_amino_acids(coding_coordinates, coding_sequence): variant.chromosome, downstream_coding_start, downstream_coding_stop) downstream_coding_coordinates = \ - range(downstream_coding_start, downstream_coding_stop + 1) + list(range(downstream_coding_start, downstream_coding_stop + 1)) # -- Alternate # build alt seq... # complement if negative strand... if self.strand == '-': - (ref_seq, alt_allele, upstream_seq, downstream_seq) = map( + (ref_seq, alt_allele, upstream_seq, downstream_seq) = list(map( CodingSequenceBuilder.complement_sequence, (ref_seq, alt_allele, upstream_seq, downstream_seq) - ) + )) ( ref_genomic_coordinates, upstream_genomic_coordinates, downstream_genomic_coordinates - ) = map( - lambda x: x[::-1], - ( - ref_genomic_coordinates, - upstream_genomic_coordinates, - downstream_genomic_coordinates - ) - ) + ) = [x[::-1] for x in ( + ref_genomic_coordinates, + upstream_genomic_coordinates, + downstream_genomic_coordinates + )] if variant.is_insertion: i0 = coding_start - coding_start_round @@ -318,7 +319,7 @@ def get_amino_acids(coding_coordinates, coding_sequence): # NOTE: these are all CODING (!!!) sequences return BuilderResult( - range(coding_start, coding_stop + 1), + list(range(coding_start, coding_stop + 1)), CodingSequence( ref_seq, ref_genomic_coordinates, @@ -376,7 +377,7 @@ def build(self, chromosome, coding_start, coding_stop): codon_genomic_coordinates = [] for gr in genomic_ranges: codon_genomic_coordinates.extend( - range(gr.start, gr.stop + 1) + list(range(gr.start, gr.stop + 1)) ) return (seq, codon_genomic_coordinates) diff --git a/veppy/splice_model.py b/veppy/splice_model.py index 032af47..18e7d0a 100644 --- a/veppy/splice_model.py +++ b/veppy/splice_model.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import from collections import namedtuple FeatureRange = namedtuple('FeatureRange', ['start', 'stop']) diff --git a/veppy/tests/test_veppy.py b/veppy/tests/test_veppy.py index 9ef7f9f..ac141ae 100644 --- a/veppy/tests/test_veppy.py +++ b/veppy/tests/test_veppy.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import +from __future__ import print_function import os import sys import logging @@ -6,6 +8,7 @@ from veppy.veppy import calculate_consequences from veppy.utils.readers import VcfReader +from six.moves import map logger = logging.getLogger('test') @@ -78,18 +81,18 @@ def _eff_set(csqs): set(required_effects), set(pruned_results) ) - except AssertionError, e: - print '-------------------------------------' - print '-------------------------------------' - print ' VARIANT: ', str(variant) - print ' REQUIRED: ', list(required_effects) - print 'DISALLOWED: ', disallowed_effects - print ' RESULTS: ', map( + except AssertionError as e: + print('-------------------------------------') + print('-------------------------------------') + print(' VARIANT: ', str(variant)) + print(' REQUIRED: ', list(required_effects)) + print('DISALLOWED: ', disallowed_effects) + print(' RESULTS: ', list(map( str, transcript_results.get(transcript_id, []) - ) - print '-------------------------------------' - print '-------------------------------------' + ))) + print('-------------------------------------') + print('-------------------------------------') raise AssertionError(e) def _test(self, filepath): diff --git a/veppy/utils/codons.py b/veppy/utils/codons.py index 68b27ad..e7dfe4e 100644 --- a/veppy/utils/codons.py +++ b/veppy/utils/codons.py @@ -1,4 +1,6 @@ +from __future__ import absolute_import from collections import defaultdict +from six.moves import range CODONS_FULL = { # Alanine diff --git a/veppy/utils/data/__init__.py b/veppy/utils/data/__init__.py index e566cd4..ebd6d70 100644 --- a/veppy/utils/data/__init__.py +++ b/veppy/utils/data/__init__.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import os import gzip import pickle diff --git a/veppy/utils/fasta.py b/veppy/utils/fasta.py index 2eb18d0..515fd33 100644 --- a/veppy/utils/fasta.py +++ b/veppy/utils/fasta.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import os import logging @@ -15,14 +16,13 @@ from .. import SOURCE_DATA_MD5 from .. import MD5_CHECK_FAIL from .. import errors +import six +from six.moves import range logger = logging.getLogger('veppy') -class FastaFile(object): - __metaclass__ = ABCMeta - - # -- Util methods +class FastaFile(six.with_metaclass(ABCMeta, object)): @classmethod def load_build(klass, build): try: @@ -30,7 +30,7 @@ def load_build(klass, build): if not klass.BUILD_CACHE.get(translated_build): klass.BUILD_CACHE[translated_build] = \ klass(build, klass.filepath_for_build(translated_build)) - except (FastaNotFound, errors.FastaFileException), e: + except (FastaNotFound, errors.FastaFileException) as e: logger.fatal( 'Error loading FastA file for build %s: %s' % (build, e) ) @@ -101,7 +101,7 @@ def __init__(self, build, filepath, index=False): # TODO: eventually, this should be dynamic and file-specific self.chromosomes = \ - map(lambda x: str(x), range(1, 23)) + ['MT', 'X', 'Y'] + [str(x) for x in range(1, 23)] + ['MT', 'X', 'Y'] def get(self, chromosome, start, stop): fasta_chromosome = self.get_chromosome(chromosome) diff --git a/veppy/utils/helpers.py b/veppy/utils/helpers.py index fe02c36..1b5344b 100644 --- a/veppy/utils/helpers.py +++ b/veppy/utils/helpers.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import os import logging @@ -70,7 +71,7 @@ def set_defaults(transcripts): for transcript in transcripts: _groups[transcript.gene].append(transcript) - default_gene = get_default_gene(_groups.keys()) + default_gene = get_default_gene(list(_groups.keys())) if default_gene: default_gene.default = True diff --git a/veppy/utils/md5.py b/veppy/utils/md5.py index e0c4716..1f897d2 100644 --- a/veppy/utils/md5.py +++ b/veppy/utils/md5.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import hashlib diff --git a/veppy/utils/readers.py b/veppy/utils/readers.py index f1bfdb5..6d3f12f 100644 --- a/veppy/utils/readers.py +++ b/veppy/utils/readers.py @@ -1,7 +1,9 @@ +from __future__ import absolute_import import csv import gzip import logging from cStringIO import StringIO +from six.moves import map logger = logging.getLogger('veppy') @@ -90,9 +92,9 @@ def _check_headers(self): self._headers = list(self.headercols) def next(self): - parsed_row = self._process_next(super(CsvReader, self).next()) + parsed_row = self._process_next(next(super(CsvReader, self))) while parsed_row is None: - parsed_row = self._process_next(super(CsvReader, self).next()) + parsed_row = self._process_next(next(super(CsvReader, self))) return parsed_row def _process_next(self, line): @@ -289,7 +291,7 @@ def vcf_to_dict(self, vcf_obj): 'stop': vcf_obj.POS + len(vcf_obj.REF) - 1, 'chromosome': vcf_obj.CHROM, 'reference_allele': vcf_obj.REF, - 'alternate_alleles': map(self._get_alternate, vcf_obj.ALT), + 'alternate_alleles': list(map(self._get_alternate, vcf_obj.ALT)), 'info': vcf_obj.INFO } @@ -297,4 +299,4 @@ def __iter__(self): return self def next(self): - return self.vcf_to_dict(self.reader.next()) + return self.vcf_to_dict(next(self.reader)) diff --git a/veppy/veppy.py b/veppy/veppy.py index d651f09..19e4c95 100644 --- a/veppy/veppy.py +++ b/veppy/veppy.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import logging from . import features @@ -8,6 +9,7 @@ from .sequence import CodingSequenceBuilder from .splice_model import SpliceJunctionModel from .utils.codons import split_into_codons +from six.moves import range logger = logging.getLogger('veppy')