Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add python3 support #14

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion run_index.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import absolute_import
from __future__ import print_function
from veppy.utils.fasta import GenbankFastaFile
from veppy.feature_file import GencodeGtfFile
from veppy.feature_file import NcbiMapViewFile

print "Indexing FASTA and feature files, this may take 10 minutes or more..."
print("Indexing FASTA and feature files, this may take 10 minutes or more...")

fasta_file = GenbankFastaFile.for_build("GRCh37")
feature_file_gencode = GencodeGtfFile.for_build('GRCh37')
Expand Down
3 changes: 2 additions & 1 deletion veppy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import os
import sys
import logging
Expand Down Expand Up @@ -51,7 +52,7 @@
SUPPORTED_BUILDS = os.environ.get(
'SUPPORTED_BUILDS', 'GRCh37').split(',')

SUPPORTED_GENE_MODELS = SOURCE_DATA['GRCh37']['feature'].keys()
SUPPORTED_GENE_MODELS = list(SOURCE_DATA['GRCh37']['feature'].keys())

FEATURE_LIMIT = int(os.getenv('FEATURE_LIMIT', sys.maxint))

Expand Down
3 changes: 2 additions & 1 deletion veppy/consequences.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import functions
from __future__ import absolute_import
from . import functions


def to_dict(csq):
Expand Down
6 changes: 3 additions & 3 deletions veppy/feature_file.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import os
import logging
from glob import glob
Expand All @@ -20,14 +21,13 @@
from . import errors
from . import SUPPORTED_BUILDS
from . import FEATURE_LIMIT
import six


logger = logging.getLogger('veppy')


class FeatureFile(object):
__metaclass__ = ABCMeta

class FeatureFile(six.with_metaclass(ABCMeta, object)):
class BoundedCache(object):
def __init__(self, size=1024):
self._cache = [None] * size
Expand Down
7 changes: 5 additions & 2 deletions veppy/features.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import sys
import re
import logging
Expand All @@ -7,6 +8,8 @@
from collections import defaultdict, namedtuple

from . import errors
from six.moves import map
from six.moves import range

logger = logging.getLogger('veppy')

Expand Down Expand Up @@ -265,7 +268,7 @@ def build(self, data, force=False):
# start counting at 1!
feature.number = i + 1
feature.total = len(features)
except Exception, e:
except Exception as e:
logger.warn('Invalid feature %s' % self)
logger.warn(e)
self.errors = True
Expand Down Expand Up @@ -629,7 +632,7 @@ def to_genomic_ranges(self, coding_start, coding_stop):
return genomic_ranges

def __str__(self):
return 'coding sequences: %s' % map(str, self._tree)
return 'coding sequences: %s' % list(map(str, self._tree))


class EnsemblTranscript(Transcript):
Expand Down
31 changes: 16 additions & 15 deletions veppy/sequence.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from __future__ import absolute_import
import sys
from collections import namedtuple

from .features import EffectiveVariant
from .utils import codons
from .utils import fasta
from six.moves import map
from six.moves import range
from six.moves import zip


Coordinate = \
Expand Down Expand Up @@ -207,7 +211,7 @@ def get_amino_acids(coding_coordinates, coding_sequence):
(ref_seq, ref_genomic_coordinates) = self.build(
variant.chromosome, ref_coding_start, ref_coding_stop)
ref_coding_coordinates = \
range(ref_coding_start, ref_coding_stop + 1)
list(range(ref_coding_start, ref_coding_stop + 1))

# build symmetric upstream/downstream codon buffer around ref
# TODO: need to bound upstream_seq and downstream_seq
Expand All @@ -224,7 +228,7 @@ def get_amino_acids(coding_coordinates, coding_sequence):
variant.chromosome, upstream_coding_start,
upstream_coding_stop)
upstream_coding_coordinates = \
range(upstream_coding_start, upstream_coding_stop + 1)
list(range(upstream_coding_start, upstream_coding_stop + 1))

# downstream
(downstream_coding_start, downstream_coding_stop) = (
Expand All @@ -236,28 +240,25 @@ def get_amino_acids(coding_coordinates, coding_sequence):
variant.chromosome, downstream_coding_start,
downstream_coding_stop)
downstream_coding_coordinates = \
range(downstream_coding_start, downstream_coding_stop + 1)
list(range(downstream_coding_start, downstream_coding_stop + 1))

# -- Alternate
# build alt seq...
# complement if negative strand...
if self.strand == '-':
(ref_seq, alt_allele, upstream_seq, downstream_seq) = map(
(ref_seq, alt_allele, upstream_seq, downstream_seq) = list(map(
CodingSequenceBuilder.complement_sequence,
(ref_seq, alt_allele, upstream_seq, downstream_seq)
)
))
(
ref_genomic_coordinates,
upstream_genomic_coordinates,
downstream_genomic_coordinates
) = map(
lambda x: x[::-1],
(
ref_genomic_coordinates,
upstream_genomic_coordinates,
downstream_genomic_coordinates
)
)
) = [x[::-1] for x in (
ref_genomic_coordinates,
upstream_genomic_coordinates,
downstream_genomic_coordinates
)]

if variant.is_insertion:
i0 = coding_start - coding_start_round
Expand Down Expand Up @@ -318,7 +319,7 @@ def get_amino_acids(coding_coordinates, coding_sequence):

# NOTE: these are all CODING (!!!) sequences
return BuilderResult(
range(coding_start, coding_stop + 1),
list(range(coding_start, coding_stop + 1)),
CodingSequence(
ref_seq,
ref_genomic_coordinates,
Expand Down Expand Up @@ -376,7 +377,7 @@ def build(self, chromosome, coding_start, coding_stop):
codon_genomic_coordinates = []
for gr in genomic_ranges:
codon_genomic_coordinates.extend(
range(gr.start, gr.stop + 1)
list(range(gr.start, gr.stop + 1))
)

return (seq, codon_genomic_coordinates)
1 change: 1 addition & 0 deletions veppy/splice_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
from collections import namedtuple

FeatureRange = namedtuple('FeatureRange', ['start', 'stop'])
Expand Down
23 changes: 13 additions & 10 deletions veppy/tests/test_veppy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import logging
Expand All @@ -6,6 +8,7 @@

from veppy.veppy import calculate_consequences
from veppy.utils.readers import VcfReader
from six.moves import map

logger = logging.getLogger('test')

Expand Down Expand Up @@ -78,18 +81,18 @@ def _eff_set(csqs):
set(required_effects), set(pruned_results)
)

except AssertionError, e:
print '-------------------------------------'
print '-------------------------------------'
print ' VARIANT: ', str(variant)
print ' REQUIRED: ', list(required_effects)
print 'DISALLOWED: ', disallowed_effects
print ' RESULTS: ', map(
except AssertionError as e:
print('-------------------------------------')
print('-------------------------------------')
print(' VARIANT: ', str(variant))
print(' REQUIRED: ', list(required_effects))
print('DISALLOWED: ', disallowed_effects)
print(' RESULTS: ', list(map(
str,
transcript_results.get(transcript_id, [])
)
print '-------------------------------------'
print '-------------------------------------'
)))
print('-------------------------------------')
print('-------------------------------------')
raise AssertionError(e)

def _test(self, filepath):
Expand Down
2 changes: 2 additions & 0 deletions veppy/utils/codons.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from __future__ import absolute_import
from collections import defaultdict
from six.moves import range

CODONS_FULL = {
# Alanine
Expand Down
1 change: 1 addition & 0 deletions veppy/utils/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import os
import gzip
import pickle
Expand Down
12 changes: 6 additions & 6 deletions veppy/utils/fasta.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import os
import logging

Expand All @@ -15,22 +16,21 @@
from .. import SOURCE_DATA_MD5
from .. import MD5_CHECK_FAIL
from .. import errors
import six
from six.moves import range

logger = logging.getLogger('veppy')


class FastaFile(object):
__metaclass__ = ABCMeta

# -- Util methods
class FastaFile(six.with_metaclass(ABCMeta, object)):
@classmethod
def load_build(klass, build):
try:
translated_build = supported_build(build)
if not klass.BUILD_CACHE.get(translated_build):
klass.BUILD_CACHE[translated_build] = \
klass(build, klass.filepath_for_build(translated_build))
except (FastaNotFound, errors.FastaFileException), e:
except (FastaNotFound, errors.FastaFileException) as e:
logger.fatal(
'Error loading FastA file for build %s: %s' % (build, e)
)
Expand Down Expand Up @@ -101,7 +101,7 @@ def __init__(self, build, filepath, index=False):

# TODO: eventually, this should be dynamic and file-specific
self.chromosomes = \
map(lambda x: str(x), range(1, 23)) + ['MT', 'X', 'Y']
[str(x) for x in range(1, 23)] + ['MT', 'X', 'Y']

def get(self, chromosome, start, stop):
fasta_chromosome = self.get_chromosome(chromosome)
Expand Down
3 changes: 2 additions & 1 deletion veppy/utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import os
import logging

Expand Down Expand Up @@ -70,7 +71,7 @@ def set_defaults(transcripts):
for transcript in transcripts:
_groups[transcript.gene].append(transcript)

default_gene = get_default_gene(_groups.keys())
default_gene = get_default_gene(list(_groups.keys()))
if default_gene:
default_gene.default = True

Expand Down
1 change: 1 addition & 0 deletions veppy/utils/md5.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import hashlib


Expand Down
10 changes: 6 additions & 4 deletions veppy/utils/readers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import absolute_import
import csv
import gzip
import logging
from cStringIO import StringIO
from six.moves import map

logger = logging.getLogger('veppy')

Expand Down Expand Up @@ -90,9 +92,9 @@ def _check_headers(self):
self._headers = list(self.headercols)

def next(self):
parsed_row = self._process_next(super(CsvReader, self).next())
parsed_row = self._process_next(next(super(CsvReader, self)))
while parsed_row is None:
parsed_row = self._process_next(super(CsvReader, self).next())
parsed_row = self._process_next(next(super(CsvReader, self)))
return parsed_row

def _process_next(self, line):
Expand Down Expand Up @@ -289,12 +291,12 @@ def vcf_to_dict(self, vcf_obj):
'stop': vcf_obj.POS + len(vcf_obj.REF) - 1,
'chromosome': vcf_obj.CHROM,
'reference_allele': vcf_obj.REF,
'alternate_alleles': map(self._get_alternate, vcf_obj.ALT),
'alternate_alleles': list(map(self._get_alternate, vcf_obj.ALT)),
'info': vcf_obj.INFO
}

def __iter__(self):
return self

def next(self):
return self.vcf_to_dict(self.reader.next())
return self.vcf_to_dict(next(self.reader))
2 changes: 2 additions & 0 deletions veppy/veppy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import absolute_import
import logging

from . import features
Expand All @@ -8,6 +9,7 @@
from .sequence import CodingSequenceBuilder
from .splice_model import SpliceJunctionModel
from .utils.codons import split_into_codons
from six.moves import range

logger = logging.getLogger('veppy')

Expand Down