Skip to content

Commit

Permalink
Merge pull request galaxyproject#2429 from shiltemann/mothur_datatypes
Browse files Browse the repository at this point in the history
small updates to Mothur datatypes
  • Loading branch information
bgruening committed Jun 3, 2016
2 parents 697b01c + abdae95 commit 30fd1d6
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 51 deletions.
7 changes: 4 additions & 3 deletions config/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,9 @@
<datatype extension="sif" type="galaxy.datatypes.graph:Sif" display_in_upload="true"/>
<!-- datatypes storing triples -->
<datatype extension="triples" type="galaxy.datatypes.triples:Triples" display_in_upload="false"/>
<datatype extension="nt" type="galaxy.datatypes.triples:NTriples" display_in_upload="true"/>
<datatype extension="n3" type="galaxy.datatypes.triples:N3" display_in_upload="true"/>
<datatype extension="ttl" type="galaxy.datatypes.triples:Turtle" display_in_upload="true"/>
<datatype extension="nt" type="galaxy.datatypes.triples:NTriples" display_in_upload="true"/>
<datatype extension="n3" type="galaxy.datatypes.triples:N3" display_in_upload="true"/>
<datatype extension="ttl" type="galaxy.datatypes.triples:Turtle" display_in_upload="true"/>
<datatype extension="rdf" type="galaxy.datatypes.triples:Rdf" display_in_upload="true"/>
<datatype extension="jsonld" type="galaxy.datatypes.triples:Jsonld" display_in_upload="true"/>
<!-- Excel datatypes -->
Expand Down Expand Up @@ -512,6 +512,7 @@
<datatype extension="mothur.align.report" type="galaxy.datatypes.mothur:AlignReport" display_in_upload="true"/>
<datatype extension="mothur.filter" type="galaxy.datatypes.mothur:LaneMask" display_in_upload="true"/>
<datatype extension="mothur.dist" type="galaxy.datatypes.mothur:DistanceMatrix" display_in_upload="true"/>
<datatype extension="mothur.tre" type="galaxy.datatypes.data:Text" subclass="True" display_in_upload="true"/>
<datatype extension="mothur.pair.dist" type="galaxy.datatypes.mothur:PairwiseDistanceMatrix" display_in_upload="true"/>
<datatype extension="mothur.square.dist" type="galaxy.datatypes.mothur:SquareDistanceMatrix" display_in_upload="true"/>
<datatype extension="mothur.lower.dist" type="galaxy.datatypes.mothur:LowerTriangleDistanceMatrix" display_in_upload="true"/>
Expand Down
105 changes: 57 additions & 48 deletions lib/galaxy/datatypes/mothur.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,26 @@ class Otu(Text):
file_ext = 'mothur.otu'
MetadataElement(name="columns", default=0, desc="Number of columns", readonly=True, visible=True, no_value=0)
MetadataElement(name="labels", default=[], desc="Label Names", readonly=True, visible=True, no_value=[])
MetadataElement(name="otulabels", default=[], desc="OTU Names", readonly=True, visible=True, no_value=[])

def __init__(self, **kwd):
    """Initialize the Otu datatype; all keyword arguments go to the parent class."""
    # Cooperative super() call only: invoking Text.__init__ directly as well
    # would run the base initializer twice.
    super(Otu, self).__init__(**kwd)

def set_meta(self, dataset, overwrite=True, **kwd):
super(Otu, self).set_meta(dataset, overwrite=overwrite, **kwd)

if dataset.has_data():
label_names = set()
otulabel_names = set()
ncols = 0
data_lines = 0
comment_lines = 0

headers = get_headers(dataset.file_name, sep='\t', count=-1)
# set otulabels
if len(headers[0]) > 2:
otulabel_names = headers[0][2:]
# set label names and number of lines
for line in headers:
if len(line) >= 2 and not line[0].startswith('@'):
data_lines += 1
Expand All @@ -40,6 +48,8 @@ def set_meta(self, dataset, overwrite=True, **kwd):
dataset.metadata.columns = ncols
dataset.metadata.labels = list(label_names)
dataset.metadata.labels.sort()
dataset.metadata.otulabels = list(otulabel_names)
dataset.metadata.otulabels.sort()

def sniff(self, filename):
"""
Expand Down Expand Up @@ -80,10 +90,10 @@ def __init__(self, **kwd):
"""
http://www.mothur.org/wiki/Sabund_file
"""
Otu.__init__(self, **kwd)
super(Sabund, self).__init__(**kwd)

def init_meta(self, dataset, copy_from=None):
    """Initialize metadata for ``dataset`` by delegating to the parent class.

    ``copy_from`` is passed through unchanged.
    """
    # Single cooperative call; the explicit Otu.init_meta call was redundant.
    super(Sabund, self).init_meta(dataset, copy_from=copy_from)

def sniff(self, filename):
"""
Expand Down Expand Up @@ -124,16 +134,18 @@ class GroupAbund(Otu):
MetadataElement(name="groups", default=[], desc="Group Names", readonly=True, visible=True, no_value=[])

def __init__(self, **kwd):
    """Initialize the GroupAbund datatype; keyword arguments go to the parent class."""
    # One cooperative call replaces the direct Otu.__init__ invocation.
    super(GroupAbund, self).__init__(**kwd)

def init_meta(self, dataset, copy_from=None):
    """Initialize metadata for ``dataset`` by delegating to the parent class.

    ``copy_from`` is passed through unchanged.
    """
    # The string-quoted copy of this method was dead code and has been
    # dropped; only the cooperative super() call remains.
    super(GroupAbund, self).init_meta(dataset, copy_from=copy_from)

def set_meta(self, dataset, overwrite=True, skip=1, **kwd):
super(GroupAbund, self).set_meta(dataset, overwrite=overwrite, **kwd)

def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=100000, **kwd):
# See if file starts with header line
if dataset.has_data():
label_names = set()
Expand All @@ -142,7 +154,7 @@ def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=100000, **kwd
comment_lines = 0
ncols = 0

headers = get_headers(dataset.file_name, sep='\t', count=max_data_lines)
headers = get_headers(dataset.file_name, sep='\t', count=-1)
for line in headers:
if line[0] == 'label' and line[1] == 'Group':
skip = 1
Expand Down Expand Up @@ -207,7 +219,7 @@ class SecondaryStructureMap(Tabular):

def __init__(self, **kwd):
    """Initialize secondary structure map datatype with its single 'Map' column."""
    # Single cooperative base call (previously Tabular.__init__ was also
    # invoked directly).
    super(SecondaryStructureMap, self).__init__(**kwd)
    self.column_names = ['Map']

def sniff(self, filename):
Expand Down Expand Up @@ -251,20 +263,18 @@ class AlignCheck(Tabular):

def __init__(self, **kwd):
    """Initialize AlignCheck datatype.

    Declares the eight fixed columns (one string name column followed by
    seven integer columns) and records that the file carries one
    comment line.
    """
    super(AlignCheck, self).__init__(**kwd)
    self.column_names = ['name', 'pound', 'dash', 'plus', 'equal', 'loop', 'tilde', 'total']
    self.column_types = ['str', 'int', 'int', 'int', 'int', 'int', 'int', 'int']
    # Used by set_meta to correct the data-line count.
    self.comment_lines = 1

def set_meta(self, dataset, overwrite=True, **kwd):
    """Set AlignCheck metadata on ``dataset``.

    The parent class computes the generic metadata; this method then
    pins the fixed column names/types declared in ``__init__`` and
    removes the comment line(s) from the data-line count.
    """
    super(AlignCheck, self).set_meta(dataset, overwrite=overwrite, **kwd)

    dataset.metadata.column_names = self.column_names
    dataset.metadata.column_types = self.column_types
    dataset.metadata.comment_lines = self.comment_lines

    # NOTE(review): assumes the parent may leave data_lines unset (None),
    # e.g. when it does not count every line -- confirm against
    # Tabular.set_meta. Never let the count drop below zero for an empty
    # file, matching the guard the earlier manual count had.
    data_lines = dataset.metadata.data_lines
    if data_lines is not None:
        dataset.metadata.data_lines = max(data_lines - self.comment_lines, 0)


class AlignReport(Tabular):
Expand All @@ -276,7 +286,7 @@ class AlignReport(Tabular):

def __init__(self, **kwd):
"""Initialize AlignCheck datatype"""
Tabular.__init__(self, **kwd)
super(AlignReport, self).__init__(**kwd)
self.column_names = ['QueryName', 'QueryLength', 'TemplateName', 'TemplateLength', 'SearchMethod', 'SearchScore',
'AlignmentMethod', 'QueryStart', 'QueryEnd', 'TemplateStart', 'TemplateEnd',
'PairwiseAlignmentLength', 'GapsInQuery', 'GapsInTemplate', 'LongestInsert', 'SimBtwnQuery&Template'
Expand All @@ -289,12 +299,12 @@ class DistanceMatrix(Text):
MetadataElement(name="sequence_count", default=0, desc="Number of sequences", readonly=True, visible=True, optional=True, no_value='?')

def init_meta(self, dataset, copy_from=None):
    """Initialize metadata for ``dataset`` by delegating to the parent class.

    ``copy_from`` is passed through unchanged.
    """
    # super() already binds self: passing it again would shift ``self`` into
    # the ``dataset`` slot and collide with the ``copy_from`` keyword
    # (TypeError).
    super(DistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

def set_meta(self, dataset, overwrite=True, skip=0, **kwd):
Text.set_meta(self, dataset, overwrite=overwrite, skip=skip, **kwd)
super(DistanceMatrix, self).set_meta(dataset, overwrite=overwrite, skip=skip, **kwd)

headers = get_headers(dataset.file_name, sep='\t', count=-1)
headers = get_headers(dataset.file_name, sep='\t')
for line in headers:
if not line[0].startswith('@'):
try:
Expand All @@ -309,10 +319,10 @@ class LowerTriangleDistanceMatrix(DistanceMatrix):

def __init__(self, **kwd):
    """Initialize the lower-triangle distance matrix datatype."""
    # Single cooperative base call; keyword arguments are passed through.
    super(LowerTriangleDistanceMatrix, self).__init__(**kwd)

def init_meta(self, dataset, copy_from=None):
    """Initialize metadata for ``dataset`` by delegating to the parent class.

    ``copy_from`` is passed through unchanged.
    """
    # Single cooperative call; the direct DistanceMatrix.init_meta call was
    # redundant.
    super(LowerTriangleDistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

def sniff(self, filename):
"""
Expand Down Expand Up @@ -371,10 +381,10 @@ class SquareDistanceMatrix(DistanceMatrix):
file_ext = 'mothur.square.dist'

def __init__(self, **kwd):
    """Initialize the square distance matrix datatype."""
    # Single cooperative base call; keyword arguments are passed through.
    super(SquareDistanceMatrix, self).__init__(**kwd)

def init_meta(self, dataset, copy_from=None):
    """Initialize metadata for ``dataset`` by delegating to the parent class.

    ``copy_from`` is passed through unchanged.
    """
    # super() already binds self: passing it again would shift ``self`` into
    # the ``dataset`` slot and collide with the ``copy_from`` keyword
    # (TypeError).
    super(SquareDistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

def sniff(self, filename):
"""
Expand Down Expand Up @@ -432,12 +442,12 @@ class PairwiseDistanceMatrix(DistanceMatrix, Tabular):

def __init__(self, **kwd):
    """Initialize the pairwise distance matrix datatype.

    Declares three fixed columns: two string sequence-name columns and a
    float distance column.
    """
    # Cooperative call so every base in the MRO (DistanceMatrix and Tabular)
    # is initialized once, instead of calling Tabular.__init__ directly.
    super(PairwiseDistanceMatrix, self).__init__(**kwd)
    self.column_names = ['Sequence', 'Sequence', 'Distance']
    self.column_types = ['str', 'str', 'float']

def set_meta(self, dataset, overwrite=True, skip=None, **kwd):
    """Set metadata on ``dataset`` by delegating along the MRO.

    ``overwrite``, ``skip`` and any extra keyword arguments are forwarded
    unchanged.
    """
    # One cooperative call; additionally calling Tabular.set_meta directly
    # would compute the tabular metadata twice.
    super(PairwiseDistanceMatrix, self).set_meta(dataset, overwrite=overwrite, skip=skip, **kwd)

def sniff(self, filename):
"""
Expand Down Expand Up @@ -484,7 +494,7 @@ def __init__(self, **kwd):
http://www.mothur.org/wiki/Name_file
Name file shows the relationship between a representative sequence(col 1) and the sequences(comma-separated) it represents(col 2)
"""
Tabular.__init__(self, **kwd)
super(Names, self).__init__(**kwd)
self.column_names = ['name', 'representatives']
self.columns = 2

Expand All @@ -494,7 +504,7 @@ class Summary(Tabular):

def __init__(self, **kwd):
    """Initialize the Summary datatype.

    A summary file summarizes the quality of sequences in an unaligned or
    aligned fasta-formatted sequence file; it has six fixed columns.
    """
    super(Summary, self).__init__(**kwd)
    self.column_names = ['seqname', 'start', 'end', 'nbases', 'ambigs', 'polymer']
    self.columns = 6

Expand All @@ -508,15 +518,15 @@ def __init__(self, **kwd):
http://www.mothur.org/wiki/Groups_file
Group file assigns sequence (col 1) to a group (col 2)
"""
Tabular.__init__(self, **kwd)
super(Group, self).__init__(**kwd)
self.column_names = ['name', 'group']
self.columns = 2

def set_meta(self, dataset, overwrite=True, skip=None, max_data_lines=None, **kwd):
Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
group_names = set()
super(Group, self).set_meta(dataset, overwrite, skip, max_data_lines)

headers = get_headers(dataset.file_name, sep='\t')
group_names = set()
headers = get_headers(dataset.file_name, sep='\t', count=-1)
for line in headers:
if len(line) > 1:
group_names.add(line[1])
Expand All @@ -528,7 +538,7 @@ class AccNos(Tabular):

def __init__(self, **kwd):
    """Initialize the AccNos datatype: a list of names in a single 'name' column."""
    super(AccNos, self).__init__(**kwd)
    self.column_names = ['name']
    self.columns = 1

Expand Down Expand Up @@ -572,7 +582,7 @@ class Frequency(Tabular):

def __init__(self, **kwd):
    """Initialize the Frequency datatype.

    Declares two fixed columns: an integer ``position`` and a float
    ``frequency``.
    """
    super(Frequency, self).__init__(**kwd)
    self.column_names = ['position', 'frequency']
    self.column_types = ['int', 'float']

Expand Down Expand Up @@ -624,7 +634,7 @@ class Quantile(Tabular):

def __init__(self, **kwd):
    """Initialize the Quantile datatype (quantiles for chimera analysis).

    Declares seven fixed columns: an integer ``num`` followed by six float
    quantile columns.
    """
    super(Quantile, self).__init__(**kwd)
    self.column_names = ['num', 'ten', 'twentyfive', 'fifty', 'seventyfive', 'ninetyfive', 'ninetynine']
    self.column_types = ['int', 'float', 'float', 'float', 'float', 'float', 'float']

Expand Down Expand Up @@ -706,37 +716,36 @@ def __init__(self, **kwd):
U68595 1
U68600 1
# Example 2 (with group columns):
Representative_Sequence total forest pastur
Representative_Sequence total forest pasture
U68630 1 1 0
U68595 1 1 0
U68600 1 1 0
U68591 1 1 0
U68647 1 0 1
"""
Tabular.__init__(self, **kwd)
super(CountTable, self).__init__(**kwd)
self.column_names = ['name', 'total']

def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=None, **kwd):
    """Set CountTable metadata on ``dataset``.

    The parent class computes the generic metadata. The first line of
    the file is then read as the header row: the first column is typed as
    a string and every following column as an integer, the column count
    is recorded, and any columns after the second are stored as group
    names. The header row is counted as one comment line and removed
    from the data-line count.

    ``skip`` and ``max_data_lines`` are accepted for signature
    compatibility but are not forwarded to the parent.
    """
    super(CountTable, self).set_meta(dataset, overwrite=overwrite, **kwd)

    # Only the header row is needed, so read a single line.
    headers = get_headers(dataset.file_name, sep='\t', count=1)
    if not headers:
        # NOTE(review): assumes get_headers yields an empty list for an
        # empty file -- confirm. Without a header row there is nothing to
        # derive, and indexing headers[0] would raise IndexError.
        return

    colnames = headers[0]
    dataset.metadata.column_types = ['str'] + ['int'] * (len(colnames) - 1)
    if len(colnames) > 1:
        dataset.metadata.columns = len(colnames)
    if len(colnames) > 2:
        dataset.metadata.groups = colnames[2:]

    dataset.metadata.comment_lines = 1
    # NOTE(review): assumes the parent may leave data_lines unset (None) --
    # confirm against Tabular.set_meta. Clamp at zero so the header
    # correction can never produce a negative count.
    data_lines = dataset.metadata.data_lines
    if data_lines is not None:
        dataset.metadata.data_lines = max(data_lines - 1, 0)


class RefTaxonomy(Tabular):
file_ext = 'mothur.ref.taxonomy'

def __init__(self, **kwd):
    """Initialize the RefTaxonomy datatype with 'name' and 'taxonomy' columns."""
    super(RefTaxonomy, self).__init__(**kwd)
    self.column_names = ['name', 'taxonomy']

def sniff(self, filename):
Expand Down Expand Up @@ -796,7 +805,7 @@ class ConsensusTaxonomy(Tabular):

def __init__(self, **kwd):
    """Initialize the ConsensusTaxonomy datatype with 'OTU', 'count' and 'taxonomy' columns."""
    super(ConsensusTaxonomy, self).__init__(**kwd)
    self.column_names = ['OTU', 'count', 'taxonomy']


Expand All @@ -805,7 +814,7 @@ class TaxonomySummary(Tabular):

def __init__(self, **kwd):
    """Initialize the TaxonomySummary datatype (a summary of taxon classification).

    Declares five fixed column names.
    """
    super(TaxonomySummary, self).__init__(**kwd)
    self.column_names = ['taxlevel', 'rankID', 'taxon', 'daughterlevels', 'total']


Expand All @@ -814,7 +823,7 @@ class Axes(Tabular):

def __init__(self, **kwd):
    """Initialize axes datatype; keyword arguments go to the parent class."""
    # Single cooperative base call instead of also calling Tabular.__init__.
    super(Axes, self).__init__(**kwd)

def sniff(self, filename):
"""
Expand Down Expand Up @@ -892,12 +901,12 @@ class SffFlow(Tabular):
GQY1XT001CF5YW 88 1.02 0.02 1.01 0.04 0.06 1.02 0.03 ...
"""
def __init__(self, **kwd):
    """Initialize the SffFlow datatype; keyword arguments go to the parent class."""
    # Single cooperative base call instead of also calling Tabular.__init__.
    super(SffFlow, self).__init__(**kwd)

def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=None, **kwd):
Tabular.set_meta(self, dataset, overwrite, 1, max_data_lines)
super(SffFlow, self).set_meta(dataset, overwrite, 1, max_data_lines)

headers = get_headers(dataset.file_name, sep='\t')
headers = get_headers(dataset.file_name, sep='\t', count=1)
try:
flow_values = int(headers[0][0])
dataset.metadata.flow_values = flow_values
Expand Down

0 comments on commit 30fd1d6

Please sign in to comment.