diff --git a/config/datatypes_conf.xml.sample b/config/datatypes_conf.xml.sample
index 8e6426c851ef..a370c5d35714 100644
--- a/config/datatypes_conf.xml.sample
+++ b/config/datatypes_conf.xml.sample
@@ -317,9 +317,9 @@
-
-
-
+
+
+
@@ -512,6 +512,7 @@
+
diff --git a/lib/galaxy/datatypes/mothur.py b/lib/galaxy/datatypes/mothur.py
index 531fc7218219..6bc09a950df5 100644
--- a/lib/galaxy/datatypes/mothur.py
+++ b/lib/galaxy/datatypes/mothur.py
@@ -16,18 +16,26 @@ class Otu(Text):
     file_ext = 'mothur.otu'
     MetadataElement(name="columns", default=0, desc="Number of columns", readonly=True, visible=True, no_value=0)
     MetadataElement(name="labels", default=[], desc="Label Names", readonly=True, visible=True, no_value=[])
+    MetadataElement(name="otulabels", default=[], desc="OTU Names", readonly=True, visible=True, no_value=[])

     def __init__(self, **kwd):
-        Text.__init__(self, **kwd)
+        super(Otu, self).__init__(**kwd)

     def set_meta(self, dataset, overwrite=True, **kwd):
+        super(Otu, self).set_meta(dataset, overwrite=overwrite, **kwd)
+
         if dataset.has_data():
             label_names = set()
+            otulabel_names = set()
             ncols = 0
             data_lines = 0
             comment_lines = 0

             headers = get_headers(dataset.file_name, sep='\t', count=-1)
+            # set otulabels
+            if len(headers[0]) > 2:
+                otulabel_names = headers[0][2:]
+            # set label names and number of lines
             for line in headers:
                 if len(line) >= 2 and not line[0].startswith('@'):
                     data_lines += 1
@@ -40,6 +48,8 @@ def set_meta(self, dataset, overwrite=True, **kwd):
             dataset.metadata.columns = ncols
             dataset.metadata.labels = list(label_names)
             dataset.metadata.labels.sort()
+            dataset.metadata.otulabels = list(otulabel_names)
+            dataset.metadata.otulabels.sort()

     def sniff(self, filename):
         """
@@ -80,10 +90,10 @@ def __init__(self, **kwd):
         """
         http://www.mothur.org/wiki/Sabund_file
         """
-        Otu.__init__(self, **kwd)
+        super(Sabund, self).__init__(**kwd)

     def init_meta(self, dataset, copy_from=None):
-        Otu.init_meta(self, dataset, copy_from=copy_from)
+        super(Sabund, self).init_meta(dataset, copy_from=copy_from)

     def sniff(self, filename):
         """
@@ -124,16 +134,18 @@ class GroupAbund(Otu):
     MetadataElement(name="groups", default=[], desc="Group Names", readonly=True, visible=True, no_value=[])

     def __init__(self, **kwd):
-        Otu.__init__(self, **kwd)
+        super(GroupAbund, self).__init__(**kwd)

     """
     def init_meta(self, dataset, copy_from=None):
         Otu.init_meta(self, dataset, copy_from=copy_from)
     """
     def init_meta(self, dataset, copy_from=None):
-        Otu.init_meta(self, dataset, copy_from=copy_from)
+        super(GroupAbund, self).init_meta(dataset, copy_from=copy_from)
+
+    def set_meta(self, dataset, overwrite=True, skip=1, **kwd):
+        super(GroupAbund, self).set_meta(dataset, overwrite=overwrite, **kwd)

-    def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=100000, **kwd):
         # See if file starts with header line
         if dataset.has_data():
             label_names = set()
@@ -142,7 +154,7 @@ def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=100000, **kwd
             comment_lines = 0
             ncols = 0

-            headers = get_headers(dataset.file_name, sep='\t', count=max_data_lines)
+            headers = get_headers(dataset.file_name, sep='\t', count=-1)
             for line in headers:
                 if line[0] == 'label' and line[1] == 'Group':
                     skip = 1
@@ -207,7 +219,7 @@ class SecondaryStructureMap(Tabular):

     def __init__(self, **kwd):
         """Initialize secondary structure map datatype"""
-        Tabular.__init__(self, **kwd)
+        super(SecondaryStructureMap, self).__init__(**kwd)
         self.column_names = ['Map']

     def sniff(self, filename):
@@ -251,20 +263,18 @@ class AlignCheck(Tabular):

     def __init__(self, **kwd):
         """Initialize AlignCheck datatype"""
-        Tabular.__init__(self, **kwd)
+        super(AlignCheck, self).__init__(**kwd)
         self.column_names = ['name', 'pound', 'dash', 'plus', 'equal', 'loop', 'tilde', 'total']
         self.column_types = ['str', 'int', 'int', 'int', 'int', 'int', 'int', 'int']
         self.comment_lines = 1

     def set_meta(self, dataset, overwrite=True, **kwd):
-        data_lines = 0
-        headers = get_headers(dataset.file_name, sep='\t', count=-1)
-        for line in headers:
-            data_lines += 1
-        dataset.metadata.comment_lines = 1
-        dataset.metadata.data_lines = data_lines - 1 if data_lines > 0 else 0
+        super(AlignCheck, self).set_meta(dataset, overwrite=overwrite, **kwd)
+
         dataset.metadata.column_names = self.column_names
         dataset.metadata.column_types = self.column_types
+        dataset.metadata.comment_lines = self.comment_lines
+        dataset.metadata.data_lines -= self.comment_lines


 class AlignReport(Tabular):
@@ -276,7 +286,7 @@ class AlignReport(Tabular):

     def __init__(self, **kwd):
         """Initialize AlignCheck datatype"""
-        Tabular.__init__(self, **kwd)
+        super(AlignReport, self).__init__(**kwd)
         self.column_names = ['QueryName', 'QueryLength', 'TemplateName', 'TemplateLength', 'SearchMethod', 'SearchScore',
                              'AlignmentMethod', 'QueryStart', 'QueryEnd', 'TemplateStart', 'TemplateEnd',
                              'PairwiseAlignmentLength', 'GapsInQuery', 'GapsInTemplate', 'LongestInsert', 'SimBtwnQuery&Template'
@@ -289,12 +299,12 @@ class DistanceMatrix(Text):
     MetadataElement(name="sequence_count", default=0, desc="Number of sequences", readonly=True, visible=True, optional=True, no_value='?')

     def init_meta(self, dataset, copy_from=None):
-        Text.init_meta(self, dataset, copy_from=copy_from)
+        super(DistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

     def set_meta(self, dataset, overwrite=True, skip=0, **kwd):
-        Text.set_meta(self, dataset, overwrite=overwrite, skip=skip, **kwd)
+        super(DistanceMatrix, self).set_meta(dataset, overwrite=overwrite, skip=skip, **kwd)

-        headers = get_headers(dataset.file_name, sep='\t', count=-1)
+        headers = get_headers(dataset.file_name, sep='\t')
         for line in headers:
             if not line[0].startswith('@'):
                 try:
@@ -309,10 +319,10 @@ class LowerTriangleDistanceMatrix(DistanceMatrix):

     def __init__(self, **kwd):
         """Initialize secondary structure map datatype"""
-        DistanceMatrix.__init__(self, **kwd)
+        super(LowerTriangleDistanceMatrix, self).__init__(**kwd)

     def init_meta(self, dataset, copy_from=None):
-        DistanceMatrix.init_meta(self, dataset, copy_from=copy_from)
+        super(LowerTriangleDistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

     def sniff(self, filename):
         """
@@ -371,10 +381,10 @@ class SquareDistanceMatrix(DistanceMatrix):
     file_ext = 'mothur.square.dist'

     def __init__(self, **kwd):
-        DistanceMatrix.__init__(self, **kwd)
+        super(SquareDistanceMatrix, self).__init__(**kwd)

     def init_meta(self, dataset, copy_from=None):
-        DistanceMatrix.init_meta(self, dataset, copy_from=copy_from)
+        super(SquareDistanceMatrix, self).init_meta(dataset, copy_from=copy_from)

     def sniff(self, filename):
         """
@@ -432,12 +442,12 @@ class PairwiseDistanceMatrix(DistanceMatrix, Tabular):

     def __init__(self, **kwd):
         """Initialize secondary structure map datatype"""
-        Tabular.__init__(self, **kwd)
+        super(PairwiseDistanceMatrix, self).__init__(**kwd)
         self.column_names = ['Sequence', 'Sequence', 'Distance']
         self.column_types = ['str', 'str', 'float']

     def set_meta(self, dataset, overwrite=True, skip=None, **kwd):
-        Tabular.set_meta(self, dataset, overwrite=overwrite, skip=skip, **kwd)
+        super(PairwiseDistanceMatrix, self).set_meta(dataset, overwrite=overwrite, skip=skip, **kwd)

     def sniff(self, filename):
         """
@@ -484,7 +494,7 @@ def __init__(self, **kwd):
         http://www.mothur.org/wiki/Name_file
         Name file shows the relationship between a representative sequence(col 1) and the sequences(comma-separated) it represents(col 2)
         """
-        Tabular.__init__(self, **kwd)
+        super(Names, self).__init__(**kwd)
         self.column_names = ['name', 'representatives']
         self.columns = 2

@@ -494,7 +504,7 @@ class Summary(Tabular):

     def __init__(self, **kwd):
         """summarizes the quality of sequences in an unaligned or aligned fasta-formatted sequence file"""
-        Tabular.__init__(self, **kwd)
+        super(Summary, self).__init__(**kwd)
         self.column_names = ['seqname', 'start', 'end', 'nbases', 'ambigs', 'polymer']
         self.columns = 6

@@ -508,15 +518,15 @@ def __init__(self, **kwd):
         http://www.mothur.org/wiki/Groups_file
         Group file assigns sequence (col 1) to a group (col 2)
         """
-        Tabular.__init__(self, **kwd)
+        super(Group, self).__init__(**kwd)
         self.column_names = ['name', 'group']
         self.columns = 2

     def set_meta(self, dataset, overwrite=True, skip=None, max_data_lines=None, **kwd):
-        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
-        group_names = set()
+        super(Group, self).set_meta(dataset, overwrite, skip, max_data_lines)

-        headers = get_headers(dataset.file_name, sep='\t')
+        group_names = set()
+        headers = get_headers(dataset.file_name, sep='\t', count=-1)
         for line in headers:
             if len(line) > 1:
                 group_names.add(line[1])
@@ -528,7 +538,7 @@ class AccNos(Tabular):

     def __init__(self, **kwd):
         """A list of names"""
-        Tabular.__init__(self, **kwd)
+        super(AccNos, self).__init__(**kwd)
         self.column_names = ['name']
         self.columns = 1

@@ -572,7 +582,7 @@ class Frequency(Tabular):

     def __init__(self, **kwd):
         """A list of names"""
-        Tabular.__init__(self, **kwd)
+        super(Frequency, self).__init__(**kwd)
         self.column_names = ['position', 'frequency']
         self.column_types = ['int', 'float']

@@ -624,7 +634,7 @@ class Quantile(Tabular):

     def __init__(self, **kwd):
         """Quantiles for chimera analysis"""
-        Tabular.__init__(self, **kwd)
+        super(Quantile, self).__init__(**kwd)
         self.column_names = ['num', 'ten', 'twentyfive', 'fifty', 'seventyfive', 'ninetyfive', 'ninetynine']
         self.column_types = ['int', 'float', 'float', 'float', 'float', 'float', 'float']

@@ -706,37 +716,36 @@ def __init__(self, **kwd):
         U68595 1
         U68600 1
         # Example 2 (with group columns):
-        Representative_Sequence total forest pastur
+        Representative_Sequence total forest pasture
         U68630 1 1 0
         U68595 1 1 0
         U68600 1 1 0
         U68591 1 1 0
         U68647 1 0 1
         """
-        Tabular.__init__(self, **kwd)
+        super(CountTable, self).__init__(**kwd)
         self.column_names = ['name', 'total']

     def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=None, **kwd):
-        data_lines = 0
-        headers = get_headers(dataset.file_name, sep='\t', count=-1)
+        super(CountTable, self).set_meta(dataset, overwrite=overwrite, **kwd)
+
+        headers = get_headers(dataset.file_name, sep='\t', count=1)
         colnames = headers[0]
         dataset.metadata.column_types = ['str'] + (['int'] * ( len(headers[0]) - 1))
         if len(colnames) > 1:
             dataset.metadata.columns = len(colnames)
         if len(colnames) > 2:
             dataset.metadata.groups = colnames[2:]

-        for line in headers[1:]:
-            data_lines += 1
         dataset.metadata.comment_lines = 1
-        dataset.metadata.data_lines = data_lines
+        dataset.metadata.data_lines -= 1


 class RefTaxonomy(Tabular):
     file_ext = 'mothur.ref.taxonomy'

     def __init__(self, **kwd):
-        Tabular.__init__(self, **kwd)
+        super(RefTaxonomy, self).__init__(**kwd)
         self.column_names = ['name', 'taxonomy']

     def sniff(self, filename):
@@ -796,7 +805,7 @@ class ConsensusTaxonomy(Tabular):

     def __init__(self, **kwd):
         """A list of names"""
-        Tabular.__init__(self, **kwd)
+        super(ConsensusTaxonomy, self).__init__(**kwd)
         self.column_names = ['OTU', 'count', 'taxonomy']


@@ -805,7 +814,7 @@ class TaxonomySummary(Tabular):

     def __init__(self, **kwd):
         """A Summary of taxon classification"""
-        Tabular.__init__(self, **kwd)
+        super(TaxonomySummary, self).__init__(**kwd)
         self.column_names = ['taxlevel', 'rankID', 'taxon', 'daughterlevels', 'total']


@@ -814,7 +823,7 @@ class Axes(Tabular):

     def __init__(self, **kwd):
         """Initialize axes datatype"""
-        Tabular.__init__(self, **kwd)
+        super(Axes, self).__init__(**kwd)

     def sniff(self, filename):
         """
@@ -892,12 +901,12 @@ class SffFlow(Tabular):
       GQY1XT001CF5YW 88 1.02 0.02 1.01 0.04 0.06 1.02 0.03 ...
     """
     def __init__(self, **kwd):
-        Tabular.__init__(self, **kwd)
+        super(SffFlow, self).__init__(**kwd)

     def set_meta(self, dataset, overwrite=True, skip=1, max_data_lines=None, **kwd):
-        Tabular.set_meta(self, dataset, overwrite, 1, max_data_lines)
+        super(SffFlow, self).set_meta(dataset, overwrite, 1, max_data_lines)

-        headers = get_headers(dataset.file_name, sep='\t')
+        headers = get_headers(dataset.file_name, sep='\t', count=1)
         try:
             flow_values = int(headers[0][0])
             dataset.metadata.flow_values = flow_values