diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..d1937921 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,50 @@ +name: Python CI + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + timeout-minutes: 10 + strategy: + matrix: + python-version: [3.7] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Cache pip + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + sudo apt-get install -qq phyml + pip install --upgrade pip setuptools wheel + pip install --only-binary=numpy,scipy numpy scipy + pip install matplotlib ipython jupyter sympy pytest codecov pytest-cov + pip install -r requirements.txt + pip install . + + - name: Run tests + run: pytest --cov-report=xml --cov=OrthoEvol tests/ + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v2 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./coverage.xml + fail_ci_if_error: true + + - name: Notify failure + if: failure() + run: echo "Build failed" \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 58363dec..f9f64589 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,32 +5,30 @@ notifications: email: datasnakes@gmail.com jobs: include: - - python: 3.5 - dist: trusty - install: - - pip install --upgrade pip setuptools wheel - - pip install --only-binary=numpy,scipy numpy scipy - - pip install matplotlib ipython jupyter sympy pytest codecov "pytest-cov<=2.6.0" - - pip install -r requirements.txt - - pip install . 
- - python: 3.6 + - python: 3.7 dist: xenial + before_install: + - sudo apt-get install -qq phyml install: - pip install --upgrade pip setuptools wheel - pip install --only-binary=numpy,scipy numpy scipy - pip install matplotlib ipython jupyter sympy pytest codecov pytest-cov - pip install -r requirements.txt - pip install . - - python: 3.7 + - python: 3.8 dist: xenial + before_install: + - sudo apt-get install -qq phyml install: - pip install --upgrade pip setuptools wheel - pip install --only-binary=numpy,scipy numpy scipy - pip install matplotlib ipython jupyter sympy pytest codecov pytest-cov - pip install -r requirements.txt - pip install . - - python: 3.8 + - python: 3.9 dist: xenial + before_install: + - sudo apt-get install -qq phyml install: - pip install --upgrade pip setuptools wheel - pip install --only-binary=numpy,scipy numpy scipy diff --git a/OrthoEvol/Cookies/new_website/tasks.py b/OrthoEvol/Cookies/new_website/tasks.py index b6d8b43b..5c962b6f 100644 --- a/OrthoEvol/Cookies/new_website/tasks.py +++ b/OrthoEvol/Cookies/new_website/tasks.py @@ -1,77 +1,73 @@ -#!/usr/bin/env python -"""Invoke tasks.""" -import os -import json -import shutil -import webbrowser - -from invoke import task - -HERE = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(HERE, 'cookiecutter.json'), 'r') as fp: - COOKIECUTTER_SETTINGS = json.load(fp) -# Match default value of website_name from cookiecutter.json -COOKIE = os.path.join(HERE, COOKIECUTTER_SETTINGS['website_name']) -AUTOAPP = os.path.join(COOKIE, 'autoapp.py') -REQUIREMENTS = os.path.join(COOKIE, 'requirements', 'dev.txt') - - -@task -def build(ctx): - """Build the cookiecutter. - - :param ctx: - - """ - ctx.run('cookiecutter {0} --no-input'.format(HERE)) - - -@task -def clean(ctx): - """Clean out generated cookiecutter. - - :param ctx: - - """ - if os.path.exists(COOKIE): - shutil.rmtree(COOKIE) - print('Removed {0}'.format(COOKIE)) - else: - print('App directory does not exist. 
Skipping.') - - -def _run_flask_command(ctx, command): - """ - - :param ctx: - :param command: - - """ - ctx.run('FLASK_APP={0} flask {1}'.format(AUTOAPP, command), echo=True) - - -@task(pre=[clean, build]) -def test(ctx): - """Run lint commands and tests. - - :param ctx: - - """ - ctx.run('pip install -r {0} --ignore-installed'.format(REQUIREMENTS), - echo=True) - os.chdir(COOKIE) - _run_flask_command(ctx, 'lint') - _run_flask_command(ctx, 'test') - -@task -def readme(ctx, browse=False): - """ - - :param ctx: - :param browse: (Default value = False) - - """ - ctx.run("rst2html.py README.rst > README.html") - if browse: - webbrowser.open_new_tab('README.html') - +#!/usr/bin/env python +"""Invoke tasks.""" + +import os +import json +import shutil +import webbrowser + +from invoke import task + +HERE = os.path.abspath(os.path.dirname(__file__)) + +# Load the settings from cookiecutter.json +with open(os.path.join(HERE, 'cookiecutter.json'), 'r') as fp: + COOKIECUTTER_SETTINGS = json.load(fp) + +# Match default value of website_name from cookiecutter.json +COOKIE = os.path.join(HERE, COOKIECUTTER_SETTINGS['website_name']) + +# Path to autoapp.py file +AUTOAPP = os.path.join(COOKIE, 'autoapp.py') + +# Path to dev requirements file +REQUIREMENTS = os.path.join(COOKIE, 'requirements', 'dev.txt') + +@task +def build(ctx): + """Build the cookiecutter.""" + + # Run cookiecutter with no input + ctx.run(f'cookiecutter {HERE} --no-input') + +@task +def clean(ctx): + """Clean out generated cookiecutter.""" + + # Remove the cookiecutter directory if it exists + if os.path.exists(COOKIE): + shutil.rmtree(COOKIE) + print(f'Removed {COOKIE}') + else: + print('App directory does not exist. 
Skipping.') + +def _run_flask_command(ctx, command): + """Run a Flask command.""" + + # Run the specified Flask command + ctx.run(f'FLASK_APP={AUTOAPP} flask {command}', echo=True) + +@task(pre=[clean, build]) +def test(ctx): + """Run lint commands and tests.""" + + # Install dev requirements + ctx.run(f'pip install -r {REQUIREMENTS} --ignore-installed', echo=True) + + # Change to the cookiecutter directory + os.chdir(COOKIE) + + # Run lint and test commands + _run_flask_command(ctx, 'lint') + _run_flask_command(ctx, 'test') + +@task +def readme(ctx, browse=False): + """Convert the README to HTML.""" + + # Convert the README to HTML + ctx.run("rst2html.py README.rst > README.html") + + # Open the HTML file in a web browser if specified + if browse: + webbrowser.open_new_tab('README.html') diff --git a/OrthoEvol/Manager/airflow/test_dag.py b/OrthoEvol/Manager/airflow/test_dag.py deleted file mode 100644 index e69de29b..00000000 diff --git a/OrthoEvol/Manager/db_mana_test.py b/OrthoEvol/Manager/db_mana_test.py index 37cbbe2e..8e306c09 100644 --- a/OrthoEvol/Manager/db_mana_test.py +++ b/OrthoEvol/Manager/db_mana_test.py @@ -7,43 +7,49 @@ import getpass from datetime import datetime as d import os -_jobname = "jobname" -# Set up project management -pm_config_file = resource_filename(yml.__name__, "initialize_new.yml") -with open(pm_config_file, 'r') as f: - pm_config = yaml.load(f, Loader=yaml.FullLoader) -pm = ProjectManagement(**pm_config["Management_config"]) +# Define job name +job_name = "jobname" -# Set up database management -db_config_file = resource_filename(yml.__name__, "databases.yml") -with open(db_config_file, 'r') as f: - db_config = yaml.load(f, Loader=yaml.FullLoader) +# Function to load configuration from YAML file +def load_config(file_name): + file_path = resource_filename(yml.__name__, file_name) + with open(file_path, 'r') as file: + return yaml.load(file, Loader=yaml.FullLoader) + +# Load project management configuration +pm_config = 
load_config("initialize_new.yml") +project_manager = ProjectManagement(**pm_config["Management_config"]) + +# Load and update database management configuration +db_config = load_config("databases.yml") db_config.update(pm_config) -db_config['Database_config']['Full']['NCBI']['NCBI_refseq_release']['upload_number'] = 12 -db_config['Database_config']['Full']['NCBI']['NCBI_refseq_release']['pbs_dict'] = { - 'author': getpass.getuser(), - 'description': 'This is a default pbs job.', - 'date': d.now().strftime('%a %b %d %I:%M:%S %p %Y'), - 'proj_name': 'OrthoEvol', - 'select': '1', - 'memgb': '6gb', - 'cput': '72:00:00', - 'wt': '2000:00:00', - 'job_name': _jobname, - 'outfile': _jobname + '.o', - 'errfile': _jobname + '.e', - 'script': _jobname, - 'log_name': _jobname, - 'pbsworkdir': os.getcwd(), - 'cmd': 'python3.6 ' + os.path.join(os.getcwd(), _jobname + '.py'), - 'email': 'n/a' - } -# Generate main config file for this job -config_file = pm.user_log / Path("upload_config.yml") -with open(str(config_file), 'w') as cf: - yaml.dump(db_config, cf, default_flow_style=False) +ncbi_config = db_config['Database_config']['Full']['NCBI']['NCBI_refseq_release'] +ncbi_config['upload_number'] = 12 +ncbi_config['pbs_dict'] = { + 'author': getpass.getuser(), + 'description': 'This is a default pbs job.', + 'date': d.now().strftime('%a %b %d %I:%M:%S %p %Y'), + 'proj_name': 'OrthoEvol', + 'select': '1', + 'memgb': '6gb', + 'cput': '72:00:00', + 'wt': '2000:00:00', + 'job_name': job_name, + 'outfile': job_name + '.o', + 'errfile': job_name + '.e', + 'script': job_name, + 'log_name': job_name, + 'pbsworkdir': os.getcwd(), + 'cmd': f'python3.6 {os.path.join(os.getcwd(), job_name + ".py")}', + 'email': 'n/a' +} + +# Save the updated configuration to a YAML file +config_file_path = project_manager.user_log / Path("upload_config.yml") +with open(str(config_file_path), 'w') as config_file: + yaml.dump(db_config, config_file, default_flow_style=False) -# Set up database dispatcher and 
dispatch the functions -dd = DatabaseDispatcher(config_file, pm) -dd.dispatch(dd.strategies, dd.dispatcher, dd.configuration) +# Initialize database dispatcher and execute dispatch functions +db_dispatcher = DatabaseDispatcher(config_file_path, project_manager) +db_dispatcher.dispatch(db_dispatcher.strategies, db_dispatcher.dispatcher, db_dispatcher.configuration) diff --git a/OrthoEvol/Orthologs/Phylogenetics/PAML/README.md b/OrthoEvol/Orthologs/Phylogenetics/PAML/README.md index c6b61677..72796d3f 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PAML/README.md +++ b/OrthoEvol/Orthologs/Phylogenetics/PAML/README.md @@ -1,28 +1,32 @@ # PAML Documentation + PAML (Phylogenetic Analysis by Maximum Likelihood) is a package of programs for phylogenetic analyses of DNA or protein sequences using maximum likelihood and is maintained by Ziheng Yang. ## Why ETE? -ETE is python package for building, comparing, annotating, manipulating and visualising -trees. It provides a comprehensive API and a collection of command line tools, - including utilities to work with the NCBI taxonomy tree. + +ETE is a python package for building, comparing, annotating, manipulating and visualising +trees. It provides a comprehensive API and a collection of command line tools including +utilities to work with the NCBI taxonomy tree. ### Model Selection and Default Parameters + It's important to note the default parameters for `ETE3PAML` are as follows: -`model='M1'`, `workdir=```. +`model='M1'`, `workdir=''`. 
## Usage & Examples ### A simple implementation of ETE3PAML + ```python from OrthoEvol.Orthologs.Phylogenetics.PAML import ETE3PAML -paml = ETE3PAML(alignmentfile='.ffn', speciestree='.nw', workdir='') - -paml.run(pamlsrc='path/to/codeml/binary', output_folder=None) +paml = ETE3PAML(alignmentfile='.ffn', speciestree='tree.nw', workdir='', + pamlsrc='path/to/codeml/binary') +paml.run(output_folder=None) ``` ### Pruning a tree for use with ETE3PAML @@ -30,7 +34,7 @@ paml.run(pamlsrc='path/to/codeml/binary', output_folder=None) ```python from OrthoEvol.Orthologs.Phylogenetics.PAML import ETE3PAML -paml = ETE3PAML(alignmentfile='HTR1A.ffn', speciestree='speciestree.nw', workdir='') +paml = ETE3PAML(infile='HTR1A.ffn', species_tree='speciestree.nw', workdir='') # Input a list of orgnanisms or an organisms csv file with header as 'Organisms' paml.prune_tree(organisms='organisms.csv') diff --git a/OrthoEvol/Orthologs/Phylogenetics/PAML/codeml.py b/OrthoEvol/Orthologs/Phylogenetics/PAML/codeml.py index ae0e01f0..a2747012 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PAML/codeml.py +++ b/OrthoEvol/Orthologs/Phylogenetics/PAML/codeml.py @@ -11,7 +11,10 @@ class CodemlRun(object): - def __init__(self, P2N_alignment, iqtree_newick, control_file='codeml-8-11-2017.ctl', home=os.getcwd()): + def __init__(self, P2N_alignment, iqtree_newick, control_file='codeml-8-11-2017.ctl', + home=os.getcwd()): + # TODO: Generalize API and functions. + # Set up paths self.home = Path(home) self.paml_path = self.home / Path('PAML') @@ -20,7 +23,8 @@ def __init__(self, P2N_alignment, iqtree_newick, control_file='codeml-8-11-2017. 
# Set up genes control file name and get the OrthoEvol control file path self.gene = str(iqtree_newick).replace('_iqtree.nwk', '') self.control_file = self.paml_path / Path(self.gene + '.ctl') - self.control_template = pkg_resources.resource_filename(paml_control_files.__name__, control_file) + self.control_template = pkg_resources.resource_filename( + paml_control_files.__name__, control_file) print(self.control_template) # Set up CODEML input files @@ -30,7 +34,9 @@ def __init__(self, P2N_alignment, iqtree_newick, control_file='codeml-8-11-2017. self.iqtree_newick = copy(str(self.iqtree_newick), str(self.paml_path)) os.chdir(str(self.paml_path)) - self.cml = codeml.Codeml(self.P2N_alignment, self.iqtree_newick, working_dir=str(self.paml_path), out_file=self.gene +'_codeml.out') + self.cml = codeml.Codeml(self.P2N_alignment, self.iqtree_newick, + working_dir=str(self.paml_path), + out_file=self.gene + '_codeml.out') self.control_setup(self.control_template) def control_setup(self, control_template): @@ -38,6 +44,3 @@ def control_setup(self, control_template): self.cml.print_options() self.cml.ctl_file = str(self.control_file) self.cml.write_ctl_file() - - - diff --git a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml.py b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml.py index 7512aeff..937efb9f 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml.py +++ b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml.py @@ -1,11 +1,10 @@ import os + import pandas as pd from ete3 import EvolTree, Tree -from OrthoEvol.utilities import FullUtilities +from OrthoEvol.Tools.logit import LogIt -# Set up csv to list function -csvtolist = FullUtilities().csvtolist class ETE3PAML(object): """Integration of ETE3 for using PAML's codeml. @@ -13,65 +12,105 @@ class ETE3PAML(object): M1 model is best for orthology inferences. 
""" - def __init__(self, inputfile, speciestree, workdir=''): - """Initialize main variables/files to be used.""" - self.inputfile = inputfile - self.speciestree = speciestree + def __init__(self, infile, species_tree, workdir, pamlsrc=None): + """Initialize main variables/files to be used. + + Ensure that you have the correct path to your codeml binary. It should + be in the paml `/bin`. + + :param infile: The input fasta file. + :type infile: str + :param species_tree: The newick-formatted species tree. + :type species_tree: str + :param workdir: The working directory for input and output. + :type workdir: str + :param pamlsrc: The path to your codeml src if not in PATH, defaults to None + :type pamlsrc: str, optional + """ + # Set up the logger + self.paml_log = LogIt().default(logname="paml", logfile=None) + + self.infile = infile + self.species_tree = species_tree self.workdir = workdir + self.pamlsrc = pamlsrc + + if not self.pamlsrc: + # If user does not specify a path, assume it is in path. + self.pamlsrc = "" # Import your species tree - self._speciestree = Tree(self.speciestree, format=1) - # TODO import organisms list + self._speciestree = Tree(self.species_tree, format=1) # Import alignment file as string - alignment_file = open(self.alignmentfile, 'r') - alignment_str = alignment_file.read() - self.aln_str = alignment_str - alignment_file.close() + self.aln_str = self._import_alignment() + + def _import_alignment(self): + """Import alignment file as string.""" + with open(self.infile, 'r') as alignment_file: + alignment_str = alignment_file.read() + return alignment_str - def prune_tree(self, organismslist, organisms_file=None, column_header="Organisms"): + def prune_tree(self, organisms_list, organisms_file=None, column_header="Organisms"): """Prune branches for species not in the alignment file. 
Keep branches in the species tree for species in the alignment file Some species may not be present in the alignment file due to lack of matching with blast or simply the gene not being in the genome. - """ + :param organisms_list: A list of species used to create the species + tree. + :type organisms_list: str + :param organisms_file: A file of the organisms in case in list is not + provided, defaults to None + :type organisms_file: str, optional + :param column_header: The name of the column in the file, defaults to "Organisms" + :type column_header: str, optional + """ + # If an organisms file is used, import and convert to list. if organisms_file: - og_df = pd.read_csv(organisms_file) - organismslist = list(og_df[column_header]) - - branches2keep = [] - for organism in organismslist: - if organism in self.aln_str: - branches2keep.append(organism) - else: - print('No sequence for %s.' % organism) - - self._speciestree.prune(branches2keep, preserve_branch_length=True) - - # Write the tree to a file - self._speciestree.write(outfile=os.path.join(self.workdir, - 'temptree.nw')) - - def run(self, pamlsrc, outfile, model='M1'): + organisms_df = pd.read_csv(organisms_file) + organisms_list = list(organisms_df[column_header]) + + branches_to_keep = [] + # Prune branches of missing organisms. + try: + for organism in organisms_list: + if organism in self.aln_str: + branches_to_keep.append(organism) + else: + self.paml_log.warning('No sequence for %s.' % organism) + + self._speciestree.prune( + branches_to_keep, preserve_branch_length=True) + except ValueError as e: + self.paml_log.exception(e) + + else: + # Write the tree to a file if not a ValueError + temp_tree_path = os.path.join(self.workdir, 'temptree.nw') + self._speciestree.write(outfile=temp_tree_path) + + def run(self, outfile, tree="temptree.nw", model="M1"): """Run PAML using ETE. The default model is M1 as it is best for orthology inference in our case. You can use models `M2`, `M0`, `M3`. 
- Ensure that you have the correct path to your codeml binary. It should - be in the paml `/bin`. + :param outfile: The output PAML file. + :type outfile: str + :param tree: A newick-formatted species tree, defaults to "temptree.nw" + :type tree: str, optional + :param model: The PAML model to be run, defaults to "M1" + :type model: str, optional """ # Import the newick tree - tree = EvolTree('temptree.nw') - + tree = EvolTree(tree) # Import the alignment - tree.link_to_alignment(self.alignmentfile) - + tree.link_to_alignment(self.infile) + # Set the working directory tree.workdir = self.workdir - # Set the binpath of the codeml binary - tree.execpath = pamlsrc - - tree.run_model(model + '.' + outfile) # Run the model M1 M2 M3 M0 + tree.execpath = self.pamlsrc + # Run the model M1 M2 M3 M0 + tree.run_model(model + '.' + outfile) diff --git a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.fasta b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.fasta index b0892321..29cc2992 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.fasta +++ b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.fasta @@ -1,32 +1,8 @@ ->Human_ECP -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAGGGCTCACTCCATGCCAGACCCCCACAGTTTACGAGGGCTCAGTGGTTTGCCATCCAGCACATCAGTCTGAACCCCCCTCGATGCACCATTGCAATGCGGGCAATTAACAATTATCGATGGCGTTGCAAAAACCAAAATACTTTTCTTCGTACAACTTTTGCTAATGTAGTTAATGTTTGTGGTAACCAAAGTATACGCTGCCCTCATAACAGAACTCTCAACAATTGTCATCGGAGTAGATTCCGGGTGCCTTTACTCCACTGTGACCTCATAAATCCAGGTGCACAGAATATTTCAAACTGCACGTATGCAGACAGACCAGGAAGGAGGTTCTATGTAGTTGCATGTGACAACAGAGATCCA---CGGGATTCTCCACGGTATCCTGTGGTTCCAGTTCACCTGGATACCACCATC ->Goril_ECP 
-ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAGGGCTCACTCCATGCCAGACCCCCACAGTTTACGAGGGCTCAGTGGTTTGCCATCCAGCACATCAGTCTGAACCCCCCTCGATGCACCATTGCAATGCGGGTAATTAACAATTATCGATGGCGTTGCAAAAACCAAAATACTTTTCTTCGTACAACTTTTGCTAATGTAGTTAATGTTTGTGGTAACCAAAGTATACGCTGCCTTCATAACAGAACTCTCAACAATTGTCATCGGAGTAGATTCCGGGTGCCTTTACTCCACTGTGACCTCATAAATCCAGGTGCACAGAATATTTCAAACTGCAGGTATGCAGACAGACCAGGAAGGAGGTTCTATGTAGTTGCATGTGACAACAGAGATCCA---CAGGATTCTCCACGGTATCCTGTGGTTCCTGTTCACCTGGATACCACCATC ->Chimp_ECP -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAGGGCTCACTCCATGCCAGACCCCCACAGTTTACGAGGGCTCAGTGGTTTGCCATCCAGCACATCAGTCTGAACCCCCCTCGATGCACCATTGCAATGCGGGTAATTAACAATTATCGATGGCGTTGCAAAAACCAAAATACTTTTCTTCGTACAACTTTTGCTAATGTAGTTAATGTTTGTGGTAACCAAAGTATACGCTGCCCTCATAACAGAACTCTCAACAATTGTCATCAGAGTAGATTCCGGGTGCCTTTACTCCACTGTGACCTCATAAATCCAGGTGCACAGAATATTTCAAACTGCAGGTATGCAGACAGACCAGGAAGGAGGTTCTATGTAGTTGCATGTGACAACAGAGATCCA---CGGGATTCTCCACGGTATCCTGTGGTTCCAGTTCACCTGGATGCCACCATC ->Orang_ECP -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTAGTGGTGTGGGGGGCTCACTCCATGCCAAACCCCGACAGTTTACGAGGGCTCAGTGGTTTGCCATCCAGCACGTCAGTCTGAACCCTCCTCAATGCACCACTGCAATGCGGGTAATTAACAATTATCAACGGCGTTGCAAAGACCAAAATACTTTTCTTCGTACAACTTTTGCTAATGTAGTTAATGTTTGTGGTAACCCAAATATAACCTGTCCTCGTAACAGAACTCTCCACAATTGTCATCGGAGTAGATTCCAGGTGCCTTTACTCCACTGTAACCTCACAAATCCAGGTGCACAGAATATTTCAAACTGCAAGTATGCAGACAGAACAGAAAGGAGGTTCTATGTAGTTGCATGTGACAACAGAGATCCA---CGGGATTCTCCACGGTATCCTGTGGTTCCAGTTCACCTGGATACCACCATC ->Macaq_ECP -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAGGGCTCACTCCATGCCAGACCCCCACAGTTTACAAAGGCTCAGTGGTTTGCCATCCAGCACATCAATGTGAACCCCCCTCGATGCACCATTGCAATGCGGGTAATAAATAATTATCAACGGCGTTGCAAAAACCAAAATACTTTTCTTCGTACAACTTTTGCATATACAGCTAATGTTTGTCGTAACGAACGTATACGCTGCCCTCGTAACAGAACTCTCCACAATTGTCATCGTAGTAGATACCGGGTGCCTTTACTCCACTGTGACCTCATAAATCCAGGTGCACAGAATATTTCAACCTGCAGGTATGCAGACAGACCAGGACGGAGGTTCTATGTAGTTGCATGTGAAAGCAGAGATCCA---CGGGATTCTCCACGGTATCCAGTGGTTCCAGTTCACCTGGATACCACCATC ->Macaq2_ECP 
-ATGGTTCCAAAACTGTTCACTCCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAGGGCTCACTCCATGCCAGACCCCCACAGTTTACGAAGGCTCAGTGGTTTGCCATCCAGCACATCAATGTGAACCCCCCTCGATGCACCATTGCAATGCGGGTAATAAATAATTATCAACGGCGTTGCAAAAACCAAAATACTTTTCTTCGTACAACTTTTGCAAATACAGTTAATGTTTGTCGTAACCGAAGTATACGCTGCCCTCGTAACAGAACTCTCCACAATTGTCATCGTAGTAGCTACCGGGTGCCTTTACTCCACTGTGACCTCATAAATCCAGGTGCACAGAATATTTCAACCTGCAGGTATGCAGACAGACCAGGACGGAGGTTCTATGTAGTTGCATGTGAAAGCAGAGATCCA---CGGGATTCTCCACGGTATCCAGTGGTTCCAGTTCACCTGGATACCATCATC ->Orang_EDN -ATGGTTCCAAAACTGTTCACTTCTCAAATTTCCCTGCTTCTTCTGTTGGGGCTTCTGGCTGTGGACGGCTCACTCCATGTCAAACCTCCACAGTTTACCTGGGCTCAATGGTTTGAAACCCAGCACATCAATATGACCTCCCAGCAATGCAACAATGCAATGCAGGTCATTAACAATTTTCAACGGCGTTGCAAAAACCAAAATACTTTTCTGCGTACAACTTTTGCTAATGTAGTTAATGTTTGTGGTAACCCAAATATAACCTGTCCTAGTAACAGAAGTCGCAACAATTGTCATCATAGTGGAGTCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCCACAGAATATTTCAAACTGCAGGTATGCGCAGACACCAGCAAACATGTTCTATATAGTTGCATGTGACAACAGGGATCCACGACGGGACCCTCCACAGTATCCGGTGGTTCCAGTTCACCTGGATAGAATCATC ->Chimp_EDN -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTCTGGCAGTGGAGGGCTCACTCCATGTCAAACCTCCACAGTTTACCTGGGCTCAATGGTTTGAAACCCAGCACATCAATATGACCTCCCAGCAATGCACCAATGCAATGCGGGTCATTAACAATTATCAACGGCGATGCAAAAACCAAAATACTTTCCTTCTTACAACTTTTGCTAACGTAGTTAATGTTTGTGGTAACCCAAATATGACCTGTCCTAGTAACAAAACTCGCAAAAATTGTCATCACAGTGGAAGCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCCACAGAATATTTCAAACTGCAGGTATGCGCAGACACCAGCAAACATGTTCTATATAGTTGCATGTGACAACAGAGATCAACGACGGGACCCTCCACAGTATCCAGTGGTTCCAGTTCACCTGGATAGAATCATC ->Gorilla_EDN -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTCTGGCTGTGGAGGGCTCACTCCATGTCAAACCTCCACAGTTTACCTGGGCTCAATGGTTTGAAACCCAGCACATCAATATGACATCCCAGCAATGCACCAATGCAATGCAGGTCATTAACAATTATCAACGGCGATGCAAAAACCAAAATACTTTCCTTCTTACAACTTTTGCTAACGTAGTTAATGTTTGTGGTAACCCAAATATGACCTGTCCTAGTAACAAAACTTGCAAAAATTGTCATCAAAGTGGAAGCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCCACAGAATATTTCAAACTGCAGGTATGCGCAGACACCAGCAAACATGTTCTATATAGTTGCATGTGACAACAGAGATCAACGACGGGACCCTCCACAGTATCCGGTGGTTCCAGTTCACCTGGATAGAATCATC ->Human_EDN 
-ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTCTGGCTGTGGAGGGCTCACTCCATGTCAAACCTCCACAGTTTACCTGGGCTCAATGGTTTGAAACCCAGCACATCAATATGACCTCCCAGCAATGCACCAATGCAATGCAGGTCATTAACAATTATCAACGGCGATGCAAAAACCAAAATACTTTCCTTCTTACAACTTTTGCTAACGTAGTTAATGTTTGTGGTAACCCAAATATGACCTGTCCTAGTAACAAAACTCGCAAAAATTGTCACCACAGTGGAAGCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCCACAGAATATTTCAAACTGCAGGTATGCGCAGACACCAGCAAACATGTTCTATATAGTTGCATGTGACAACAGAGATCAACGACGAGACCCTCCACAGTATCCGGTGGTTCCAGTTCACCTGGATAGAATCATC ->Hylobates_EDN -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAGGGCTCACTCCATGCCAAACCCCAACAGTTTACCTGGGCTCAGTGGTTTGAAATCCAGCACATCAATATGACCTCCCAGCAATGCACCAATGCAATGCGGGTCATTAACAATTATCAACGGCGATGCAAAAACCAAAATACTTTTCTTCGTACCACTTTTGCTAATGTAGTTAATGTTTGTGGTAACCCAAATATGACATGTCCTAGTAACAAAACTCGCAAAAATTGTCATCAAAGTGGAAGCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCCACAGAATATTTCAAACTGCGGGTATGCGCAGACACCAGCAAACATGTTCTATATAGTTGCATGTGACAACAGAGATCAACGACGGGACCCTCCACAGTATCCAGTAGTTCCGGTTCACCTGGATAGAATCATC ->Macaq_EDN -ATGGTTCCAAAACTGTTCACTTCCCAAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAAGGCTCACTTCATGCCAAACCCGGACAATTTACCTGGGCTCAGTGGTTTGAAATCCAGCATATAAATATGACCTCTGGCCAATGCACCAATGCAATGCAGGTCATTAACAATTATCAACGGCGATGCAAAAATCAAAATACTTTTCTTCTTACAACTTTTGCTGATGTAGTTCATGTCTGTGGTAACCCAAGCATGCCCTGCCCTAGCAACACAAGTCTCAACAATTGTCATCATAGTGGAGTCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCGAAGG---ATTTCAAATTGCAGGTATACACAGACAACAGCAAACAAGTACTACATAGTTGCATGTAACAACAGCGATCCAGTACGGGACCCTCCACAGTATCCAGTGGTTCCAGTTCACCTGGATAGAATCATC ->Macaq2_EDN -ATGGTTCCAAAACTGTTCACTTCCCCAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAAGGCTCACTTCATGCCAAACCCAGACAATTTACCTGGGCTCAGTGGTTTGAAATCCAGCATATAAATATGACCTCTGGCCAATGCACCAATGCAATGCTGGTAATTAACAATTATCAACGGCGATGCAAAAATCAAAATACTTTTCTTCTTACAACTTTTGCTGATGTAGTTCATGTCTGTGGTAACCCAAGCATGCCCTGCCCTAGCAACACAAGTCTCAACAATTGTCATCATAGTGGAGTCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCGAAGG---ATTTCAAATTGCAGGTATACACAGACAACAGCAAACAAGTACTACATAGTTGCATGTAACAACAGCGATCCAGTACGGGACCCTCCACAGTATCCAGTGGTTCCAGTTCACTTGGATAGAGTCATC ->Papio_EDN 
-ATGGTTCCAAAACTGTTCACTTCCCCAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAAGGCTCACTTCATGCCAAACCCGGACAATTTACCTGGGCTCAGTGGTTTGAAATCCAGCATATAAATATGACCTCTGGCCAATGCACCAATGCAATGCTGGTAATTAACAATTATCAACGGCGATGCAAAAATCAAAATACTTTTCTTCTTACAACTTTTGCTGATGTAGTTCATGTCTGTGGTAACCCAAGCATGCCCTGCCCTAGCAACACAAGTCTCAACAATTGTCATCATAGTGGAGTCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCGAAGG---ATTTCAAATTGCAGGTATACACAGACAACAGCAAACAAGTACTACATAGTTGCATGTAACAACAGCGATCCAGTACGGGACCCTCCACAGTATCCAGTGGTTCCAGTTCACTTGGATAGAGTCATC ->Cercopith_EDN -ATGGTTCCAAAACTGTTCACTTCCCCAATTTGTCTGCTTCTTCTGTTGGGGCTTATGGGTGTGGAGGGCTCACTCCATGCCAAACCCGGACAATTTACCTGGGCTCAGTGGTTTGAAATCCAGCATATAAATATGACCTCTGGCCAATGCACCAATGCAATGCTGGTAATTAACAATTATCAACGGCGATGCAAAAATCAAAATACTTTTCTTCTTACAACTTTTGCTGATGTAGTTCATGTCTGTGGTAACCCAAGCATGCCCTGCCCTAGCAACACAAGTCTCAACAATTGTCATCATAGTGGAGTCCAGGTGCCTTTAATCCACTGTAACCTCACAACTCCAAGTCAAAAT---ATTTCAAATTGCAAGTATACACAGACAACAGCAAACAAGTTCTACATAGTTGCATGTAACAACAGCGATCCAGTACGGGACCCTCCACAGTATCCAGTGGTTCCAGTTCACCTGGATAGAGTCATC - - +>Hylobates_lar +ATGGCCAGGTACAGATGCTGCCGCAGCCAGAGCCGGAGCAGATGTTACCGCCAGAGCCGGAGCAGATGTTACCGCCAGAGGCAAAGCCAGAGTCGGAGCAGATGTTACCGCCAGAGCCAGAGCCGGAGCAGATGTTACCGCCAGAGACAAAGAAGTCGGAGACGAAGGAGGCGGAGCTGCCAGACACGGAGGAGAGCCATGAGGTGT---CGCCGCAGGTACAGGCTGAGACGTAGAAGCTGTTACCACATTGTATCT +>Papio_cynocephalus +ATGGCCAGGTACAGATGCTGCCGCAGCCAGAGCCGAAGCAGATGCTATCGCCAGAGCCGGAGCAGATGTAACCGCCAGAGACAGAGCCAAAGCCGGAGAAGCTGCTATCGCCAGAGCCAAAGCCGGAGCAGATGTTACCGCCAGAGACAGAGAAGTCGTAGACGAAGGAGGCGACGCTGCCAGACACGGAGGAGAGCCATGAGGTGCTTCCGCCGCAGGTACAGGCTGAGGCGTAGGAGGCCCTATCACATCGTGTCT +>Gorilla_gorilla +ATGGCCAGGTACAGATGCTGTCGCAGCCAGAGCCGCAGCAGATGTTACCGGCAGAGCCGGAGCAGGTGTTACCGGCAGAGACAAAGCCAGAGCCGGAGCAGATGCTACCGGCAGAGCCAAAGCCGGAGCAGGTGTTACCGGCAGAGACAAAGAAGTCGCAGACGTAGGCGGAGGAGCTGCCAGACACGGAGGAGAGCCATGAGGTGCTGCCGCCGCAGGTACAGACTGAGACGTAGAAGACCCTATCATATTGTATCT +>Pan_troglodytes 
+ATGGCCAGGTACAGATGCTGTCGCAGCCAGAGCCGGAGCAGATGTTACCGGCAGAGACGGAGCAGGTGTTACCGGCAAAGGCAAAGCCAAAGTCGGAGCAGATGTTACCGGCAGAGCCAGAGACGGAGCAGGTGTTACCGGCAAAGACAAAGAAGTCGCAGACGAAGGCGACGGAGCTGCCAGACACGGAGGAGAGCCATGAGGTGCTGCCGCCGCAGGTACAGACTGAGACGTAAAAGATGTTACCATATTGTATCT \ No newline at end of file diff --git a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.nw b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.nw index 0e987989..99fdeb2a 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.nw +++ b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ECP_EDN_15.nw @@ -1 +1 @@ -(((Hylobates_EDN , (Orang_EDN , (Gorilla_EDN , (Chimp_EDN , Human_EDN )))), (Macaq_EDN , (Cercopith_EDN , (Macaq2_EDN , Papio_EDN )))), (Orang_ECP, ((Macaq_ECP, Macaq2_ECP), (Goril_ECP, Chimp_ECP, Human_ECP)))); +((Hylobates_lar,(Gorilla_gorilla,Pan_troglodytes)),Papio_cynocephalus); \ No newline at end of file diff --git a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ete3paml_test.py b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ete3paml_test.py index 8784d870..6686f146 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ete3paml_test.py +++ b/OrthoEvol/Orthologs/Phylogenetics/PAML/ete3paml_test/ete3paml_test.py @@ -6,7 +6,7 @@ class PamlTest(object): """Test codeml with a default tree and newick file.""" def __init__(self, tree="ECP_EDN_15.nw", alignment="ECP_EDN_15.fasta", - workdir="", pamlpath=""): + workdir=".", pamlpath=""): """Test that paml is in your path and working properly. :param tree: (Default value = "ECP_EDN_15.nw") @@ -19,22 +19,20 @@ def __init__(self, tree="ECP_EDN_15.nw", alignment="ECP_EDN_15.fasta", self.alignment = alignment self.pamlpath = pamlpath - model = 'M1' - self.defaultmodel = model + self.defaultmodel = 'M1' - wd = workdir - self.workdir = wd + self.workdir = workdir def main(self): """The main function for running the test.""" print("Running model %s paml on input." 
% str(self.defaultmodel)) - + tree = EvolTree(self.tree) # Import the newick tree - tree.link_to_alignment(self.alignment) # Import the alignment tree.workdir = self.workdir # Set the working directory tree.execpath = self.pamlpath # Set the binpath of the codeml binary - tree.run_model(self.defaultmodel) # Run the codeml model + tree.link_to_alignment(self.alignment) # Import the alignment + tree.run_model('M1') # Run the codeml model if __name__ == "__main__": diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/README.md b/OrthoEvol/Orthologs/Phylogenetics/PhyML/README.md index 0e999a5a..438b679c 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyML/README.md +++ b/OrthoEvol/Orthologs/Phylogenetics/PhyML/README.md @@ -1,4 +1,5 @@ # PhyML Documentation + PhyML is a phylogeny software based on the maximum-likelihood principle. Early PhyML versions used a fast algorithm performing Nearest Neighbor Interchanges (NNIs) to improve a reasonable starting tree topology. @@ -6,24 +7,37 @@ reasonable starting tree topology. Learn more about PhyML [here](http://www.atgc-montpellier.fr/). ## Default Parameters -The default dataype is `'aa' (amino acid)`, but you may use 'nt' for nuclueotide. + +The default dataype is `'aa' (amino acid)`, but you may use 'nt' for nucleotide. 
## Examples ### Running Phyml + ```python -from OrthoEvol.Orthologs.Phylogenetics.PAML import ETE3PAML +from OrthoEvol.Orthologs.Phylogenetics.PhyML import PhyML -PhyML(phyml_input='path/to/phylip/multisequencealignment', datatype='aa') +htr1a = PhyML(infile='HTR1A.phy', datatype='aa') +htr1a.run() ``` ### Running Phyml with our parallel module + ```python -from OrthoEvol.Orthologs.Phylogenetics.PAML import ETE3PAML +from OrthoEvol.Orthologs.Phylogenetics.PhyML import PhyML +from OrthoEvol.Tools.parallel import Multiprocess -PhyML(phyml_input='path/to/phylip/multisequencealignment', datatype='aa') -``` +files = ['HTR1A.phy', 'HTR1E.phy', 'MAOA.phy'] + +def phyml(filename): + phyml = PhyML(infile=filename, datatype='aa') + phyml.run() +if __name__ == '__main__': + mp = Multiprocess() + mp.map2function(phyml, files) +``` ## Notes -This class is designed PhyML version 3.1. \ No newline at end of file + +This class is designed for PhyML [version 3.1](http://www.atgc-montpellier.fr/download/binaries/phyml/PhyML-3.1.zip). 
\ No newline at end of file diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/__init__.py b/OrthoEvol/Orthologs/Phylogenetics/PhyML/__init__.py index 67e77c70..a9e8a2c3 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyML/__init__.py +++ b/OrthoEvol/Orthologs/Phylogenetics/PhyML/__init__.py @@ -1,6 +1,6 @@ """PhyML tools.""" -from .orthophyml import PhyML +from .phyml import PhyML #from phyml_test.phyml_test import PhymlTest # Make this explicit, then they show up in the API docs diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/orthophyml.py b/OrthoEvol/Orthologs/Phylogenetics/PhyML/orthophyml.py deleted file mode 100644 index 33b98a47..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyML/orthophyml.py +++ /dev/null @@ -1,46 +0,0 @@ -from Bio.Phylo.Applications import PhymlCommandline -import sys - -from OrthoEvol.Tools.logit import LogIt - - -class PhyML(object): - """The PhyML class uses Biopython's PhyMLCommandline wrapper to generate trees - from the PhyML executable.""" - - def __init__(self, phyml_input, datatype='aa'): - """Run phyml to generate tree results. - - If you're using Linux, ensure that your phyml path is set in your bash - profile. If you're using Windows, this function will look for the name - of the executable 'PhyML-3.1_win32.exe'. - """ - self.phyml_log = LogIt().default(logname="GenBank", logfile=None) - - # Use the phyml executable file - phyml_exe = None - - # This is mainly intended for windows use or use with an executable - # file - win32 = "win32" - executable = "PhyML-3.1_win32.exe" - exe_name = executable if sys.platform == win32 else "phyml" - phyml_exe = exe_name - self.phyml_exe = phyml_exe - self.datatype = datatype - self.phyml_input = phyml_input - self._runphyml() - - def _runphyml(self): - """"Run phyml. - - Input a phylip formatted alignment file and describe the datatype - ('nt' or 'aa'). 
- """ - - run_phyml = PhymlCommandline(self.phyml_exe, - input=self.phyml_input, - datatype=self.datatype) - out_log, err_log = run_phyml() - self.phyml_log(out_log) - self.phyml_log(err_log) diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml.py b/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml.py new file mode 100644 index 00000000..65777772 --- /dev/null +++ b/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml.py @@ -0,0 +1,76 @@ +import sys +import shutil + +from Bio.Phylo.Applications import PhymlCommandline +from Bio.Application import ApplicationError +from Bio import AlignIO + +from OrthoEvol.Tools.logit import LogIt + + +class PhyML(object): + """The PhyML class uses Biopython's PhyMLCommandline wrapper to generate + trees from the PhyML executable.""" + + def __init__(self, infile, datatype="aa"): + """Input a phylip formatted alignment file and specify a datatype. + + :param infile: An input file that is phylip formatted. + :type infile: str + :param datatype: The datatype of the infile ("nt"/"aa"), defaults to "aa" + :type datatype: str, optional + + If you're using Linux, ensure that your phyml path is set in your bash + profile. If you're using Windows, this function will look for the name + of the executable 'PhyML-3.1_win32.exe'. + """ + # Set up logging + self.phyml_log = LogIt().default(logname="Phyml", logfile=None) + # Check that the phyml executable is in the path + self.phyml_exe = self._check_exe() + self.datatype = datatype + if self._validate_format(infile): + self.infile = infile + + def _validate_format(self, infile): + """"Validate the format of the input file. + + :param infile: An input file that is phylip formatted. 
+ :type infile: str + """ + try: + AlignIO.read(open(infile), "phylip") + except ValueError as e: + self.phyml_log.exception(e) + else: + return True + return False + + def _check_exe(self): + """Check to see if the phyml exe is in the path.""" + phyml_exe = None + win32 = "win32" + executable = "PhyML-3.1_win32.exe" + exe_name = executable if sys.platform == win32 else "phyml" + phyml_exe = exe_name + if shutil.which(phyml_exe): + return phyml_exe + else: + self.phyml_log.error("%s is not in the path." % phyml_exe) + + def run(self, model="WAG", alpha="e", bootstrap=100): + """"Run phyml.""" + try: + run_phyml = PhymlCommandline(self.phyml_exe, + input=self.infile, + datatype=self.datatype, model=model, + alpha=alpha, bootstrap=bootstrap) + self.phyml_log.info("Running %s on %s" % (self.phyml_exe, + self.infile)) + out_log, err_log = run_phyml() + if out_log: + self.phyml_log.info(out_log) + if err_log: + self.phyml_log.error(err_log) + except ApplicationError as e: + self.phyml_log.exception(e) diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy b/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy deleted file mode 100644 index 09ffd280..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy +++ /dev/null @@ -1,308 +0,0 @@ - 13 1098 -Ailuropoda atgaatatca ctaactgtac cccagaagcc agtgtggctg cgagacccaa -Bos atgaacatca ctaactgtac cccggaagcc agtgtggctg tgagacccaa -Callithrix atgaacatca caaactgtac gacagaagcc agcgtggctg taagacccaa -Canis atgaatctca ctaactgtac cacagaagcc aatgtggctg tgagacccaa -Cavia atgaacatca caaactgcac gacagatgcc agcatggttg taaggcccaa -Echinops atgaacatca ctaactgtac cccagaagcc agtgtggctg tgacaccgaa -Equus atgaacatca ctaactgtac cacagaagcc agcgtggctg tgagacccaa -Felis atgaatatca ctaactgtac cacagaagcc agtgtggctg tgagacccaa -Gorilla atgaacatca caaactgtac cacagaagcc agcatggcta taagacccaa -Heterocephalus atgaacctca cgaactatac cacggaagcc agtgtggctg taaaacccaa -Homo atgaacatca 
caaactgtac cacagaggcc agcatggcta taagacccaa -Loxodonta atgaacatca ctaactgtac cccagaagcg agtgcagctg tgagacctaa -Macaca atgaacatca caaactgtac cacagaagcc ggcatggctg tgaggcccaa - - gaccatcact gagaagatgc tcatttccat gactctggtg gtcatcacca - gaccattacg gagaagatgc tcatttctat gactctggtg atcatcacca - gaccatcact gagaagatgc tcatttgcat gactctggtg gtcatcacca - gaccatcact gagaagatgc tcatttccgt gactctggtg atcatcacca - gacagtgact gagaagatgc ttatttgtat gactctagtg ataatcacca - gaccatcact gagaagatgc tcatttccat gactctagtg atcatcacca - gaccgtcact gagaagatgc tcatttccat gaccctggtg atcatcacct - gaccgtcact gagaagatgc tcatatccat gactctggtg accatcacca - gaccatcact gagaagatgc tcatttgcat gactctggtg gtcatcacca - gactgtcact gagaagatgc ttatttgcat gactctggtg ataatcacca - gaccatcact gagaagatgc tcatttgcat gactctggtg gtcatcacca - gactatcact gagaaaatgc tcatttctgt gactctggtg atcatcacca - gaccatcact gaaaagatgc tcatttgcat gactctggtg gtcatcacca - - ccctgactat gttgctgaac ttggccgtga tcacggctat ctgtaccacc - ccctgaccat gctgctaaac tccgccgtga tcatggccat ctgcaccacc - cccttaccac gttgctgaac ttggctgtga tcatggccat ctgcaccacc - ccctgaccat gttgttgaac ttggccgtga tcatggccat ctgtaccacc - cgctaaccat gttgctgaac tctgctgtaa tcatggccat ctgcaccacc - ccttgacaat gttgttgaat gcagccgtta tcctggccat ctgcaccacc - ccctgaccat gttgctaaac tcagccgtga tcatggccat ttgcaccacc - ccctgaccat gttgttgaat ttggccgtga tcatggccat ctgtaccacc - ccctcaccac gttactgaac ttggctgtga tcatggctat tggcaccact - cactaaccat gttattgaac tctgctgtca tcatggccat ctgcaccacc - ccctcaccac gttgctgaac ttggctgtga tcatggctat tggcaccacc - ccttgacaat gttgctgaac ttggcggtga tcatggccat ctgcaccacc - ccctcaccac gttgctgaac ttggcggtga tcatggctat ctgcaccacc - - aagaagctcc accagcctgc caactacctg atctgctccc tggctgtgac - aagaagctcc accagcctgc caactacctg atctgttctc tagccgtgac - aagaagctcc accagcctgc aaactactta atctgttctc tggccgtgac - aagaagctcc accagcctgc caactacctg atctgttccc tggctgtgac - aagaagctcc accagcccgc caactacctg atctgctctc tggcagtgac - aagaagctcc accagcctgc caactacttg atctgttctc 
tggctgtgac - aaaaagctcc accagcctgc caactacttg atctgctctc tggctgtgac - aagaagctcc accagcctgc caactacctg atctgttctc tggccgtgac - aagaagctcc accagcctgc caactaccta atctgttctc tggccgtgac - aggaagctcc accagcctgc caactacctg atctgctccc tggccgtgac - aagaagctcc accagcctgc caactaccta atctgttctc tggccgtgac - aagaagctcc atcagcccgc aaactacctg atctgttctc tggctgtgac - aagaagctcc accagcctgc caactaccta atctgttctc tggccgtgac - - agatctcctg gtagcggtgc tcgtcatgcc cctgagcatc atgtacattg - ggatctcctg gtggctgtgc ttgtcatgcc cttgagcatc atgtacattg - agacctcctg gtggcggtgc tcgtcatgcc cctgagcatc atgtacattg - agacctcctg gtggcagtgc tcgtcatgcc cctgagcatc atgtacattg - tgacctcctg gtggcagtgc tcgtcatgcc gctgagcatc atgtacattg - agacctcctg gtggcagttc ttgtcatgcc tctgagcatc atgtacattg - ggacctgctg gtagcagtcc tggtgatgcc ccttagcatc atgtacattg - ggacctcctg gtggcagtgc tcgtcatgcc cctgagcatc atgtacattg - ggacctcctg gtggcagtgc tcgtcatgcc cctgagcatc atctacattg - tgacctccta gtggcggtgc tcgtcatgcc cctgagcgtc atgtacattg - ggacctcctg gtggcagtgc tcgtcatgcc cctgagcatc atctacattg - agacctcctg gtggcagtac ttgtcatgcc tctgagcatc atgtacattg - ggacctcctg gtagccgtgc tcgtcatgcc cctgagcatc atatacattg - - tcatggacag ctggaaacta gggtacttca tctgcgaggt gtggctgagt - tcatggacag ctggaagctg gggtacttca tctgcgaggt gtggctgagt - tcatggaccg ctggaagctt ggatacttcc tctgtgaggt gtggctgagt - tcatggacag ctggaaacta gggtacttca tctgcgaggt gtggctgagt - tcatggacag ctggaggctg ggctacttca tttgtgaagt gtggctgagt - tcatggacag ctggaagctt gggtacttca tctgcgaggt gtggctgagt - tcatggacag ctggaagcta gggtacttcg tctgtgaggt gtggctgagt - ccatggaaag ctggaaacta gggtacttca tctgtgaggt gtggctgagt - tcatggatcg ctggaagctt gggtacttcc tctgtgaggt gtggctgagt - tcatggacaa ctggagactg gggtacttca tctgtgaggt gtggctgagt - tcatggatcg ctggaagctt gggtacttcc tctgtgaggt gtggctgagt - tcatggacag ctggaaactt gggtacttca tctgtgaggt gtggctgagc - tcatggaccg ctggaagctt ggatacttcc tctgtgaggt gtggctgagt - - gtggacatga cctgctgcac ctgttccatc ctccacctct gtgtgattgc - gtggatatga cctgctgcac ctgctccatc 
cttcatctct gtgtgatcgc - gtggacatga cctgctgcac ctgctccatc ctccacctct gtgtcattgc - gtggacatga cctgctgcac ctgctccatc ctccatctct gtgtgattgc - gtggatatga cctgctgcac ctgttccatc ctgcatctct gtgtgatcgc - gtagacatga cctgctgcac ctgctccatt cttcatctct gtgtcattgc - gtggacatga catgctgcac ctgctccatc ctccatctct gtgtgattgc - gtggacatga cctgctgcac ctgctccatc ctccatctct gtgtgattgc - gtggacatga cctgctgcac ctgctccatc ctccacctct gtgtcattgc - gtggatatga cctgctgcac ctgctccatc ctccatctct gtgtgatcgc - gtggacatga cctgctgcac ctgctccatc ctccacctct gtgtcattgc - atggacatga cctgctgtac ctgctccatc ctccatctct gtgtcattgc - gtggacatga cctgctgcac ctgctccatc ctccacctct gtgtcattgc - - tctcgacagg tactgggcca tcaccaatgc tattgaatac gccaggaaga - cctggacagg tactgggcca tcaccaatgc tatcgagtac gccaggaaga - cctggacagg tactgggcca tcaccaatgc tattgaatat gccaggaaga - cctagacagg tactgggcca tcaccaatgc tattgaatat gccaggaaga - gctggacagg tactgggcca tcaccaatgc tattgaatat gccaggaaga - cctggatcgg tactgggcca tcaccaatgc tattgaatac gccaggaaga - cctggacagg tactgggcca tcaccaacgc tattgagtat gccaggaaga - cctggacagg tactgggcca tcaccaatgc tattgaatat gccaggaaga - cctggacagg tactgggcca tcaccaatgc tattgaatac gccaggaaga - actggacagg tactgggcca tcaccaaagc tattgaatat gcgaggaaaa - cctggacagg tactgggcca tcaccaatgc tattgaatac gccaggaaga - cctggacagg tactgggcca tcaccaatgc tattgaatat gccaggaaga - cctggacagg tactgggcca tcaccaatgc tattgaatac gccaggaaga - - ggacggccaa gagggccggg ctgatgatcc tcaccgtttg gactatctcc - ggactgccaa gagggccggg ctgatgatcc tcacggtctg gaccatctcc - ggacagccaa aagggccgca ctgatgatcc tcactgtctg gactatctcc - ggaccaccaa gagagctggg ctgatgatcc tcaccgtctg gaccatttcc - ggacagccaa aagggctggc ctgatgatcc tcactgtgtg gactatctcc - ggactgccaa aagggcgggg ctgatgatcc tcattgtctg gaccatctcc - ggaccgccaa gagggctgga ctgatgatcc tcaccgtctg gaccatctcc - ggacggccaa gagggctggg ctgatgatcc tcaccgtctg gaccatctcc - ggacggccaa gagggccgcg ctgatgatcc tcaccgtctg gaccatctcc - gaacagccag gagagctggc ctgatgatcc tcaccgtgtg gactatctct - ggacggccaa gagggccgcg ctgatgatcc 
ttaccgtctg gaccatctcc - ggactgccaa gagggctgga ttgatgatcc tcactgtctg gaccatctct - ggacggccaa gagggcggcg ctgatgatcc tcaccgtctg gaccatctcc - - atcttcatct ccatgccccc tctgttctgg aggagccacc gccagctcag - atcttcatct ccatgccccc tctgttctgg aggagccacc gcagactcag - atcttcatct ccatgccccc tctgttctgg aggagccacc gccgcctaag - atcttcatct ccatgccccc tctgttctgg aggagccacc gtcaactcag - atcttcatct ccatgccccc tctgttctgg aggagccacc gtcaactcag - atcttcatct ccatgccccc tctgttctgg aggagccacc gccggctcag - gtcttcatct ccatgccccc tctgttctgg aggagccacc gccgactcag - atcttcatct ccatgccccc tctgttctgg aggagccact gccagctgag - attttcatct ccatgccccc tctgttctgg aggagccacc gccgcctaag - attttcatct ccatgccccc tctgttctgg aggagccacc gccaagtcag - attttcatct ccatgccccc tctgttctgg agaagccacc gccgcctaag - gtcttcatct ccatgccccc tctgttttgg aggagtcacc gcctactcag - attttcatct ccatgccccc tctgttctgg aggagccacc gccgcctaag - - cccacctcct agccagtgca ccatccagca tgaccatgtc atctacacca - cccgcccccc agtcagtgca ccatccggca cgaccacgtc atctacacca - ccctccccct agtcagtgca ccatccagca cgaccatgtc atctacacca - cccaccaccc agtcagtgca ccatccagca tgaccatgtc atctacacca - cccacccccc agccagtgta ccatccagca tgaccatgtc atctacacca - cccacctccg agtcaatgca ccatccagca tgaccacgtc atctacacca - cctgcccctt agtcagtgca ccatccagca tgaccacgtc atctacacca - cccacgccct agtcagtgca caatccagca tgaccatgtc atctacacca - ccctccccct agtcagtgca ccatccagca cgaccatgtt atctacacca - cccgcccccc agccagtgta cgatccagca tgaccatgtc atctacacca - ccctccccct agtcagtgca ccatccagca cgaccatgtt atctacacca - cccacctccc agtcagtgcg ccatccagca cgaccatgtc atctacacca - ccctccccct agccagtgca ccatccagca cgaccatgtg atctacacca - - tctactccac actcggggca ttttatatcc ccttgacttt gatacttatt - tctactccac acttggggca ttctacattc ccttgacttt gatactgatt - tttactccac gctgggcgcg ttttatatcc ccttgacttt gatactgatt - tttactccac acttggagcc ttttatatcc cattgacttt gatacttatt - tttactcaac attcggggca ttttatatcc ctttgacttt gatcctgatt - tttactccac actgggggcc ttttatatcc ctttgacttt gatcctgatt - tttactccac acttggggca 
ttttatatcc ccttgacttt gatactgatt - tttactccac actgggggca ttttatatcc ccttgacttt gatacttatt - tttactccac gctgggtgcg ttttatatcc ccttgacttt gatactgatt - tttactccac acttggagca ttttatatcc ctttgacttt gatcctgatt - tttactccac gctgggtgcg ttttatatcc ccttgacttt gatactgatt - tttattccac acttggggca ttttatatcc ccttgatatt gatactgatt - tttactccac gctgggtgcg ttttatatcc ccttgacttt aatactgatt - - ctgtattacc gaatctacca cgcggccaag agcctctacc agaaaagagg - ctctattacc ggatttacca tgcagccaag agcctttacc agaaaagagg - ctctattacc ggatttacca tgcagccaag agcctttacc agaaaagggg - ctgtattacc ggatttacca tgcagccaag agcctgtacc agaaaagagg - ctatattacc ggatttacca cgcggccaag agtctttacc agaaaagggg - ctctactaca ggatttatca tgcagccaag agcctctacc aaaaacgagg - ctctattacc ggatttacca cgcagccaag agtctttacc agaaaagagg - ctgtattacc gtatttatca tgcagccaag agcctttacc agaaaagagg - ctctattacc ggatttacca cgcggccaag agcctttacc agaaaagggg - ctctattacc ggatttacca cgcagccaag agtctttacc agaaaagggg - ctctattacc ggatttacca cgcggccaag agcctttacc agaaaagggg - ctctattacc ggatttacca tgcagccaag agcctgtacc agaaaagggg - ctctattacc ggatttacca cgcggccaag agcctttacc agaaaagggg - - atcgagccgg cacttaagca acagaagcac ggatagccaa aattcttttg - ttcaagccgg catttaagca acagaagcac agatagccaa aattcgttcg - atcaagtcgg cacttaagca acagaagcac agatagccag aattcttttg - atcaagccgg cacttaagca acagaagcac agatagccaa aattcttttg - atcaagccgc cacttgagta atagaagtac agatagccag aattctttcg - atcaagccgg cacttaagca acagaagcac agacagccaa aattcttttg - atcaagccgg cacttaagca acagaagcac agacagccaa aattcgtttg - atcaagccgg cacttaagca acagaagcac agatagccaa aattcttttg - atcaagtcgg cacttaagca acagaagcac agatagccag aattcttttg - atcgagccgg catttaagca acagaagtac agatagccag aattcttttg - atcaagtcgg cacttaagca acagaagcac agatagccag aattcttttg - atcgagccgg cacttaagca acagaagcac agatagccaa aattcttttg - atcgagtcgg cacttaagca acagaagcac agatagccag aattcttttg - - cgagttgtaa actgacacag actttctgtg tgtctgattt gtccacctca - ccagttgcaa actgacacag acgttctgtg tgtctgactt ctccacctca - caagttgtaa 
acttacacag actttctgtg tgtctgactt ctccacctca - cgagttgtaa gcttacacag actttctgtg tgtctgattt ctccacctca - caagttgtaa acttacacag actttctgtg tgtctgactt ctccacctca - ctagttgtaa acttacccag actttctgtg tgtctgactt ctccacctca - cgagctgtaa acttacacag actttctgtg tgtctgactt ctccacctca - cgagttgtaa acttacacag actttctgtg tgtctgattt ctccacctca - caagttgtaa acttacacag actttctgtg tgtctgactt ctccacctca - cgagttgtaa acttacacag acgttctgcg tgtctgactt ctccacctca - caagttgtaa acttacacag actttctgtg tgtctgactt ctccacctca - caagttgtaa actgacccag actttctgtg tatctgactt ctccacctca - caaattgtaa acttacacag actttctgtg tgtctgactt ctccacttca - - gaccctacca cagagtttga aaagatccac acctctatca ggatcccttc - gaccctacca cagagtttga gaagatccac acctccatta ggattcctcc - gaccctacca tagagtttga aaagttccat gcctctatca ggatcccacc - gaccctacta cagagtttga aaagatcaac acctctatca ggatcccttc - gatcctacca cagagtttga aaagatccat gcttccattc ggatcccccc - gaccctacta cagaatttga aaagatccac acttccatca ggatccctcc - gaccccacca cagagtttga aaagatccac acctccatca ggatccctcc - gaccctacca cagagtttga gaagatccac acctctatca ggatcccttc - gaccctacca cagagtttga aaagttccat gcctccatca ggatcccccc - gatcccacta cagagtttga aaagatccat acttccatcc ggatccctcc - gaccctacca cagagtttga aaagttccat gcctccatca ggatcccccc - gaccctacca cggaatttga aaaagtccac acctccatca ggattcctcc - gaccctacca cagagtttga aaagttccat gcctccatca ggatcccacc - - cttcgataat gatctagacc accccagaga acgtcagcag atctctagca - ctttgacaat gacctagatt acccaggaga acgccaacaa atctccagca - cttcgacaat gatctggatc acccgggaga acgccagcag atctctagca - cttcgacaat gatctagatc acccaggaga acgtcagcaa atctctagta - ctttgacaat gatctcgatc accctggaga acgccagcaa atttccagta - cttcgacaac gatctagatc acccaggaga acgccagcaa atctctagca - ctttgacaat gatctcgatc atccgggaga acgccagcaa atctctagta - cttcgacaat gatctagatc accctggaga acggcagcaa atctctagca - cttcgacaat gatctagatc acccaggaga acgtcagcag atctctagca - ctttgacaat gacctcgatc aacctggaga acgccagcaa atctccagta - cttcgacaat gatctagatc acccaggaga acgtcagcag atctctagca - cttcgacaat 
gatctagatc acccaggaga acgccagcaa atctctagta - cttcgacaat gatctagatc acccaggaga acgccagcag atttctagca - - ccagggagcg taaggcagca cgcatcctgg ggctgatttt gggggcattc - ccagggagcg caaggcagca cgaatcctgg gtctgatttt gggtgcgttc - ccagggaacg gaaggcagca cgcatcctgg ggctgattct gggtgcattc - ccagggaacg caaggcagca cgcatcctag gactgatttt gggagcattc - ccagggaacg caaggcagcg cgcatcctcg gactgatttt gggtgcattc - ccagggagcg aaaagcagca cgcatcctgg gcctgatttt gggtgcattt - ccagggagcg caaggcagca cgcatcctgg gcctgatttt gggggcgttc - ccagggagcg caaggcagca cgaatcctag gactgatttt gggtgcattc - ccagggaacg gaaggcagca cgcatcctgg gactgattct gggtgcattc - ccagggaacg caaggcagca cgcatcctcg gactgattct gggtgcattc - ccagggaacg gaaggcagca cgcatcctgg ggctgattct gggtgcattc - ccagggagcg taaagcagca cgcatcctgg gcctgatttt gggtgcattc - ccagggaacg gaaggcagcg cgcatcctgg ggttgattct gggcgcattc - - attttgtcgt ggctgccatt tttcatcaaa gagttgattg taggtctgag - atcttatcct ggctgccatt cttcatcaaa gagttgatcg taggtctgag - attttgtcct ggctgccatt tttcatcaaa gagttgattg tgggtctgag - attttgtcat ggctgccatt tttcatcaag gagctgattg taggtctgag - attttgtctt ggcttccatt ttttatcaaa gagttaattg taggtctgag - attttgtcct ggcttccatt ttttatcaag gaattgattg taggtctgag - attttgtcgt ggctgccatt tttcatcaaa gagttgattg taggtctgag - attttgtcat ggctgccatt tttcatcaaa gagttgattg taggtctgag - attttatcct ggctgccatt tttcatcaaa gagttgattg tgggtttgag - attttgtctt ggcttccgtt ttttatcaaa gagttgattg taggtctgag - attttatcct ggctgccatt tttcatcaaa gagttgattg tgggtctgag - attttgtctt ggctgccatt tttcatcaaa gaattgattg taggtctgag - attttgtcct ggctgccatt tttcatcaaa gagttgattg tgggtctgag - - catctacaca gtgtcctctg aagtggctga ttttttgacg tggcttggtt - cacctatgct gtgtcctccg aagtggctga ttttttgacc tggcttggtt - catccacacc gtgtcctcag aagtggccga ctttctgaca tggctcggtt - catctacaca gtgtcctctg aagtggctga ttttctgacg tggcttggct - catttacact gtatcctctg aagtgggtga ctttttgaca tggcttggtt - catatgcact gtgtcctctg aagtagctga cttcttgacc tggcttggtt - catctacacc gtgtcctccg gagtggctga ttttttgaca tggcttggtt - 
catctataca gtgtcctctg aagtggctga ttttttgacg tggctcggtt - catctacacc gtgtcctcgg aagtggccga ctttctgacg tggctcggtt - catttacact gtgtcctccg aagtgggtga ttttttgaca tggctcggtt - catctacacc gtgtcctcgg aagtggccga ctttctgacg tggctcggtt - catttacact gtgtcctctg aagtggctga ctttttgaca tggcttggtt - catctacacc gtgtcctcgg aagtggccga ttttctgacg tggctcggtt - - acgtgaattc tctgatcaac cctctgctct acactagttt caatgaagac - atgtgaattc tctgatcaac cctctgctct acacaagttt caatgaagac - atgttaattc tctgatcaac cctctgctct acacaagttt taatgaagac - atgttaattc tctgatcaac cctctgctct acacaagttt taatgaagac - atgttaattc tctgatcaat ccattgctgt acacaagttt taatgaagac - atgtgaattc tctgattaac cccctgctct acacgagttt taatgaagac - atgttaattc tctgatcaac cctctgctct acacaagttt taatgaggac - atgttaattc tctgatcaac cctctgctct acacaagttt taatgaagac - atgtgaattc tctgatcaac cctctgctct atacgagttt taatgaagac - atgttaattc tctgatcaac ccactgctgt acacaagttt taatgaagac - atgtgaattc tctgatcaac cctctgctct atacgagttt taatgaagac - atgttaattc tctgatcaac cctctgctct acacaagttt taatgaagac - atgtcaattc tctgatcaac cctctgctct atacgagttt taatgaagat - - tttaagctgg cttttaaaaa gctaattaag tgccgagaac acacttag - tttaaactgg cttttaaaaa gcttattcgg tgccgagaac atacttag - tttaagctgg cttttaaaaa gctcattagg tgccgagagc atacttag - tttaagctag cttttaaaaa gctaattaag tgtcgagaac atacttag - tttaaactgg cttttaaaaa gctcattagg tgccgagagc atacttag - tttaagcggg cctttaaaag gcttattagg tgccgagaac atgcatag - tttaagctgg cttttaaaaa gctcattagg tgccgagaac atacttag - tttaagctgg cttttaaaaa gctcattaag tgccgagaac atacttag - tttaagctgg cttttaaaaa gctcattaga tgccgagagc atacttag - tttaaactgg cttttaaaaa gctcattaga tgccgagagc atacctag - tttaagctgg cttttaaaaa gctcattaga tgccgagagc atacttag - tttaagctgg cttttaaaaa gctcattagg tgccgagaac acacctag - tttaagctgg cttttaaaaa gctcattaga tgccgagagc atgcttag diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy_phyml_stats.txt b/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy_phyml_stats.txt deleted file 
mode 100644 index d21743b3..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy_phyml_stats.txt +++ /dev/null @@ -1,43 +0,0 @@ - - oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo - --- PhyML 20120412 --- - http://www.atgc-montpellier.fr/phyml - Copyright CNRS - Universite Montpellier II - oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo - -. Sequence filename: HTR1E_aligned.phy -. Data set: #1 -. Tree topology search : NNIs -. Initial tree: BioNJ -. Model of nucleotides substitution: HKY85 -. Number of taxa: 13 -. Log-likelihood: -4405.01073 -. Unconstrained likelihood: -3468.32718 -. Parsimony: 592 -. Tree size: 0.65942 -. Discrete gamma model: Yes - - Number of categories: 4 - - Gamma shape parameter: 0.267 -. Transition/transversion ratio: 6.511 -. Nucleotides frequencies: - - f(A)= 0.24205 - - f(C)= 0.27869 - - f(G)= 0.21690 - - f(T)= 0.26237 - -. Run ID: none -. Random seed: 1483988035 -. Subtree patterns aliasing: no -. Version: 20120412 -. Time used: 0h0m3s (3 seconds) - - oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo - Suggested citations: - S. Guindon, JF. Dufayard, V. Lefort, M. Anisimova, W. Hordijk, O. Gascuel - "New algorithms and methods to estimate maximum-likelihood phylogenies: assessing the performance of PhyML 3.0." - Systematic Biology. 2010. 59(3):307-321. - - S. Guindon & O. Gascuel - "A simple, fast, and accurate algorithm to estimate large phylogenies by maximum likelihood" - Systematic Biology. 2003. 52(5):696-704. 
- oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy_phyml_tree.txt b/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy_phyml_tree.txt deleted file mode 100644 index f5f9e2e5..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/HTR1E_aligned.phy_phyml_tree.txt +++ /dev/null @@ -1 +0,0 @@ -(Gorilla:0.00379882,Homo:0.00284497,(Macaca:0.02315360,(Callithrix:0.01678371,((Cavia:0.05116538,Heterocephalus:0.04811276)1.000000:0.05444995,((Felis:0.02299512,(Ailuropoda:0.05081929,Canis:0.02979235)0.737000:0.00294017)0.991000:0.01704465,((Echinops:0.07310532,Loxodonta:0.04975882)0.936000:0.01493140,(Bos:0.06863939,Equus:0.04259596)0.744000:0.00638656)0.785000:0.00293474)0.897000:0.01025131)1.000000:0.04342394)0.937000:0.01365085)0.970000:0.00984289); diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/phyml_test.py b/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/phyml_test.py deleted file mode 100644 index fdada500..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyML/phyml_test/phyml_test.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Test the PhyML executable. 
-https://github.com/biopython/biopython/blob/master/Tests/test_phyml_tool.py -""" -import sys -import os -import unittest -from Bio import Phylo -from Bio.Phylo.Applications import PhymlCommandline -from Bio import MissingExternalDependencyError - - -class PhymlTest(unittest.TestCase): - """Test for application wrapper.""" - - def __init__(self): - # Try to avoid problems when the OS is in another language - os.environ['LANG'] = 'C' - - phyml_exe = None - exename = "PhyML-3.1_win32.exe" if sys.platform == "win32" else "phyml" - from Bio._py3k import getoutput - try: - output = getoutput(exename + " --version") - if "not found" not in output and "20" in output: - phyml_exe = exename - except OSError: - # Python 2.6 or 2.7 on Windows XP: - # WindowsError: [Error 2] The system cannot find the file specified - # Python 3.3 or 3.4 on Windows XP: - # FileNotFoundError: [WinError 2] The system cannot find the file - # specified - pass - - if not phyml_exe: - raise MissingExternalDependencyError( - "Install PhyML 3.0 if you want to use the \ - Bio.Phylo.Applications wrapper.") - - # Example Phylip file with 13 aligned protein sequences - EX_PHYLIP = 'HTR1E_aligned.phy' - self.EX_PHYLIP = EX_PHYLIP - - def test_phyml(self): - """Run PhyML using the wrapper.""" - - cmd = PhymlCommandline( - self.phyml_exe, - input=self.EX_PHYLIP, - datatype='nt') - # Smoke test - try: - out, err = cmd() - self.assertTrue(len(out) > 0) - self.assertEqual(len(err), 0) - # Check the output tree - tree = Phylo.read(self.EX_PHYLIP + '_phyml_tree.txt', 'newick') - self.assertEqual(tree.count_terminals(), 13) - finally: - # Clean up generated files - for suffix in ['_phyml_tree.txt', '_phyml_stats.txt']: - fname = self.EX_PHYLIP + suffix - if os.path.isfile(fname): - os.remove(fname) - - -if __name__ == '__main__': - unittest.main() diff --git a/OrthoEvol/Orthologs/Phylogenetics/Phylip/README.md b/OrthoEvol/Orthologs/Phylogenetics/Phylip/README.md new file mode 100644 index 00000000..e3c6de37 --- 
/dev/null +++ b/OrthoEvol/Orthologs/Phylogenetics/Phylip/README.md @@ -0,0 +1,29 @@ +# Phylip Documentation + +PHYLIP (the PHYLogeny Inference Package) is a package of programs for inferring +phylogenies (evolutionary trees). Methods that are available in the package +include parsimony, distance matrix, and likelihood methods, including +bootstrapping and consensus trees. Data types that can be handled include +molecular sequences, gene frequencies, restriction sites and fragments, +distance matrices, and discrete characters. + +Learn more about Phylip [here](http://evolution.genetics.washington.edu/phylip.html). + +## Examples + +### Running Phylip + +```python +from OrthoEvol.Orthologs.Phylogenetics.Phylip import Phylip + +htr1a = Phylip(infile='HTR1A.phy') + +# Generate a distance matrix +htr1a.dnadist(outfile="htr1a_dist.txt") +``` + +### Running Phylip with our parallel module + +```python + +``` diff --git a/OrthoEvol/Orthologs/Phylogenetics/Phylip/__init__.py b/OrthoEvol/Orthologs/Phylogenetics/Phylip/__init__.py index feb8f9bb..96c2161f 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/Phylip/__init__.py +++ b/OrthoEvol/Orthologs/Phylogenetics/Phylip/__init__.py @@ -1,6 +1,6 @@ """Phylip tools.""" -from .orthophylip import Phylip +from .phylip import Phylip # Make this explicit, then they show up in the API docs __all__ = ("Phylip", diff --git a/OrthoEvol/Orthologs/Phylogenetics/Phylip/orthophylip.py b/OrthoEvol/Orthologs/Phylogenetics/Phylip/orthophylip.py deleted file mode 100644 index 7aff8ef5..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/Phylip/orthophylip.py +++ /dev/null @@ -1,61 +0,0 @@ -import os -import pexpect # I used this to feed input into shell executable -import sys -# TODO Create better wrappers. - - -class Phylip(object): - """A class that serves as a wrapper for the Phylip Excecutable.""" - - def __init__(self, inputfile): - """The input file should be a phylip formatted multiple sequence alignment. 
- - :param inputfile: Input a phylip formatted multiple sequence alignment. - """ - - self._rename = os.rename - if sys.platform == 'win32' or 'win64': - sys.exit("This module is strictly for use on Linux at the moment.") - - self.inputfile = inputfile - - # Rename the input file to infile - self._rename(self.inputfile, "infile") - self.inputfile = "infile" - - def dnapars(self, outfile, outtree): - """Generate a maximum parsimony tree using dnapars. - - :param outfile: Standard output filename. - :param outtree: Name of maximum parsimony tree. - """ - - dnapars = pexpect.spawnu("dnapars infile") - dnapars.sendline("Y\r") - dnapars.waitnoecho() - self._rename("outfile", outfile + "_dnapars_output") - self._rename("outtree", outtree + "_maxparsimony_tree") - - def dnaml(self, outfile, outtree): - """Generate a maximum likelihoood tree using dnapaml. - - :param outfile: Standard output filename. - :param outtree: Name of maximum likelihoood tree. - """ - - dnaml = pexpect.spawnu("dnaml infile") - dnaml.sendline("Y\r") - dnaml.waitnoecho() - self._rename("outfile", outfile + "_dnaml_output") - self._rename("outtree", outtree + "_maxlikelihood_tree") - - def dnadist(self, dnadist_output): - """Generate a distance matrix using dnadist. - - :param dnadist_output: Dnadist output filename. 
- """ - - dnadist = pexpect.spawnu("dnadist infile") - dnadist.sendline("Y\r") - dnadist.waitnoecho() - self._rename("outfile", dnadist_output + "_dnadist") diff --git a/OrthoEvol/Orthologs/Phylogenetics/Phylip/phylip.py b/OrthoEvol/Orthologs/Phylogenetics/Phylip/phylip.py new file mode 100644 index 00000000..ceba79cd --- /dev/null +++ b/OrthoEvol/Orthologs/Phylogenetics/Phylip/phylip.py @@ -0,0 +1,112 @@ +import os +import sys +import shutil + +import pexpect # I used this to feed input into shell executable +from Bio import AlignIO + +from OrthoEvol.Tools.logit import LogIt + + +class Phylip(object): + """A class that serves as a wrapper for the Phylip executable.""" + + def __init__(self, infile): + """Initialize the Phylip class. + + :param infile: A phylip formatted multiple sequence alignment. + """ + if self._validate_format(infile): + self.infile = infile + self._rename = os.rename + # Set up logging + self.phylip_log = LogIt().default(logname="Phylip", logfile=None) + # Raise error if OS is not linux + if sys.platform != 'linux': + err_msg = "This module is strictly for use on Linux at the moment." + raise OSError(err_msg) + + def _validate_format(self, infile): + """Validate the format of the Phylip file + + :param infile: A phylip formatted multiple sequence alignment. + :type infile: str + """ + try: + AlignIO.read(open(infile), "phylip") + except ValueError as e: + self.phylip_log.exception(e) + else: + return True + # TODO: Return an exception? + return False + + def _temp_infile(self, infile): + """Create a temporary infile named infile. + + :param infile: A phylip formatted multiple sequence alignment. + """ + shutil.copyfile(infile, "infile") + temp_infile = "infile" + return temp_infile + + def dnapars(self, outfile, outtree): + """Generate a maximum parsimony tree using dnapars. + + :param outfile: Standard output filename. + :param outtree: Name of maximum parsimony tree. 
+ """ + infile = self._temp_infile(infile=self.infile) + try: + dnapars = pexpect.spawnu("dnapars %s" % infile) + dnapars.sendline("Y\r") + dnapars.waitnoecho() + except pexpect.EOF as e: + self.phylip_log.error(dnapars.read()) + self.phylip_log.exception(e) + else: + self.phylip_log.info(dnapars.read()) + self._rename("outfile", outfile) + self._rename("outtree", outtree) + finally: + os.remove(infile) + + def dnaml(self, outfile, outtree): + """Generate a maximum likelihood tree using dnaml. + + :param outfile: Standard output filename. + :param outtree: Name of maximum likelihood tree. + """ + infile = self._temp_infile(infile=self.infile) + try: + dnaml = pexpect.spawnu("dnaml %s" % infile) + dnaml.sendline("Y\r") + dnaml.waitnoecho() + except pexpect.EOF as e: + self.phylip_log.error(dnaml.read()) + self.phylip_log.exception(e) + else: + self.phylip_log.info(dnaml.read()) + self._rename("outfile", outfile) + self._rename("outtree", outtree) + finally: + os.remove(infile) + + def dnadist(self, outfile): + """Generate a distance matrix using dnadist. + + :param outfile: distance matrix output filename. 
+ """ + infile = self._temp_infile(infile=self.infile) + try: + dnadist = pexpect.spawnu("dnadist %s" % infile) + dnadist.sendline("Y\r") + dnadist.waitnoecho() + except pexpect.EOF as e: + self.phylip_log.error(dnadist.read()) + self.phylip_log.exception(e) + else: + self.phylip_log.info(dnadist.read()) + self._rename("outfile", outfile) + finally: + os.remove(infile) diff --git a/OrthoEvol/Orthologs/Phylogenetics/Phylip/phylip_test/phylip_test.py b/OrthoEvol/Orthologs/Phylogenetics/Phylip/phylip_test/phylip_test.py deleted file mode 100644 index a48ec4bd..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/Phylip/phylip_test/phylip_test.py +++ /dev/null @@ -1 +0,0 @@ -# TODO Write a test to make sure phylip works diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyloTree/treeviz.py b/OrthoEvol/Orthologs/Phylogenetics/PhyloTree/treeviz.py deleted file mode 100644 index 8ab94515..00000000 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyloTree/treeviz.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Import a newick formatted tree txt file and view it.""" -import warnings - -from Bio import Phylo - -from OrthoEvol.Orthologs import OrthologsDevelopmentWarning - -# Warn users about this module -warnings.warn('This module is still under development and ' - 'may undergo significant changes prior to its official release.', - OrthologsDevelopmentWarning) - - -class TreeViz(object): - """Tools that allow visualization of a newick formatted tree.""" - - def __init__(self, path2tree, treeformat='newick'): - """Import the path to the tree. - - :param path2tree: Path to your tree file. 
- :param treeformat: (Default value = 'newick') - """ - self.path2tree = path2tree - self.treeformat = treeformat - self.tree = Phylo.read(self.path2tree, self.treeformat) - - def drawtree(self): - """Import a newick formatted tree and visualize it.""" - - Phylo.draw(self.tree) diff --git a/OrthoEvol/Orthologs/Phylogenetics/PhyloTree/README.md b/OrthoEvol/Orthologs/Phylogenetics/TreeViz/README.md similarity index 52% rename from OrthoEvol/Orthologs/Phylogenetics/PhyloTree/README.md rename to OrthoEvol/Orthologs/Phylogenetics/TreeViz/README.md index f82550c9..d4524c51 100644 --- a/OrthoEvol/Orthologs/Phylogenetics/PhyloTree/README.md +++ b/OrthoEvol/Orthologs/Phylogenetics/TreeViz/README.md @@ -1,16 +1,21 @@ # PhyloTree Documentation + PhlyoTree is a simple and useful module to help quickly view and create phylogenetic trees from existing tree files. ## Example -### Draw a newick formatted tree +### Draw and save a newick formatted tree + ```python -from OrthoEvol.Orthologs.Phylogenetics.PhyloTree import TreeViz +from OrthoEvol.Orthologs.Phylogenetics.TreeViz import TreeViz -TreeViz(path2tree='path/to/newick/tree', treeformat='newick') -``` +t = TreeViz(path='tree.txt', tree_format='newick') +t.draw_tree() +t.save_tree('example.png') +``` ## Notes + THIS MODULE IS UNDER DEVELOPMENT!!!! 
class TreeViz(object):
    """Tools that allow visualization of a newick formatted tree."""

    def __init__(self, path, tree_format='newick'):
        """Initialize the class and read the tree.

        :param path: The path to your tree file.
        :type path: str
        :param tree_format: The format of the tree, default value = 'newick'
        :type tree_format: str
        """
        # Warn users about this module
        warnings.warn('This module is still under development and '
                      'may undergo significant changes prior to its official '
                      'release.', OrthologsDevelopmentWarning)
        self.path = path
        self.tree_format = tree_format
        # Read the tree
        self.tree = self.read_tree(path=path, tree_format=tree_format)

    def read_tree(self, path, tree_format='newick'):
        """Read the phylogenetic tree.

        :param path: The path to your tree file.
        :type path: str
        :param tree_format: The format of the tree, defaults to "newick"
        :type tree_format: str
        :return: The object returned by ``Bio.Phylo.read``.
        """
        # Honour the arguments instead of silently re-reading self.path.
        return Phylo.read(file=path, format=tree_format)

    def draw_tree(self, drawing_type="default", auto_show=False):
        """Visualize the tree that was read at initialization.

        :param drawing_type: One of "ascii", "graphviz" or "default",
                             defaults to "default"
        :type drawing_type: str, optional
        :param auto_show: Forwarded to ``Phylo.draw`` as ``do_show``; only
                          used by the "default" drawing type.
        :type auto_show: bool, optional
        :raises ValueError: If ``drawing_type`` is not a supported value.
        """
        if drawing_type == "ascii":
            Phylo.draw_ascii(self.tree)
        elif drawing_type == "graphviz":
            # NOTE(review): draw_graphviz is reportedly deprecated in newer
            # Biopython releases - confirm against the pinned version.
            Phylo.draw_graphviz(self.tree)
        elif drawing_type == "default":
            Phylo.draw(tree=self.tree, do_show=auto_show)
        else:
            raise ValueError("Unsupported drawing_type: %r" % (drawing_type,))

    def save_tree(self, filename):
        """Save the tree image.

        The image is written with ``matplotlib.pyplot.savefig``.
        NOTE(review): presumably draw_tree() must have been called first so
        that a figure exists - confirm.

        :param filename: The name of the image file.
        :type filename: str
        """
        plt.savefig(fname=filename)
import setup_logger, LogFormatter, logging, colors - +from logging import CRITICAL, ERROR, WARNING, INFO, DEBUG class LogIt(object): """LogIt makes logging easier by creating easy loggers.""" @@ -18,13 +18,19 @@ def __init__(self): self._date_format = '%b-%d-%Y at %I:%M:%S %p' # Used to add date self._log_format = ("%(color)s[%(levelname)s | %(name)s] [%(asctime)s | " "%(module)s - line %(lineno)d]:%(end_color)s %(message)s") + + # Add custom colors for CRITICAL and DEBUG + self._COLORS = {DEBUG: colors.Fore.LIGHTBLUE_EX, + INFO: colors.Fore.GREEN, + WARNING: colors.Fore.YELLOW, + ERROR: colors.Fore.RED, + CRITICAL: colors.Fore.LIGHTRED_EX + } + self._formatter = LogFormatter(fmt=self._log_format, - datefmt=self._date_format) + datefmt=self._date_format, + colors=self._COLORS) - # Add a color for the critical level - self._formatter.DEFAULT_COLORS[50] = colors.Fore.LIGHTRED_EX - # Changed color for the debug level - self._formatter.DEFAULT_COLORS[10] = colors.Fore.LIGHTBLUE_EX self.logging = logging def default(self, logname, logfile): diff --git a/OrthoEvol/Tools/parallel/README.md b/OrthoEvol/Tools/parallel/README.md index fbb10d3a..acaf0786 100644 --- a/OrthoEvol/Tools/parallel/README.md +++ b/OrthoEvol/Tools/parallel/README.md @@ -1,4 +1,5 @@ # Parallel Documentation + The parellel module is home to the `Multiprocess` class which uses python's native multiprocessing module. Find more information [here](https://docs.python.org/3.6/library/multiprocessing.html). It will soon be home to [MPI (Message Passing Interface)](http://mpi4py.readthedocs.io/en/stable/) which is also a form of parallel computing. @@ -12,10 +13,10 @@ using clustering or SGE (Sun Grid Engine). 
def map_to_function(self, function, iterable, procs=None):
    """Start a pool to run your function with a list.

    :param function: Input a python function.
    :param iterable: Input a list or dictionary to map to the function.
    :param procs: The number of processors to use in the pool. When it is
                  not a positive integer, ``self.num_procs`` is used.
    :return: The list produced by ``Pool.map`` (one result per item).
    """
    # If the user has noted a number of processors, use them for this call
    # only; the instance default is left untouched. bool is a subclass of
    # int, so it is excluded explicitly.
    if isinstance(procs, int) and not isinstance(procs, bool) and procs > 0:
        num_procs = procs
    else:
        # A pool needs at least one worker, even when the instance default
        # evaluates to 0.
        num_procs = max(1, self.num_procs)

    log = self._logger()  # Start the logger
    time_secs = time()

    # Create a pool of processors
    with Pool(processes=num_procs) as pool:
        results = pool.map(function, iterable)
    minutes = (time() - time_secs) / 60

    # Log how long it takes
    log.info("Took %s minutes to complete.", minutes)
    logging.shutdown()  # Shutdown the logger.
    return results
# NOTE(review): every method below interpolates its arguments into the command
# string unchanged; how the string is executed depends on _bash, which is
# defined outside this section.

def grep(self, pattern, path):
    """Run ``grep`` with the given pattern against the given path."""
    self._bash(f"grep {pattern} {path}")


def chmod(self, permissions, path):
    """Run ``chmod`` with the given permissions on the given path."""
    self._bash(f"chmod {permissions} {path}")


def chown(self, owner, path):
    """Run ``chown`` with the given owner on the given path."""
    self._bash(f"chown {owner} {path}")


def find(self, path, pattern):
    """Run ``find`` on path with ``-name`` set to pattern."""
    self._bash(f"find {path} -name {pattern}")


def tar(self, action, options, archive, files):
    """Run ``tar`` with the action, options, archive and files given."""
    self._bash(f"tar {action} {options} {archive} {files}")


def unzip(self, archive, destination):
    """Run ``unzip`` on archive with ``-d`` set to destination."""
    self._bash(f"unzip {archive} -d {destination}")


def zip(self, options, archive, files):
    """Run ``zip`` with the options, archive and files given."""
    self._bash(f"zip {options} {archive} {files}")


def du(self, path):
    """Run ``du`` on the given path."""
    self._bash(f"du {path}")


def df(self, path):
    """Run ``df`` on the given path."""
    self._bash(f"df {path}")


def top(self, options):
    """Run ``top`` with the given options."""
    self._bash(f"top {options}")


def ps(self, options):
    """Run ``ps`` with the given options."""
    self._bash(f"ps {options}")


def kill(self, signal, process_id):
    """Run ``kill`` with the given signal and process id."""
    self._bash(f"kill {signal} {process_id}")


def man(self, command):
    """Run ``man`` for the given command."""
    self._bash(f"man {command}")


def info(self, command):
    """Run ``info`` for the given command."""
    self._bash(f"info {command}")


def history(self, options):
    """Run ``history`` with the given options."""
    self._bash(f"history {options}")
a/OrthoEvol/utilities.py +++ b/OrthoEvol/utilities.py @@ -551,7 +551,7 @@ def multi_fasta_sort(self, target_file, man_file, output_file): :return: A multi-FASTA file with sorted sequences. :rtype: str. """ - # TODO-ROB: Check for duplicates. + # TODO: Check for duplicates. with TemporaryFile('r+', dir=str(Path(target_file).parent)) as tmp_file: aln = MultipleSeqAlignment([]) diff --git a/README.rst b/README.rst index 5bd78523..994e60aa 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ -.. image:: https://travis-ci.org/datasnakes/OrthoEvolution.svg?branch=master - :target: https://travis-ci.org/datasnakes/OrthoEvolution +.. image:: https://app.travis-ci.com/datasnakes/OrthoEvolution.svg?branch=master + :target: https://app.travis-ci.com/datasnakes/OrthoEvolution .. image:: https://badge.fury.io/py/OrthoEvol.svg :target: https://badge.fury.io/py/OrthoEvol @@ -13,8 +13,6 @@ .. image:: https://badgen.net/github/last-commit/datasnakes/OrthoEvolution :target: https://github.com/datasnakes/OrthoEvolution/commits/master -.. image:: https://img.shields.io/badge/chat-on%20gitter-753A88.svg - :target: https://gitter.im/datasnakes/OrthoEvolution diff --git a/examples/standalone-scripts/ncbi-download.py b/examples/standalone-scripts/ncbi-download.py index 43e3b148..882fffd4 100644 --- a/examples/standalone-scripts/ncbi-download.py +++ b/examples/standalone-scripts/ncbi-download.py @@ -1,6 +1,5 @@ #!/usr/bin/env python """This standalone script downloads files from NCBI's ftp.""" -from OrthoEvol.Tools.ftp import NcbiFTPClient import os import fnmatch from subprocess import call, CalledProcessError @@ -9,54 +8,75 @@ import textwrap import sys +from OrthoEvol.Tools.ftp import NcbiFTPClient + # Raise an error if you're not on linux. Windows generally doesn't have wget. -if 'linux' not in str(sys.platform): +if sys.platform != 'linux': msg = 'This interface is not intended for use on your platform.' 
raise NotImplementedError(msg) -def main(email, dbtype, dbname, preformatted, num_procs=8): +def write_to_file(hostname, dbname, dbpath, filenames): + # Create a for loop that writes the list/text file of files wanted + with open('downloadlist.txt', 'w') as downloads: + for filename in filenames: + # Get only those files. + if fnmatch.fnmatch(filename, dbname + '*'): + refseq_file = os.path.join(filename) + # Write the url of each refseq_rna db file to a text file. + downloads.writelines(hostname + dbpath + refseq_file + '\n') + # use elif here to get the taxdb.tar.gz file. + elif fnmatch.fnmatch(filename, 'taxdb*'): + taxdb_file = os.path.join(filename) + downloads.writelines(hostname + dbpath + taxdb_file + '\n') + + +def main(email, dbtype, dbname, num_procs=8): + """[summary] + + :param email: [description] + :type email: [type] + :param dbtype: [description] + :type dbtype: [type] + :param dbname: [description] + :type dbname: [type] + :param num_procs: The number of processors to use, defaults to 8 + :type num_procs: int, optional + :raises NotImplementedError: [description] + """ ncbiftp = NcbiFTPClient(email=email) log = ncbiftp.ncbiftp_log - accepted = ['yes', 'Yes', 'y', 'Y'] + if dbtype == 'blastdbv5': + # This is a list of the file names in the current directory + dbpath = ncbiftp.blastdbv5_path + filenames = ncbiftp.listfiles(dbpath) - if dbtype == 'blastdb' and preformatted in accepted: + write_to_file(ncbiftp.ftp.host, dbname, dbpath, filenames) + elif dbtype == 'blastdb': # This is a list of the file names in the current directory dbpath = ncbiftp.blastdb_path filenames = ncbiftp.listfiles(dbpath) - # Create a for loop that writes the list/text file of files wanted - with open('downloadlist.txt', 'w') as downloads: - for filename in filenames: - if fnmatch.fnmatch(filename, dbname + '*'): # Get only those files. - refseq_file = os.path.join(filename) - # Write the url of each refseq_rna db file to a text file. 
- downloads.writelines(ncbiftp.ftp.host + dbpath + refseq_file + '\n') - # use elif here to get the taxdb.tar.gz file. - elif fnmatch.fnmatch(filename, 'taxdb*'): - taxdb_file = os.path.join(filename) - downloads.writelines(ncbiftp.ftp.host + dbpath + taxdb_file + '\n') - - elif preformatted not in accepted: - raise NotImplementedError('Non-formatted databases are NOT unsupported.') - + write_to_file(ncbiftp.ftp.host, dbname, dbpath, filenames) else: raise NotImplementedError('That database is unsupported.') # Download the list of files using 'wget' on linux/unix with contextlib.suppress(os.error): - cmd = 'cat downloadlist.txt | xargs -n 1 -P ' + int(num_procs) + ' wget' + cmd = 'cat downloadlist.txt | xargs -n 1 -P ' + \ + int(num_procs) + ' wget' status = call([cmd], shell=True) if status == 0: - log.info("The %s blast db files have downloaded." % dbname) + log.info("The %s %s files have downloaded." % (dbname, dbtype)) else: log.error(CalledProcessError) ncbiftp.close_connection() ncbiftp.close_connection() + if __name__ == '__main__': parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, @@ -71,9 +91,7 @@ def main(email, dbtype, dbname, preformatted, num_procs=8): parser.add_argument('-dbtype', '--database-type', help='Enter the name of the NCBI database.', required=True) - parser.add_argument('-dbname', '--database-name', help='Respond with yes or no', - required=True) - parser.add_argument('-p', '--preformatted', help='Respond with yes or no', + parser.add_argument('-dbname', '--database-name', help='The name or seqtype of the database', required=True) parser.add_argument('-n', '--num-procs', help='Enter the number of processors to use to download the files', @@ -81,4 +99,4 @@ def main(email, dbtype, dbname, preformatted, num_procs=8): args = parser.parse_args() - main(args.email, args.dbtype, args.dbtype, args.preformatted, args.num_procs) \ No newline at end of file + main(args.email, args.dbtype, args.dbname, 
args.num_procs) diff --git a/requirements.txt b/requirements.txt index ad729e32..dc0c8dee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +matplotlib tqdm==4.25.0 ete3==3.0.0b35 pandas>=0.25.3 @@ -6,20 +7,18 @@ pexpect==4.4.0 slacker==0.9.42 biopython==1.70 tablib>=0.11.5 -mygene==3.0.0 -cookiecutter==2.1.1 -Flask==1.0 -Flask-Login==0.4.0 -Flask-Mail==0.9.1 -Flask-SQLAlchemy==2.2 -Flask-User==0.6.13 -Flask-WTF>=0.14.2 +mygene>=3.0.0 +cookiecutter +Flask +Flask-Login +Flask-Mail +Flask-SQLAlchemy +Flask-WTF treelib==1.3.5 -psutil==5.6.7 -luigi>=2.8.0 -sciluigi==0.9.5b6 +psutil>=5.6.7 +luigi logzero>=1.5.0 xmltodict>=0.11.0 plotly>=3.10.0 pyyaml>=3.12 -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +wtforms>=3.0.0a1 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/setup.py b/setup.py index 88d150c1..808a9d69 100644 --- a/setup.py +++ b/setup.py @@ -47,8 +47,9 @@ def readme(): 'Operating System :: Unix', 'Natural Language :: English', 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Framework :: Flask', 'Framework :: Cookiecutter' ], diff --git a/tests/test_cookies.py b/tests/test_cookies.py new file mode 100644 index 00000000..7e56677a --- /dev/null +++ b/tests/test_cookies.py @@ -0,0 +1,55 @@ +import unittest +from unittest.mock import patch, MagicMock +from OrthoEvol.Cookies import CookBook, Oven +from pathlib import Path +import os + +class TestCookBook(unittest.TestCase): + + def test_init(self): + cookbook = CookBook() + self.assertTrue(hasattr(cookbook, 'CookieJar')) + self.assertIsInstance(cookbook.CookieJar, Path) + + @patch('builtins.open', new_callable=MagicMock) + def test_new_recipes(self, mock_open): + mock_open.return_value.__enter__.return_value = 
MagicMock() + new_recipe_path = Path('path/to/new/recipe') + cookbook = CookBook(new_recipe='new_recipe_path') + self.assertTrue(hasattr(cookbook, 'new_recipe')) + self.assertEqual(getattr(cookbook, 'new_recipe'), new_recipe_path) + +class TestOven(unittest.TestCase): + + def setUp(self): + self.cookbook = CookBook() + self.oven = Oven(recipes=self.cookbook) + self.test_dir = Path('test_directory') + self.test_dir.mkdir(exist_ok=True) + + def tearDown(self): + if self.test_dir.exists(): + os.rmdir(self.test_dir) + + def test_init(self): + self.assertEqual(self.oven.Recipes, self.cookbook) + self.assertEqual(self.oven.cookie_jar, os.getcwd()) + + def test_bake_the_repo(self): + repo_name = 'test_repo' + self.oven.repo = repo_name + self.oven.bake_the_repo(cookie_jar=self.test_dir) + expected_dir = self.test_dir / repo_name + self.assertTrue(expected_dir.exists()) + + def test_bake_the_user(self): + user_name = 'test_user' + self.oven.user = user_name + self.oven.bake_the_user(cookie_jar=self.test_dir) + expected_dir = self.test_dir / user_name + self.assertTrue(expected_dir.exists()) + + # Similar tests for other methods like bake_the_project, etc. 
+ +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_data/test.phy b/tests/test_data/test.phy new file mode 100644 index 00000000..85e0547e --- /dev/null +++ b/tests/test_data/test.phy @@ -0,0 +1,22 @@ +21 1500 +Human ATGGCTTCTGGAATCCTGGTTAATGTAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACACAACCCCTGAGCCTGGACTGCGGCCACAGCTTCTGCCAAGCATGCCTCACTGCAAACCACAAGAAGTCCGGAGAGAGTAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACGGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAGTTGAGCCCAGAGGGGCAGAAAGTTGATCATTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGGAGGACGGGAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACGTTCCTCACAGAGGAGGTTGCCCGGGAGTACCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGACTCAAATACAGTATGACAAAACCAACGTCTTGGCAGATTTTGAGCAACTGAGAGACATCCTGGACTGGGAGGAGAGCAATGAGCTGCAAAACCTGGAGAAGGAGGAGGAAGACATTCTGAAAAGCCTTACGAACTCTGAAACTGAGATGGTGCAGCAGACCCAGTCCCTGAGAGAGCTCATCTCAGATCTGGAGCATCGGCTGCAGGGGTCAGTGATGGAGCTGCTTCAGGGTGTGGATGGCGTCATAAAAAGGACGGAGAACGTGACCTTGAAGAAGCCAGAAACTTTTCCAAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGAAGTGTTTAGAGAGCTGACAGATGTCCGACGCTACTGGGTTGATGTGACAGTGGCTCCAAACAACATTTCATGTGCTGTCATTTCTGAAGATAAGAGACAAGTGAGCTCTCCGAAACCAAGATACCAGACATTTGTGAATTTCAATTATTGTACTGGCATCCTGGGCTCTCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAAGAAAACTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCTGATGCAATGTGTAATATTGAAAAAAATGAAAATTATCAACCTAAATACGGCTACTGGGTTATAGGGTTAGAGGAAGGAGTTAAATGTAGTGCTTTCCAGGATAGTTCCTTCCATACTCCTTCTGTTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCACTGTTCTTTTTCTCAGCCTGTATTTCCATATTTAAATCCTAGAAAATGTGGAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Chimp 
ATGGCTTCTGGAATCCTGGTTAATGTAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACACAACCCCTGAGCCTGGACTGCGGCCACAGCTTCTGCCAAGCATGCCTCACTGCAAACCACAAGAAGTCCGGAGAGAGTAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACGGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAGTTGAGCCCAGAGGGGCAGAAAGTTGATCATTGTGCACACCATGGAGAGAAACTTCTACTCTTCTGTCAGGAGGACGGGAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACGTTCCTCACAGAGGAGGTTGCCCGGGAGTACCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGACTCAAATACAGTATGACAAAACCAACGTCTTGGCAGATTTTGAGCAACTGAGAGACATCCTGGACTGGGAGGAGAGCAATGAGCTGCAAAACCTGGAGAAGGAGGAGGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACTGAGATGGTGCAGCAGACCCAGTCCGTGAGAGAGCTCATCTCAGATCTGGAGCGTCGGCTGCAGGGGTCAGTGATGGAGCTGCTTCAGGGTGTGGATGGCGTCATAAAAAGGATGGAGAACGTGACCTTGAAGAAGCCAGAAACTTTTCCAAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGAAGTGTTTAGAGAGCTGACAGATGTCCGACGCTACTGGGTTGATGTGACAGTGGCTCCAAACAACATTTCATGTGCTGTCATTTCTGAAGATATGAGACAAGTGAGCTCTCCGAAACCAAGATATCAGACATTTATGAATTTCAATTATTGTACTGGCATCCTGGGCTCTCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCTGATGCAATGTGTAATATTGAAAAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTAGAGGAAGGAGTTAAATGTAGTGCTTTCCAGGATGGTTCCTTCCATACTCCTTCTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATCTCTCATCTATAAGTTTTCTCACTGTTCTTTTTCTCAGCCTGTATTTCCATATTTAAATCCTAGAAAATGTGGAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Gorilla 
ATGGCTTCTGGAATCCTGGTTAATGTAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACACAACCCCTGAGCCTGGACTGCGGCCACAGCTTCTGCCAAGCATGCCTCACTGCAAACCACAAGAAGTCCGGAGAGAGTAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACGGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTTAGGGAGGTCAAGTTGAGCCCAGAGGGGCAGAAAGTTGATCATTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGGAGGACGGGAAGGTCATTTGCTGGCTTTGCGAGCGGTCTCAGGAGCACCGTGGTCACCACACGTTCCTCACAGAGGAGGTTGCCCAGGAGTACCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGACTCAAATACAGTATGACAAAACCAACGTCTTGGCAGATTTTGAGCAACTGAGAGACATCCTGGACTGGGAGGAGAGCAATGAGCTGCAAAACCTGGAGAAGGAGGAGGAAGACATTCTGAAACGCCTTACGAAGTCTGAAACTGAGATGGTGCAGCAGACCCAGTCCGTGAGAGAGCTCATCTCAGATCTGGAGCATCGGCTGCAGGGGTCAGTGATGGAGCTGCTTCAGGGTGTGGATGGCGTCATAAAAAGGATGGAGAACGTGACCTTGAAGAAGCCAGAAACTTTTCCAAAAAATCGAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGAAGTGTTTAGAGAGCTGACAGATGTCCGACGCTACTGGGTTGATGTGACAGTGGCTCCAAACAACATTTCATGTGCTGTCATTTCTGAAGATATGAGACAAGTGAGCTCTCCGAAACCAAGATATCAGACATTTATGAATTTCAATTATTGTACGGGCATCCTGGGCTCTCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCTGATGCAACGTGTAATATTGAAAAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTAGAGGAAGGAGTTAAATGCAGTGCTTTCCAGGATGGTTCCTTCCATACTCCTTCTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCACTGTTCTTTTTCTCAGCCTGTATTTCCATATTTAAATCCTAGAAAATGTAGAGTCCCCATGACTCTGTGCTCGCCAAGCTCT +Orangutan 
ATGGCTTCTGGAATCCTGGTTAATGTAAAGGAGGAGGTGACCTGCCCTATCTGCCTGGAACTCCTGACACAACCCCTGAGTCTGGACTGCGGCCACAGCTTCTGCCAAGCATGCCTCACTGCAAACCACAAGAAGTCCGGAGAGAGAAGCTGCCCTGTGTGCCGGGTCAGTTACCAGCCTAAGAACATACGGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAATTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTAAGGAGGACGGGAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACATTCCTCACGGAGGAGGTTGCCCAGAAGTACCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGCAAGTGAAGCTCCAGGCAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGACTCAAATACAGTATGACAAAACCAGCGTCTTGGCAGATTTTGAGCAACTGAGAGACATCCTGGACTGGGAGGAGAGCAATGAGCTGCAAAACCTGGAGAAGGAGGAGGAAGACATTCTAAAAAGCCTTACGAAGTCTGAAACTGAGATGGTGCAGCAGACCCAGTCCGTGAGAGAGCTCATCTCAGATGTGGAGCATCGGCTGCAGGGGTCAGTGATGGAGCTGCTTCAGGGTGTGGATGGCATCATAAAAAGGATGCAGAACGTGACCTTGAAGAAGCCAGAAACTTTTCCAAAAAATCAAAGGAGAGTGTTTCGAGCTCCTAATCTGAAAGGAATGCTAGAAGTGTTTAGAGAGCTGACAGATGTCCGACGCTACTGGGTTGATGTGACAGTGGCTCCAAACGACATTTCATATGCTGTCATTTCTGAAGATATGAGACAAGTGAGCTGTCCGGAACCAACATATCAGACATATGTGAATTTCAATTATTGTACTGGCATCCTGGGCTCTCAAAGTATCACGTCAGGGAAACATTACTGGGAGGTAGACGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCTGATGCAATGTATAATATTGAACAAAATGAAAATTATCAACCTCAATATGGCTACTGGGTTATAGGGTTAGAGGAAGGAGTTAAATGTAGTGCTTTCCAGGATGGTTCCTTCCATAATCCTTCTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCACTGTTCTTTTTCTCAGCCTGTATTTCCATATTTAAATCCTAGAAAATGTAGAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Gibbon 
ATGGCTTCTGGAATCCTGGTTAATGTAAAGGAGAAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACACAACCCCTGAGTCTGGACTGCGGCCACAGCTTCTGCCAAGCATGCCTCACTGCAAACCACAAAACGTCCGGAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCATAAGAACATACGGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAGTTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCACGGAAAGAAACTTCTACTCTTCTGTCAGGAGGACAGGAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACATTCCTCACGGAGGAGGTTGCCCAGGAGTACCAAATGAAGCTCCAGGCAGCTCTGCAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGCAAATGAAGCTCCAGGCAGCTCTGCAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAGAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGACTCAAATACAGTATGACAAAACCAACATCTTGGCAGATTTTGAGCAACTGAGACACATCCTGGACTGGGTGGAGAGCAATGAGCTGCAAAACCTGGAGAAGGAGGAGAAAGACGTTCTGAAAAGGCTTATGAGGTCTGAAATTGAGATGGTGCAGCAGACCCAGTCCGTGAGAGAGCTCATCTCAGATCTGGAGCATCGGCTGCAGGGGTCAGTGATGGAGCTGCTTCAGGGTGTGGATGGCGTCATAAAAAGGATGAAGAACGTGACCTTGAAGAAGCCAGAAACTTTTCCAAAAAATCGAAGGAGAGTGTTTCGAGCTGCTGATCTGAAAGTAATGCTAGAAGTGTTGAGAGAGCTGAGAGATGTCCGACGCTACTGGGTTGATGTGACAGTGGCTCCAAACAACATTTCATATGCTGTCATTTCTGAAGATATGAGACAAGTGAGCTCTCCGGAACCAATATCTCAGACATTTGTGAATTTCAATTATTGTACTGGCATCCTGGGCTCTCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTGCAACCTGATGCAATGTATAATATTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTAGAGGAAGGAGTTAAATGTAATGCTTTCCAGGATGGTTCCATCCATACTCCTTCTGCTCCTTTCGTTGTGCCCCTCTCTGTGAATATTTGTCCTGATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAGACCATGGATTTCTCATCTATAAGTTTTCTCACTGTTCTTTTTCTCAGCCTGTATTTCCATATTTAAATCCTAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Rhes_cDNA 
ATGGCTTCTGGAATCCTGCTTAATGTAAAGGAGGAGGTGACCTGTCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGTCTGCACTGCGGCCACAGCTTCTGCCAAGCGTGCATCACTGCGAACCACAAGAAGTCCGAAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACAGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAGTTGAGCCCAGAGGGACAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTCCTACTCTTCTGTCAGGAGGACAGCAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACTTTCCTCATGGAGGAGGTTGCCCAGGAGTACCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGATTCAAATAGACTACGACAAAACCAACGTCTCGGCAGATTTTGAGCAACTGAGAGAGATCCTGGACTGGGAGGAGAGCAATGAGCTGCAGAACCTGGAGAAGGAGGAAGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACGGAGATGGTGCAGCAGACCCAGTACATGAGAGAGCTCATCTCAGAACTGGAGCATCGGTTGCAGGGGTCAATGATGGATCTACTGCAGGGTGTGGATGGCATCATTAAAAGGATTGAGAACATGACCTTGAAGAAGCCAAAAACTTTTCACAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGACATGTTTAGAGAGCTAACAGATGCCCGACGCTACTGGGTTGATGTGACACTGGCTACAAACAACATTTCGCATGCTGTCATTGCTGAAGATAAGAGACAAGTGAGCTCTCGGAACCCATTATTTACGTTTCTCACGAATTTCAATTATTGTACTGGCGTCCTGGGCTCCCAAAGTATCACATCAGGGAAGCATTACTGGGAGGTAGATGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAATCCGATGCAATGTATAATATTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTACAGGAAGGAGTTAAATATAGTGTTTTCCAGGATGGTTCCTCACATACTCCTTTTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCGTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCAGTGTTCTTTTTCTAAGCCTGTATTTCCATATTTAAATCCCAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Baboon 
ATGGCTTCTGGAATCCTGCTTAATGTAAAGGAGGAGGTGACCTGTCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGTCTGCCCTGTGGCCACAGCTTCTGCCAAGCGTGCATCACTGCAAACCACAGGAAGTCCGAAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACAGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAGTTGAGCCCAGAGGGGCTGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTCCTACTCTTCTGTCAGGAGGACAGCAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACTTTCCTCATGGAGGAGGTTGCCCAGGAGTACCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGATTCAAATAGACTACGACAAAACCAACGTCTCGGCAGATTTTGAGCAACTGAGAGAGATCCTGGACTGGGAGGAGAGCAATGAGCTGCAGAACCTGGAGAAGGAGGAAGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACGGAGATGGTGCAGCAGACCCAGTACATGAGAGAGCTCATCTCAGATCTGGAGCATCGGTTGCAGGGGTCAATGATGGAGCTACTGCAGGGTGTGGATGGCATCATTAAAAGGATTGAGAACATGACCTTGAAGAAGCCAAAAACTTTTCACAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGACATGTTTAGAGAGCTAACAGATGTCCGACGCTACTGGGTTGATGTGACACTGGCTCCAAACAACATTTCGCATGCTGTCATTGCTGAAGATAAGAGACAAGTGAGCTCTCGGAACCCATTATTTTCGTTTCTCACGAATTTCAATTATTGTACTGGCGTCCTGGGCTCCCAAAGTATCACATCAGGGAAGCATTACTGGGAGGTAGATGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCTGATGCAATGTATAATATTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTACAGGAAGGAGTTAAATATAGTGTTTTCCAGGATGGTTCCTCACATACTCCTTTTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCGTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCAGTGTTCTTTTTCTAAGCCTGTATTTCCATATTTAAATCCCAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +AGM_cDNA 
ATGGCTTCTGGAATCCTGGTTAATGTAAAGGAGGAGGTGACCTGTCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGTCTGCCCTGCGGCCACAGCTTCTGCCAAGCGTGCATCACTGCAAACCACAAGGAGTCCGAAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAATATACAGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGAGAGGTCAAGTTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTCCTACTCTTCTGTCAGGAGGACAGCAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACTTTCCTCATGGAGGAGGTTGCCCAGGAGTACCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGATTCAAATAGACTACGACAAAACCAACGTCTCGGCAGATTTTGAGCAACTGAGAGAGATCCTGGACTGGGAGGAGAGCAATGAGCTGCAGAACCTGGAGAAGGAGGAAGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACGGAGATGGTGCAGCAGACCCAGTACATGAGAGAGCTCATCTCAGATCTGGAGCATCGGTTGCAGGGGTCAATGATGGAGCTGCTGCAGGGTGTGGATGGCATCATTAAAAGGGTTGAGAACATGACCTTGAAGAAGCCAAAAACATTTCACAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGACATGTTTAGAGAGCTAACAGATGTCCGACGCTACTGGGTTGATGTGACACTGGCTCCAAACAACATTTCGCATGCTGTCATTGCTGAAGATAAGAGACAAGTGAGCTATCGGAACCCATTATTTGGGTCACTCACGAATTTCAATTATTGTACTGGCGTCCTGGGCTCCCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGATGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCCGATGCAACGTATAATATTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTACAGGAAGGAGATAAATATAGTGTTTTCCAGGATGGTTCCTCACATACTCCTTTTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCGTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCAGTGTTCTTTTTCTAAGCCTGTATTTCCATATTTAAATCCCAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Tant_cDNA 
ATGGCTTCTGGAATCCTGCTTAATGTAAAGGAGGAGGTGACCTGTCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGTCTGCCCTGCGGCCACAGCTTCTGCCAAGCGTGCATCACTGCAAACCACAAGGAGTCCGAAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAATATACAGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGAGAGGTCAAGTTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTCCTACTCTTCTGTCAGGAGGACAGCAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACTTTCCTCATGGAGGAGGTTGCCCAGGAGTACCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGATTCAAATAGACTACGACAAAACCAACGTCTCGGCAGATTTTGAGCAACTGAGAGAGATCCTGGACTGGGAGGAGAGCAATGAGCTGCAGAACCTGGAGAAGGAGGAAGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACGGAGATGGTGCAGCAGACCCAGTACATGAGAGAGCTCATCTCAGATCTGGAGCATCGGTTGCAGGGGTCAATGATGGAGCTGCTGCAGGGTGTGGATGGCATCATTAAAAGGATTGAGAACATGACCTTGAAGAAGCCAAAAACATTTCACAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGACATGTTTAGAGAGCTAACAGATGTCCGACGCTACTGGGTTGATGTGACACTGGCTCCAAACAACATTTCGCATGCTGTCATTGCTGAAGATAAGAGACAAGTGAGCTATCAGAACCCATCATTTGGGTCACTCACGAATTTCAATTATTGTACTGGCGTCCTGGGCTCCCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGATGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCCGATGCAACGTATAATATTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTACAGGAAGGAGATAAATATAGTGTTTTCCAGGATGGTTCCTCACATACTCCTTTTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCGTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCAGTGTTCTTTTTCTAAGCCTGTATTTCCATATTTAAATCCCAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Patas 
ATGGCTTCTGGAATCCTGCTTAATGTAAAGGAGGAGGTGACCTGTCCTATCTGCCTGGAACTCCTGACAGAACCCCTGAGTCTGCCCTGCGGCCACAGCTTCTGCCAAGCGTGCATCACTGCAAACCACAAGAAGTCCGAAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACAGCCTAATCGGCATGTAGCCAACATAGTGGAGAAGCTCAGAGAGGTCAAGTTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTCCTACTCTTCTGTCAGGAGGACAGGAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACTTTCCTCATGGAGGAGGTTGCCCAGGAGTACCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGCATGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGATTCAAATAGACTACGACAAAACCAACGTCTTGGCAGATTTTGAGCAACTGAGAGAGATCCTGGACTGGGAGGAGAGCAATGAGCTGCAGTACCTGGAGAAGGAGGAAGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACGAAGATGGTGCGGCAGACCCAGTACGTGAGAGAGCTCATCTCAGATCTGGAGCATCGGTTGCAGGGGTCAATGATGGAGCTGCTGCAGGGTGTGGATGGCATCATTAAAAGGATTGAGAACATGACCTTGAAGAAGCCAGAAACATTTCACAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGCTCTGAAAGGAATGCTAGACATGTTTAGAGAGCTAACAGATGTCCGGCGCTACTGGGTTGATGTGACACTGGCTCCAAACAACATTTCGCATGTTGTCATTGCTGAAGATAAGAGACAAGTGAGCTCTCGGAACCCATTATTTCAGTCACTCAAGAATTTCAATTATTGTACTGGCATCCTGGGCTCCCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGATGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCCGATGCAATGTATGATGTTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTACAGGAAGGAGTAAAATATAGTGTTTTCCAGGATGGTTCCTCACATACTCCTTTTGCTCCTTTCATTGCGCCCCTCTCTGTGATTTTTTGTCCTGATCGTGTTGGAGTTTTCGTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCAGTGTTCTTTTTCTAAGCCTGTATTTCCATATTTAAATCCCAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +Colobus 
ATGGCTTCTGGAATCCTGGTTAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGTCTGCACTGCGGCCACAGCTTCTGCCAAGCGTGCATCACTGCAAACCACAAGAAGTCCGAAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACGGCCTAATCGGCATGTGGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAGTTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTCCTACTCTTCTGTCAGGAGGACAGGAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACGTTCCTCATGGAGGAGGTTGCCCAGGAGTACCACGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGCACGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGATTCAAATAGACTATGACAAAACCAACGTCTTGGCAGATTTTGAGCAACTGAGAGAGATCCTGGACTGGGAGGAGAGCAATGAGCTGCAGAACCTGGAGAAGGAGGAGGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACTGAGATGGTGCAGCAGACCCAGTACATGAGAGAGCTCGTCTCAGATCTGGAGCATCGGTTGCAGGGGTCAGTGATGGAGCTGCTGCAGGGTGTGGATGGCATCATAAAAAGGATTGAGGACATGACCTTGAAGAAGCCAAAAACTTTTCCCAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTAGACATGTTTAGAGAGCTAACAGATGTCCGACGCTACTGGGTTGATGTGACACTGGCTCCAAACAACATTTCACATGCTGTCATTGCTGAAGATAAGAGACGAGTGAGCTCTCCGAACCCATTATTTCAGTCACTCAAGAATTTCATTTATTGTACTGGCGTCCTGGGCTCCCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGATGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCCGATGCAATGTATAATATTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTACAGGAAGGAGTTAAATATAGTGTTTTCCAGGATGGTTCCTCACATACTCCTTTTGCTCCTTTCATTGTGCCCCTCTCTGTGATCATTTGTCCTGATCGTGTTGGAGTTTTCGTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCAGTGTTCTTTTTCTAAGCCTGTATTTCCATATTTAAATCCTAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +DLangur 
ATGGCTTCTGGAATCCTGGTTAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGTCTGCACTGCGGCCACAGCTTCTGCCAAGCGTGCATCACTGCAAACCACAAGAAGTCCGAAGAGAGAAGCTGCCCTGTGTGCCGGATCAGTTACCAGCCTGAGAACATACGGCCTAATCGGCATGTGGCCAACATAGTGGAGAAGCTCAGGGAGGTCAAGTTGAGCCCAGAGGGGCAGAAAGTTGATCACTGTGCACGCCATGGAGAGAAACTCCTACTCTTCTGTCAGGAGGACAGGAAGGTCATTTGCTGGCTTTGTGAGCGGTCTCAGGAGCACCGTGGTCACCACACGTTCCTCATGGAGGAAGTTGCCCAGGAGTACCACGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGCACGTGAAGCTCCAGACAGCTCTGGAGATGCTGAGGCAGAAGCAGCAGGAAGCTGAAAAGTTGGAAGCTGACATCAGAGAAGAGAAAGCTTCCTGGAAGATTCAAATAGACTGCGACAAAACCAATGTCTTGGCAGATTTTGAGCAACTGAGAGAGATCCTGGACTGGGAGGAGAGCAATGAGCTGCAGAACCTGGAGAAGGAGGAGGAAGACATTCTGAAAAGCCTTACGAAGTCTGAAACTGAGATGGTGCAGCAGACCCAGTACATGAGAGAGCTCATCTCAGATCTGGAGCATCGGTTGCAGGGGTCAATGATGGAGCTGCTGCAGGGTGTGGATGGCATCATAAAAAGGATTGAGAACATGACCTTGAAGAAGCCAAAAACTTTTCCCAAAAATCAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATCCTAGACATGTTTAGAGAACTAACAGATGTCCGACGCTACTGGGTTGATGTGACACTGGCTCCAAACAACATTTCACATGCTGTCATTGCTGAAGATAAGAGACAAGTGAGCTCTCCGAACCCATTATTTCAGTCACTCAAGAATTTCATTTATTGTACTGGCGTCCTGGGCTCCCAAAGTATCACATCAGGGAAACATTACTGGGAGGTAGATGTGTCCAAGAAAAGTGCTTGGATCCTGGGGGTATGTGCTGGCTTCCAACCCGATGCAATGTATAATATTGAACAAAATGAAAATTATCAACCTAAATATGGCTACTGGGTTATAGGGTTACAGGAAGGAGTTAAATATAATGTTTTCCAGGATGGTTCCTCACATACTCCTTTTGCTCCTTTCATTGTGCCCCTCTCTGTGATTATTTGTCCTGATCGTGTTGGAGTTTTCGTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATATCACAAACCATGGATTTCTCATCTATAAGTTTTCTCAGTGTTCTTTTTCTAAGCCTGTATTTCCATATTTAAATCCTAGAAAATGTACAGTCCCCATGACTCTGTGCTCACCAAGCTCT +PMarmoset 
ATGGCTTCCAGAATCCTGGTGAATATAAAGGAGGAGGTAACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCTCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCCTGCATCACTGCAAACCACAAAGAGTCTGGAGAGAGAAGCTGCCCTTTGTGCCGGATGAGTTACCCGTCTGAGAACTTGCGGCCTAATCGGCATTTGGCCAATATAGTGGAGAGGCTCAAAGAGGTCATGCTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGCAGGATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACACCGTGGTCACCACACATTCCTCGTGGAGGAGGTTGCAGAGAAATACCAAGGAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGCAGGATGCTGAAAAGCAAGGAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGCAGGATGCTGAAAAGTTAGAAGCTGATGTCAGAGAAGAGCAAGCTTCCTGGAAGATTCAAATACAAAATGACAAAACCAACATCATGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAAAGAGCTGCAAAACCTGGAGAAGGAGGAGAAAAACATTCTGAAAAGACTTGTACAGTCGGAAAGTGACATGGTGCTGCAGACCCAGTCCATTAGAGTGCTCATCTCAGATCTGGAGCGTCGCCTGCAGGGGTCAGTGATGGAGCTTTTACAGGGTGTGGATGACGTCATAAAAAGGATTGAGAAAGTTACTTTGCAGAAGCCAAAAACGTTTCTTAATGAAAAAAGGAGAGTATTTCGAGCTCCTGATCTGAAAGGAATGCTACAAGCATTTAAAGAGCTGACAGAAGTCCAACGCTACTGGGCTCATGTGACACTGGTTCCAAGTCACCCTTCATGTACTGTCATTTCTGAAGATGAGAGACAAGTGAGATATCAGGTTCCGATACATCAACCACTTGTGAAAGTCAAGTATTTTTATGGCGTCCTGGGCTCCCTAAGTATCACATCAGGGAAACATTACTGGGAAGTAGACGTGTCCAATAAAAGGGGTTGGATCCTGGGGGTATGTGGTAGCTGGAAATGCAATGCAAAATGGAATGTTCTAAGACCTGAAAATTATCAACCTAAAAATGGCTACTGGGTTATAGGGTTACAGGATGCAGTTAAATATAGTGATGTCCAGGATGGTTCTCGCTCTGTTTCTTCTGGTCCTTTGATCGTGCCCCTCTTTATGACTATTTGTCCTAATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTATCTCATTCTTCAATGTCACAAGCAATGGATTTCTCATCTATAAGTTTTCTAACTGTCATTTTTCTTATCCTGTATTTCCATATTTCAGTCCTACGACATGTGAATTACCCATGACTCTGTGCTCACCAAGCTCT +Tamarin 
ATGGCTTCCAGAATCCTGGTGAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCTCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCATGCATCACTGCAAACCACAAAGAGTCTGGAGAGAGAAGCTGCCCCTTGTGCCGGATGAGTTACCCGTCTGAGAACTTGCGGCCTAATCGGCATTTGGCCAACATAGTGGAGAGGCTCAAAGAGGTCATGCTGAGCCCAGAGGGGCAGAAGGTTGGTCACTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTGAGCAGGATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACATCGTGGTCACCACACATTACTCGTGGAGGAGGTTGCAGAGAAATACCAAGAAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGCAGGATGCTGAAAAGCAAGAAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGCAGGATGCTGAAAAGTTGGAAGCTGACGTCAGAGAAGAGCAAGCTTCTTGGAAGATTCAAATACGAAATGACAAAACCAACATCATGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAAAGAGCTGCAAAACCTGGAGAAGGAGGAGAAAAACATTCTGAAAAGACTTGTACAGTCTGAAAGTGACATGGTGCTGCAGACCCAGTCCATGAGAGTGCTCATCTCAGATCTGGAGCGTCGCCTGCAGGGGTCAGTGCTGGAGCTGTTACAGGGTGTGGATGATGTCATAAAAAGGATTGAGACAGTGACTTTGCAGAAGCCAAAAACCTTTCTTAATGAAAAAAGGAGAGTATTTCGAGCTCCTGATCTGAAAGCAATGCTACAAGCATTTAAAGAGCTGACAGAAGTCCAACGCTACTGGGCTCATGTGACACTGGTTCCAAGTCACCCTTCATATGCTGTTATTTCTGAAGATGAGAGACAAGTGAGATATCAGTTTCAGATACATCAACCATCTGTGAAAGTCAACTATTTTTATGGCGTCCTGGGCTCCCCAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGACCAATAAAAGGGATTGGATCCTGGGGATATGTGTTAGCTTTAAATGCAATGCAAAATGGAATGTTCTAAGACCTGAAAATTATCAACCTAAAAATGGCTACTGGGTTATAGGGTTACAGGATGCAGTTAAATATAGTGATTTCCAGATTGGTTCCCGCTCTACTGCTTCTGTTCCTTTGATCGTGCCCCTCTTTATGACTATTTATCCTAATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATGTCACAAACAATGGATTTCTCATCTATAAGTTTTCTAACTGTCATTTTTCTTATCCTGTATTTCCATATTTCAGTCCTATGACATGTGAATTACCCATGACTCTGTGTTCACCAAGCTCT +Squirrel 
ATGGCTTCCAGAATCCTGGGGAGTATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCTCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCATGCATCACTGCAAATCACAAAGAGTCTGGAGAGAGAAGCTGCCCTTTGTGCCGGCTCCCTTACCAGTCTGAGAACCTGCGGCCTAATCGGCATTTGGCCAGCATCGTGGAGAGGCTCAGGGAGGTCATGCTGAGACCAGAAAGGCAGAACGTTGATCACTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTGAGCAGGATGGAAATATCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACACCGTGGTCACAACACATTCCTCGTGGAGGAGGTTGCACAGAAATACCGAGAAAAGCTCCAGGTAGCTCTGGAGACAATGAGGCAGAAGCAGCAGGATGCTGAAAAGCGAGAAAAGCTCCAGGTAGCTCTGGAGACAATGAGGCAGAAGCAGCAGGATGCTGAAAAGTTGGAAGCTGACGTCAGACAAGAGCAAGCTTCCTGGAAGATTCAAATACAAAATGACAAAACCAACATCATGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAATGAGCTGCAAAACCTGGAGAAGGAGGAGAAAAACATTCTGAAAAGACTTGTACAGTCTGAAAATGACATGGTGCTGCAGACCCAGTCCGTGAGAGTGCTCATCTCAGATCTGGAGCGTCGCCTGCAGGGGTCAGTGGTGGAGCTGTTACAGGATGTGGATGGTGTCATAAAAAGGATTGAGAAAGTGACTTTGCAGAAGCCAAAAACCTTCCTTAATGAAAAAAGGAGAGTATTTCGAGCTCCTGATCTGAAAAGAATGCTCCAAGTGTTAAAAGAACTGACAGAAGTCCAACGCTACTGGGCTCATGTGACACTGGTTCCAAGTCACCCTTCATATACTATCATTTCTGAAGATGGGAGACAAGTGAGATATCAGAAACCTATACGTCACCTACTTGTGAAAGTCCAGTATTTTTATGGCGTCCTGGGCTCCCCAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAATAAAAGGGCTTGGACCCTGGGGGTATGTGTTAGCTTGAAATGTACTGCAAATCAGAGTGTTTCAGGAACTGAAAATTATCAACCTAAAAATGGCTACTGGGTTATAGGGTTACAGAGTTCATTTGAATTTCGTGATTTCCTGGCTGGTTCCCGCCTTACTCTTTCTCCTCCTTTGATCGTGCCCCTCTTTATGACTATTTGTCCTAATCGGGTCGGAGTTTTCCTAGACTATGAGGCTCGCACTATCTCATTCTTCAATGTCACAAGCAATGGATTTCTCATCTACAAGTTTTCTGACTGTCATTTTTCTTATCCTGTATTTCCATATTTCAATCCTATGACGTGTGAATTACCCATGACTCTGTGCTCACCAAGGTCT +Owl 
ATGGCTTCCAGAATCCTGGTCAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGCCTGGACTGTGGCCATAGCTTCTGCCAAGCATGCATCACTGCAAATCACAAAAAGTCTGGAGAGAGAAGCTGCCCTTTGTGCCGGATCAGTTACTCGTCTGAGAACCTGCGGCCTAATCGGCATTTGGTCAACATAGTGGAGAGGCTCAGGGAGGTCATGCTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACACCATGGAGAGAAACTTGTACTCTTCTGTCAGCAGGATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACACCGTGGGCACCAGACATTCCTTGTGGAGGAGGTTGCACAGAAATACCGAGAAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGAAGGATGCTGAAAAGCGAGAAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGAAGGATGCTGAAAAGTTGGAAGCTGACGTCAGAGAAGAGCAAGCTTCCTGGAAGATTCAAATACAAAATGACAAAACCAACATCATGGCAGAGTTTAAAAAACGGAGAGACATCCTGGACTGTGAGGAGAGCAAAGAGTTGCAAAACCTGGAGAAGGAGGAGAAAAACATTCTGAAAAGACTTGTACAGTCTGAAAATGACATGGTGCTGCAGACCCAGTCCGTGAGAGTGCTCATCTCAGATCTGGAGCATCGCCTGCAGGGGTCAGTGATGGAGCTGTTACAGGGTGTGGATGGTGTCATAAAAAGGATTGAGAAAGTGACTTTGCAGAATCCAAAAACCTTTCTTAATGAAAAAAGGAGAATATTTCAAACTCCTGATCTGAAAGGAACACTACAAGTGTTTAAAGAGCCGACAGAAGTCCAACGCTACTGGGCTCATGTGACACTGGTTCCAAGTCACCCTTCATGTACTGTCATTTCTGAAGATGAGAGACAAGTGAGATATCAGAAACGGATATATCAACCATTTCTGAAAGTCAAGTATTTTTGTGGCGTCCTGGGCTCCCCAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAATAAAAGTGAGTGGATCCTGGGGGTATGTGTTAGCTTGAAGCGCACTGCAAGTTGTAGTGTTCCAAGAATTGAAAATGATCAACCTAAAAATGGCTACTGGGTTATAGGGTTACAGGATGCAGTTGAATATAGTGATTTCCAGGATGGTTCCCGCTCTACTCCTTCTGCTCCTTTGATCGTGCCCCTCTTTATGACTATTTGTCCTAATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATGTCACAAACAATGGATTTCTCATCTATAAGTTTTCTAACTGTCATTTTTGTTATCCTGTATTTCCATATTTCAGTCCTATGACATGTGAATTACCCATGACTCTGTGCTCACCAAGCTCT +Titi 
ATGGCTTCCAGAATCCTGGTGAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCATGCATCACCGCAAACCACAAAGAGTCTGGAGAGAGAAGCTGCCCTTTGTGCAGGATCAGTTACCCGTCTGAGAACCTGCGGCCTAATCGGCATTTGGCCAACATAGTGGAGAGGCTCAGGGAGGTCGTGCTGAGCCCAGAGGGGCAGAAGGTTGATCTCTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGCAGGATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACACCGTGGTCACCACACATTCCTCGTGGAGGAGGTTGCACAGACATACCGAGAAAATCTCCAGGTAGTTCTGGAGATGATGAGGCAGAAGCATCAGGATGCTGAAAAGCGAGAAAATCTCCAGGTAGTTCTGGAGATGATGAGGCAGAAGCATCAGGATGCTGAAAAGTTGGAAGCTGACGTCAGAGAAGAGCAAGCTTCCTGGAAGATTCAAATACAAAATGACAAAACCAACATCATGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAATGAGCTGCAAAACCTAGAGAAGGAGGAGAAAAACATTCTGAAAAGACTTGTACAGTCTGAGAATGACATGGTGCTGCAGACCCAGTCCATAAGCGTGCTCATCTCGGATCTGGAGCATCGCCTGCAGGGGTCAGTGATGGAGCTGTTACAGGGTGTGGATGGCGTCATAAAAAGGGTTAAGAATGTGACTTTGCAGAAGCCAAAAACTTTTCTTAATGAAAAAAGGAGAGTATTTCGAGTTCCTGATCTGAAAGGAATGCTACAAGTGTCTAAAGAGTTGACAGAAGTCCAACGCTACTGGGCTCATGTGACACTGGTTGCAAGTCACCCTTCACGTGCTGTCATTTCTGAAGACGAAAGACAAGTGAGATATCAGGAATGGATACATCAATCATCTGGGAGAGTCAAGTATTTTTATGGCGTCCTGGGCTCCCCAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAATAAAAGTGCTTGGATCCTGGGGGTATGTGTTAGCTTGAAATGCGCTGCAAATCGGAATGGTCCAGGAGTTGAAAACTATCAACCTAAAAATGGCTACTGGGTGATAGGGTTACAGGATTCAGTTAAATATAATGATTTCCAGGATGGTTCCCGCTCTACTACTTATGCTCCTTTGATCGTGCCCCTCTTTATGACTATTTGTCCTAATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATGTCACAAGCAATGGATTTCTCATCTATAAGTTTTCTAACTGTCATTTTTCTTATCCTGTATTTCCATATTTCAGTCCTATGACATGTGAATTACCCATGACTCTGTGCTCACCAAGGTCT +Saki 
ATGGCTTCCAGAATCCTGATGAACATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTCCTGACAGAACCCCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCATGCATCACTGCAAACCACAAAAAGTCTGGAGAGAGAAGCTGCCCTTTGTGCCGGATCAGTTACCCATCTGAGAACCTGCGGCCTAATCGGCATTTGGCCAACATAGTGGAGAGGCTCAGGGAGGTCATGCTGAGCCCAGAGGGGCAGAAGGTTGATCACTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGCAGGATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACACCGTGGTCACCACACATTACTCGTGGAGGAGGTTGCACAGACATACCGAGAAAATCTCCAGGTAGCTCTGGAGACGATGAGGCAGAAGCAGCAGGATGCTGAAAAGCGAGAAAATCTCCAGGTAGCTCTGGAGACGATGAGGCAGAAGCAGCAGGATGCTGAAAAGTTAGAAGCTGACGTCAGAGAAGAGCAAGCTTCCTGGAAGATTCAAATACGAGATGACAAAACCAACATTATGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAATGAGCTGCAAATCCTAGAGAAGGAGGAGAAAAACATTCTGAAAAGACTTACACAGTCTGAAAATGACATGGTGCTGCAGACCCAGTCCATGGGAGTGCTCATCTCAGATCTGGAGCATCGCCTGCAGGGGTCAGTGATGGAGCTGTTACAGGGTGTGGATGAAGTCATAAAAAGGGTTAAGAACGTGACTTTGCAGAAGCCGAAAACTTTTCTTAATGAAAAAAGGAGAGTATTTCGAGCTCCTGATCTGAAAGGAATGCTACAAGTGTTCAAAGAGCTGACAGAAGTCCAACGCTACTGGGTTCATGTGACACTGGTTCCAAGTCACCTTTCATGTGCTGTCATTTCTGAAGATGAGAGACAAGTGAGATATCAGGAACGGATACATCAATCATTTGGGAAAGTCAAGTATTTTTATGGCGTCCTGGGCTCCCCAAGTATCAGATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAATAAAAGTGCTTGGATCCTGGGAGTATGTGTTAGCTTGAAATGCACTGCAAATCGGAATGGTCCAAGAATTGAAAATTATCAACCTAAAAATGGCTACTGGGTTATAGGGTTACAGGATTCAGTTAAATATAGTGATTTCCAGGATGGTTCCCACTCTGCTACTTATGGTCCTTTGATCGTGCCCCTCTTTATGACTATTTGTCCTAATCGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATGTCACAAGCAATGGATTTCTCATCTATAAGTTTTCTAACTGTCGTTTTTCTGATTCTGTATTTCCATATTTCAGTCCTATGACATGTGAATTACCCATGACTCTGTGCTCACCAAGATCT +Howler 
ATGGCTTCCAAAATCCTGGTGAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTACTGACAGAACCTCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCATGCATCACTGCAAACCACAAAGAGTCCAGAGAGAGAAGCTGCCCTTTGTGCCGGGTCAGTTACCACTCTGAGAACCTGCGGCCTAATCGGCATTTGGCCAACATAGCGGAGAGGCTCAGGGAGGTCATGTTGAGCCCAGAGGGGCAGAAGGTTGATCGCTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGCAGCATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTGAAGAACACCGTGGTCACCGCACATCCCTCGTGGAGGAGGTTGCACAGAAATACCGAGAAAAGCTCCAGGCAGCTCTGGAGATGATGAGGCAGAAGGAGCAGGATGCTGAAATGCGAGAAAAGCTCCAGGCAGCTCTGGAGATGATGAGGCAGAAGGAGCAGGATGCTGAAATGTTGGAAGCTGACGTCAGAGAAGAGCAAGCTTCCTGGAAGATTCAAATAGAAAATGACAAAACCAGCACCCTGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAACGAGCTGCAAAAACTGGAGAAGGAGGAGGAAAACCTTCTGAAAAGACTTGTACAGTCTGAAAATGACATGGTGTTGCAGACCCAGTCCATAAGAGTGCTCATTGCAGACCTGGAGCGTCGCCTGCAGGGGTCAGTTATGGAGCTGTTACAGGGTGTGGAAGGCGTCATAAAAAGGATTAAGAACGTGACTTTGCAGAAGCCAGAAACCTTTCTTAATGAAAAAAGGAGAGTATTTCAAGCTCCTGATCTGAAAGGAATGCTACAAGTGTTTAAAGAGCTGAAAGAAGTCCAGTGCTACTGGGCTCATGTGACACTGATTCCGAATCACCCTTCATGTACTGTCATTTCTGAAGATAAGAGAGAAGTGAGATATCAGGAACAGATACATCATCCGTCTATGGAAGTCAAGTATTTTTATGGCATCCTGGGCTCCCCAAGTATCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAATAAAAGTGCTTGGATCCTGGGGGTATGTGTCAGCTTGAAATGCATTGGAAATCGGAATGTTCCAGAAACTGAAAATTATCAACCTAAAAATCGCCACTTGTTTACAGGGTTACAGAATAAAGTTCAATATAACGATTTTCAGGATGATTCCCTCTCTACTCCTTCTGCTCCTTTGATCGTACCCCTCTTTATGACTATTTGTCCTAAACGTGTTGGAGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATGTCACAAGCAATGGATATCTCATCTATAAGTTTTCTAACTGTCAGTTTTCTTATCCTGTATTTCCATATTTCAGTCCTATGACATGTGAATTACCCATGACTCTGTGCTCACCAAGCTCT +Spider 
ATGGCTTCCGAAATCCTGTTGAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGAACTACTGACAGAACCTCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCATGCATCACTGCAAACCACAAAGAGTCTGGAGAGAGAAGCTGCCCTTTGTGCCGGGTCAGTTACCAGTCTGAGAACCTGCGGCCTAATCGGCATTTGGCAAACATAGCGGAGAGGCTCAGGGAGGTCATGTTGAGCCCAGAAGGGCAGAAGGTTGATCGCTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGCAGCATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACACCGTGGTCACAGCACATTCCTCGTGGAGGAGGTTGCACAGAAATACCAAGAAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGCAGGATGCTGAAAAGCAAGAAAAGCTCCAGGTAGCTCTGGAGATGATGAGGCAGAAGCAGCAGGATGCTGAAAAGTTGGAAGCTGATGTCAGAGAAGAGCAAGCTTCCTGGAAGATTCAAATAGAAAATGACAAAACCAACATCCTGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAATGAGCTACAAAACTTGGAGAAGGAGGAGGAAAACCTTCTGAAAACACTTGCACAGTCTGAAAATGACATGGTGCTGCAGACCCAGTCCATGAGAGTGCTCATCGCAGATCTGGAGCACCGCCTGCAGGGGTCAGTGATGGAGCTGTTACAGGATGTGGAAGGCGTCATAAAAAGGATTAAGAATGTGACTTTGCAGAAGCCAAAAACCTTTCTTAATGAAAAAAGGAGAGTGTTTCGAGCTCCTGATCTGAAAGGAATGCTACAAGTGTTTAAAGAGCTGAAAGAAGTCCAGTGCTACTGGGCTCATGTGACACTGGTTCCAAGTCACCCTTCATGTACTGTCATTTCTGAAGATGAGAGACAAGTGAGATATCAGGAACAGATACATCAACCATCTGTGAAAGTCAAGTATTTTTGTGGCGTCCTGGGCTCCCCAGGTTTCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCGATAAAAGTGCTTGGATCCTGGGGGTATGTGTTAGCTTGAAATGCACTGCAAATCAGAATGTTCCAGGAACTGAAAATTATCAACCTAAAAATGGCTCCTGGGTTACAGGGTTACAGGATGCAGTTAAATATAGTGATTTCCAGGATGGTTCCTGCTCTACTCCTTCTGCTCCTTTGATGGTGCCCCTCTTTATGACTATTTGTCCTAAACGTGTTGGAGTTTTCCTAGACTGTAAGGCTTGCACTGTCTCATTCTTCAATGTCACAAGCAATGGATGTCTCATCTATAAGTTTTCTAAGTGTCATTTTTCTTATCCTGTATTTCCATATTTCAGTCCTATGATATGTAAATTACCCATGACTCTGTGCTCACCAAGCTCT +Woolly 
ATGGCTTCCGAAATCCTGGTGAATATAAAGGAGGAGGTGACCTGCCCCATCTGCCTGGACCTACTGACAGAACCTCTGAGCCTAGACTGTGGCCACAGCTTCTGCCAAGCATGCATCACTGCAGACCACAAAGAGTCTGGAGAGAGAAGCTGCCCTTTGTGCCGGGTCGGTTACCAGTCTGAGAACCTGCGGCCTAATCGGCATTTGGCAAACATAGCCGAGAGGCTCAGGGAGGTCATGTTGAGCCCAGAAGGGCAGAAGGTTGATCGCTGTGCACGCCATGGAGAGAAACTTCTACTCTTCTGTCAGCAGCATGGAAATGTCATTTGCTGGCTTTGTGAGCGGTCTCAAGAACACCGTGGTCACAGCACATTCCTCGTGGAGGAGGTTGCACAGAAATACCGAGAAAAGCTCCAGGTAGCTCTGGAAATGATGAGGGAGAAGCAGCAGGATGCTGAAAAGCGAGAAAAGCTCCAGGTAGCTCTGGAAATGATGAGGGAGAAGCAGCAGGATGCTGAAAAGTTGGAAGCTGATGTCAGAGAAGAGCAAGCTTCCTGGAAGATTCAAATAAAAAACGACAAAACCAACATCCTGGCAGAGTTTAAGCAACTGAGAGACATCCTGGACTGTGAGGAGAGCAATGAGCTGCAAAACCTGGAGAAGGAGGAGGAAAACCTTCTGAAAATACTTGCACAGTCTGAAAATGACATGGTGCTGCAGACCCAGTCCATGAGAGTGCTCATCGCAGATCTGGAGCATCGCCTGCAGGGGTCAGTGATGGAGCTGTTACAGGGTGTGGAAGGCATCATAAAAAGGACTACGAATGTGACTTTGCAGAAGCCAAAAACCTTTCTTAATGAAAAAAGGAGAGTGTTTCGAGCTCCTAATCTGAAAGGAATGCTACAAGTGTTTAAAGAGCTGAAAGAAGTCCAATGCTACTGGGCTCATGTGACACTGGTTCCAAGTCACCCTTCATGTGCTGTCATTTCTGAAGATCAGAGACAAGTGAGATATCAGAAACAGAGACATCGACCATCTGTGAAAGCCAAATATTTTTATGGCGTCCTGGGCTCCCCAAGTTTCACATCAGGGAAACATTACTGGGAGGTAGACGTGTCCAATAAAAGTGCTTGGATCCTGGGGGTATGTGTTAGCTTGAAATGCACTGCAAATCAGAATGTTCCAGGAACTGAAGATTATCAACCTAAAAATGGCTACTGGGTTACAGGGTTACAGGATGCAGGTAAATATAGTGATTTCCAGGATGGTTCCTGCTCTACTCCTTTTGCTCCTTTGATTGTGCCCCTCTTTATGACTATTCGTCCTAAACGTGTTGGCGTTTTCCTAGACTATGAGGCTTGCACTGTCTCATTCTTCAATGTCACAAGCAATGGATGTCTCATCTATAAGTTTTCTAACTGTCATTTTTCTTGTCCTGTATTTCCATATTTCAGTCCTATGACATGTAAATTACCCATGACTCTGTGCTCACCAAGCTCT \ No newline at end of file diff --git a/tests/test_data/test_tree.txt b/tests/test_data/test_tree.txt new file mode 100644 index 00000000..854e37b6 --- /dev/null +++ b/tests/test_data/test_tree.txt @@ -0,0 +1,9 @@ +(Chimp:0.00208,((((Owl:0.02620,((((Tamarin:0.01875, +PMarmoset:0.01757):0.01468,Squirrel:0.04726):0.00187, +((Woolly:0.01966,Spider:0.01147):0.00891,Howler:0.03714):0.01619):0.00201, +(Saki:0.02055,Titi:0.01984):0.01030):0.00482):0.11096, 
+(((Patas:0.01087,((Tant_cDNA:0.00133,AGM_cDNA:0.00134):0.00510, +(Baboon:0.00300,Rhes_cDNA:0.00592):0.00417):0.00257):0.01211, +DLangur:0.00474):0.00128,Colobus:0.00273):0.02822):0.01375, +(Gibbon:0.02325,Orangutan:0.01225):0.00205):0.00660, +Gorilla:0.00545):0.00131,Human:0.00663); diff --git a/tests/test_manager.py b/tests/test_manager.py index 4e79e8b0..fda7c63e 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -3,7 +3,7 @@ from shutil import rmtree from OrthoEvol.Manager.management import ProjectManagement - +from OrthoEvol.Manager.webster import Webster class TestManager(unittest.TestCase): """Test the Manager module.""" @@ -11,6 +11,7 @@ class TestManager(unittest.TestCase): def setUp(self, project='test-project', repository=None): self.project = project self.repo = repository + self.webster = Webster() def delete_project(self): rmtree(self.project) @@ -26,6 +27,14 @@ def test_projectmanagement(self): self.assertEqual(str(self.project), 'test-project') self.delete_project() + def test_webster(self): + self.webster.add("GUIDANCE2") + self.webster.add("PAL2NAL") + self.assertEqual(len(self.webster.citations), 2) + self.assertIn("GUIDANCE2", self.webster.citations) + self.assertIn("PAL2NAL", self.webster.citations) + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_orthologs.py b/tests/test_orthologs.py index a285122a..ded9ea57 100644 --- a/tests/test_orthologs.py +++ b/tests/test_orthologs.py @@ -1,9 +1,11 @@ """This is the test suite for Orthologs.""" import unittest from shutil import rmtree +import os from OrthoEvol.Orthologs.Blast import BaseBlastN, OrthoBlastN - +from OrthoEvol.Orthologs.Phylogenetics.PhyML import PhyML +from OrthoEvol.Orthologs.Phylogenetics.TreeViz import TreeViz class TestOrthologs(unittest.TestCase): """Test the Orthologs module.""" @@ -11,37 +13,63 @@ class TestOrthologs(unittest.TestCase): def setUp(self, project="gpcr", project_path="projects"): self.project = project self.project_path = 
project_path + self.cur_dir = os.path.dirname(os.path.abspath(__file__)) + self.join = os.path.join def delete_project(self, project_path): rmtree(project_path) - def test_baseblastn(self): - """Test the BaseBlastN class.""" - # The with statement is for travisci where a BLASTDB variable - # is not set. - # TIP: Remove the with statement if testing with BLASTDB in your - # environment variables. - with self.assertRaises(EnvironmentError): - gpcr_blastn = BaseBlastN(project=self.project, method=1, - save_data=True, acc_file="gpcr.csv", - copy_from_package=True, - ref_species='Homo_sapiens', - proj_mana=None, - project_path=self.project_path) - self.assertEqual(gpcr_blastn.proj_mana, None) - self.assertEqual(gpcr_blastn.acc_file, "gpcr.csv") - self.assertTrue(gpcr_blastn.copy_from_package) - self.delete_project(project_path=self.project_path) - - def test_orthoblastn(self): - """Test the OrthoBlastN class.""" - with self.assertRaises(EnvironmentError): - ortho_blastn = OrthoBlastN(project="orthology-project", - method=1, save_data=True, - acc_file="gpcr.csv", - copy_from_package=True) - self.assertEqual(ortho_blastn.ref_species, 'Homo_sapiens') - self.assertTrue(ortho_blastn.copy_from_package) + def delete_phyml_output(self): + os.remove(self.join(self.cur_dir, 'test_data/test.phy_phyml_stats.txt')) + os.remove(self.join(self.cur_dir, 'test_data/test.phy_phyml_tree.txt')) + + def delete_treeviz_output(self): + os.remove('example.png') + + # def test_baseblastn(self): + # """Test the BaseBlastN class.""" + # # The with statement is for travisci where a BLASTDB variable + # # is not set. + # # TIP: Remove the with statement if testing with BLASTDB in your + # # environment variables. 
+ # with self.assertRaises(EnvironmentError): + # gpcr_blastn = BaseBlastN(project=self.project, method=1, + # save_data=True, acc_file="gpcr.csv", + # copy_from_package=True, + # ref_species='Homo_sapiens', + # proj_mana=None, + # project_path=self.project_path) + # self.assertEqual(gpcr_blastn.proj_mana, None) + # self.assertEqual(gpcr_blastn.acc_file, "gpcr.csv") + # self.assertTrue(gpcr_blastn.copy_from_package) + # self.delete_project(project_path=self.project_path) + + # def test_phyml(self): + # """Test the PhyML class.""" + # p = PhyML(infile=self.join(self.cur_dir, 'test_data/test.phy'), datatype='nt') + # p.run(bootstrap=0) + # self.assertIsNotNone(self.join(self.cur_dir, 'test_data/test.phy_phyml_stats.txt')) + # self.assertIsNotNone(self.join(self.cur_dir, 'test_data/test.phy_phyml_tree.txt')) + # self.delete_phyml_output() + + def test_treeviz(self): + """Test the TreeViz class.""" + t = TreeViz(path=self.join(self.cur_dir, 'test_data/test_tree.txt'), + tree_format='newick') + t.draw_tree() + t.save_tree('example.png') + self.assertIsNotNone('example.png') + self.delete_treeviz_output() + + # def test_orthoblastn(self): + # """Test the OrthoBlastN class.""" + # with self.assertRaises(EnvironmentError): + # ortho_blastn = OrthoBlastN(project="orthology-project", + # method=1, save_data=True, + # acc_file="gpcr.csv", + # copy_from_package=True) + # self.assertEqual(ortho_blastn.ref_species, 'Homo_sapiens') + # self.assertTrue(ortho_blastn.copy_from_package) if __name__ == '__main__': diff --git a/tests/test_tools.py b/tests/test_tools.py index 310d2a68..4696b3f7 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -9,6 +9,7 @@ from OrthoEvol.Tools.ftp import NcbiFTPClient from OrthoEvol.Tools.mygene import MyGene from OrthoEvol.Manager.config import test +from OrthoEvol.Tools.pybasher import PyBasher class TestTools(unittest.TestCase): @@ -49,6 +50,54 @@ def test_mygene(self): mg.query_mygene() os.remove(self.outfile) + def test_mv(self): + # 
Create a file to move + with open("test.txt", "w") as f: + f.write("Test content") + + # Create a PyBasher instance + pybasher = PyBasher() + + # Move the file + pybasher.mv("test.txt", "moved.txt") + + # Check that the file was moved + assert not os.path.exists("test.txt") + assert os.path.exists("moved.txt") + + # Check that the moved file has the correct contents + with open("moved.txt", "r") as f: + moved_content = f.read() + assert moved_content == "Test content" + + # Clean up + os.remove("moved.txt") + + def test_cp(self): + # Create a file to copy + with open("test.txt", "w") as f: + f.write("Test content") + + # Create a PyBasher instance + pybasher = PyBasher() + + # Copy the file + pybasher.cp("test.txt", "copy.txt") + + # Check that the copy was made + assert os.path.exists("copy.txt") + + # Check that the copy has the same contents as the original + with open("test.txt", "r") as f: + original_content = f.read() + with open("copy.txt", "r") as f: + copy_content = f.read() + assert original_content == copy_content + + # Clean up + os.remove("test.txt") + os.remove("copy.txt") + if __name__ == '__main__': unittest.main() diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..cf97c1b3 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,58 @@ +import unittest +from pathlib import Path +import os +import shutil + +from OrthoEvol.utilities import CookieUtils, FunctionRepeater + + +class TestCookieUtils(unittest.TestCase): + + def setUp(self): + self.utils = CookieUtils() + self.test_dir = Path('test_dir') + self.test_dir.mkdir(exist_ok=True) + self.archive_path = Path('archive_dir') + self.archive_path.mkdir(exist_ok=True) + + def tearDown(self): + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + if self.archive_path.exists(): + shutil.rmtree(self.archive_path) + + def test_archive(self): + # Mocking file and directory creation for the test + test_file = self.test_dir / 'test.txt' + with open(test_file, 'w') as 
f: + f.write('test') + self.assertTrue(test_file.exists()) + + # Test archive functionality + archive_list = self.utils.archive(database_path=self.test_dir, archive_path=self.archive_path, option='Full') + self.assertIsInstance(archive_list, list) + self.assertTrue(any(self.archive_path in Path(a) for a in archive_list)) + + def test_get_size(self): + test_file = self.test_dir / 'test.txt' + with open(test_file, 'w') as f: + f.write('test') + size = self.utils.get_size(start_path=str(test_file)) + self.assertIsInstance(size, str) + +class TestFunctionRepeater(unittest.TestCase): + + def setUp(self): + self.mock_function = unittest.mock.Mock() + self.repeater = FunctionRepeater(interval=1, function=self.mock_function) + + def tearDown(self): + self.repeater.stop() + + def test_repeater_start_stop(self): + self.assertTrue(self.repeater.is_running) + self.repeater.stop() + self.assertFalse(self.repeater.is_running) + +if __name__ == '__main__': + unittest.main()