-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyse_nexus_characters.py
71 lines (54 loc) · 1.82 KB
/
analyse_nexus_characters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
Attempt to work out what MrBayes means when it says "There are 80 characters
incompatible with the specified coding bias. These characters will be excluded."
Assumption is that a character is only informative if it has at least two
readings (states), each attested by at least two witnesses.
However, the numbers don't quite tally with MrBayes' complaint...
"""
import sys
from collections import defaultdict
class Character(object):
    """A single character: the list of readings recorded for it."""

    def __init__(self):
        # Readings are appended by the caller, one per matrix row.
        self.readings = []

    def is_informative(self):
        """Report whether at least two distinct readings each occur repeatedly.

        The symbols '-' and '?' are never counted. Every reading that occurs
        more than once is reported on stdout as a side effect.

        Returns:
            True if two or more distinct readings each occur more than once,
            otherwise False.
        """
        repeated = 0
        for reading in set(self.readings):
            if reading not in ('-', '?'):
                count = self.readings.count(reading)
                if count >= 2:
                    print("Reading {} occurs {} times".format(reading, count))
                    repeated += 1
        return repeated >= 2
def analyse(input_file):
    """Count informative and uninformative characters in a NEXUS matrix.

    Everything up to the line consisting solely of the ``MATRIX`` keyword
    (matched case-insensitively) is skipped. Each following non-blank line is
    read as ``<taxon> <states>`` until a line consisting solely of ``;``.
    Column ``i`` of every state string is appended to character ``i``.

    A three-line summary is printed to stdout; nothing is returned.
    ``Character.is_informative`` also prints as it is evaluated.

    Args:
        input_file: Path of the NEXUS file to read.

    Raises:
        ValueError: If a matrix row is not exactly two whitespace-separated
            fields.
    """
    ntax = 0
    matrix = defaultdict(Character)
    with open(input_file) as nexus:
        in_matrix = False
        for lineno, line in enumerate(nexus, 1):
            stripped = line.strip()

            # Look for the matrix
            if not in_matrix:
                # NEXUS keywords are case-insensitive, so accept "matrix",
                # "Matrix", etc. as well as "MATRIX".
                if stripped.upper() == "MATRIX":
                    in_matrix = True
                continue

            # We're in the matrix
            if stripped == ';':
                break
            if not stripped:
                # Blank lines between rows carry no data; skip them rather
                # than failing on the unpack below.
                continue

            fields = stripped.split()
            if len(fields) != 2:
                raise ValueError(
                    "Line {}: expected '<taxon> <states>', got {!r}".format(
                        lineno, stripped))
            states = fields[1]
            ntax += 1
            for i, s in enumerate(states):
                matrix[i].readings.append(s)

    inf = 0
    notinf = 0
    for char in matrix.values():
        if char.is_informative():
            inf += 1
        else:
            notinf += 1

    print("Matrix has {} taxa and {} characters".format(ntax, len(matrix)))
    print(" {} characters are informative".format(inf))
    print(" {} characters are uninformative".format(notinf))
if __name__ == "__main__":
    # The first command-line argument is the NEXUS file to analyse; any
    # further arguments are ignored. Exit with a usage message instead of an
    # IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <nexus_file>".format(sys.argv[0]))
    nexus = sys.argv[1]
    analyse(nexus)