-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcalculate_involved_genes.py
123 lines (96 loc) · 2.84 KB
/
calculate_involved_genes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import sys
def matchSysnonyms(synonyms, ontology, ontologyGenes, interactions):
    """Rewrite interaction gene names to the spelling used by the ontology.

    For every gene targeted by a miRNA, the gene is kept as-is when it is
    already an ontology gene. Otherwise its synonym list is scanned and the
    first synonym that is an ontology gene replaces it. Genes with no
    matching synonym are kept unchanged.

    Args:
        synonyms: dict mapping a gene name to an iterable of its synonyms.
        ontology: unused; kept so existing callers keep working.
        ontologyGenes: collection of gene names present in the ontology.
        interactions: dict mapping a miRNA to an iterable of target genes.

    Returns:
        A new dict mapping each miRNA to a set of (possibly renamed) genes.
        The input ``interactions`` is not modified.
    """
    newInt = {}
    for mirna, genes in interactions.items():
        mapped = newInt.setdefault(mirna, set())
        for gene in genes:
            newGene = gene
            if gene not in ontologyGenes:
                for synonym in synonyms.get(gene, ()):
                    if synonym in ontologyGenes:
                        newGene = synonym
                        break
            # Store the resolved name; storing ``gene`` here would discard
            # the synonym lookup done above.
            mapped.add(newGene)
    return newInt
def readOntology(filename):
    """Read a '|'-separated ontology file of ``gene|category|name`` records.

    Blank lines are skipped. A non-blank line with fewer than three fields
    raises IndexError.

    Args:
        filename: path of the ontology file.

    Returns:
        A tuple ``(ontology, ontologyNames, ontologyGenes)`` where
        ``ontology`` maps each category to the set of its genes,
        ``ontologyNames`` maps each category to the first name seen for it,
        and ``ontologyGenes`` is the set of all genes in the file.
    """
    ontology = {}
    ontologyNames = {}
    ontologyGenes = set()
    # ``with`` closes the file even if a malformed line raises.
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            fields = line.split('|')
            gene = fields[0]
            cat = fields[1]
            name = fields[2]
            # setdefault keeps the first name recorded for a category.
            ontologyNames.setdefault(cat, name)
            ontology.setdefault(cat, set()).add(gene)
            ontologyGenes.add(gene)
    return ontology, ontologyNames, ontologyGenes
def readInteractions(filename):
    """Read a '|'-separated file of ``mirna|gene`` interaction records.

    Blank lines are skipped. A non-blank line with fewer than two fields
    raises IndexError. Fields after the second are ignored.

    Args:
        filename: path of the interactions file.

    Returns:
        A dict mapping each miRNA to the set of genes listed for it.
    """
    interactions = {}
    # ``with`` closes the file even if a malformed line raises.
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            fields = line.split('|')
            mirna = fields[0]
            gene = fields[1]
            interactions.setdefault(mirna, set()).add(gene)
    return interactions
def readSynonyms(filename, taxid):
    """Build a synonym lookup from a tab-separated gene table.

    The first line of the file is treated as a header and skipped. For each
    remaining line, column 0 is compared with ``taxid``, column 2 is the
    primary gene name and column 4 holds '|'-separated alternative names
    ('-' meaning none).
    NOTE(review): this column layout looks like NCBI ``gene_info`` - confirm.

    Blank lines and lines for other taxa are skipped. A matching line with
    fewer than five columns raises IndexError.

    Args:
        filename: path of the tab-separated synonyms file.
        taxid: taxonomy identifier, as a string, selecting which lines to use.

    Returns:
        A dict mapping every name (primary or alternative) to the list
        ``[primary] + alternatives`` of the first record that mentioned it.
    """
    synonyms = {}
    # ``with`` closes the file even if a malformed line raises.
    with open(filename, 'r') as f:
        f.readline()  # skip the header line
        for line in f:
            fields = line.rstrip().split('\t')
            # Filter on taxid before touching other columns, so short or
            # blank lines belonging to other taxa cannot raise IndexError.
            if fields[0] != taxid:
                continue
            name = fields[2]
            otherNames = fields[4]
            names = [name]
            if otherNames != '-':
                names = [name] + otherNames.split('|')
            for synonym in names:
                # The first record to mention a name wins.
                if synonym not in synonyms:
                    synonyms[synonym] = names
    return synonyms
def readMirnas(filename):
    """Read a list of miRNA names, one per line.

    Surrounding whitespace is stripped from each line, and blank lines are
    skipped so that no empty-string miRNA ends up in the result.

    Args:
        filename: path of the miRNA list file.

    Returns:
        The set of distinct miRNA names found in the file.
    """
    mirnas = set()
    # ``with`` closes the file even if reading raises.
    with open(filename, 'r') as f:
        for line in f:
            mirna = line.strip()
            if mirna:
                mirnas.add(mirna)
    return mirnas
# Usage: <script> ontologyFile interactionsFile mirnaFile synonymsFile outFile [taxid]
ontologyFile = sys.argv[1]
interactionsFile = sys.argv[2]
mirnaFile = sys.argv[3]
synonymsFile = sys.argv[4]
outFile = sys.argv[5]
taxid = ''
# sys.argv[0] is the script name, so the optional sixth argument exists only
# when sys.argv has at least 7 entries.
if len(sys.argv) >= 7:
    taxid = sys.argv[6]
ontology, ontologyNames, ontologyGenes = readOntology(ontologyFile)
interactions = readInteractions(interactionsFile)
mirnas = readMirnas(mirnaFile)
# Synonym matching runs only when a taxid was supplied.
if taxid != '':
    synonyms = readSynonyms(synonymsFile, taxid)
    interactions = matchSysnonyms(synonyms, ontology, ontologyGenes, interactions)
# One '>category' header per ontology category, then one line per miRNA:
# '<mirna>\t<comma-separated genes it targets within that category>'.
with open(outFile, 'w') as g:
    for category in ontology:
        g.write('>' + category + '\n')
        categoryGenes = ontology[category]
        # Sorting makes the output reproducible; set order varies per run.
        for mirna in sorted(mirnas):
            # A listed miRNA may have no recorded interactions at all.
            genes = interactions.get(mirna, set()) & categoryGenes
            g.write(mirna + '\t' + ','.join(sorted(genes)) + '\n')