-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmygseaCalcNES.py
103 lines (94 loc) · 2.91 KB
/
mygseaCalcNES.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
'''
This script performs enrichment score normalization given enrichment score output files, one for each permutation required.
Set parameters before running:
ROOT_DIR: The root directory
Parameters:
phenotype: The phenotype
type: Set to be bin (for binary)
covars: Number of covariates used
numPerm: Number of permutations
Authors: Shirley Hui, Asha Rostamianfar
'''
import util
import reader
import operator
import sys
import glob
import numpy as np
import copy
import os
minSize = 10
maxSize = 200
phenotype = str(sys.argv[1])
type=str(sys.argv[2])
covars=str(sys.argv[3])
numPerm=str(sys.argv[4])
print phenotype
ROOT_DIR = "/home/shirleyhui/Work/PNC/"
if type=='bin':
if covars=='0':
originalPesScore = ROOT_DIR+'gsea/bin/gsea_'+phenotype+'_nocovar/obs/'+phenotype+'_binAssoc.perm0.assoc.logistic.adjusted.nocovar.txt.May012018_gmt-gsea.txt'
inDir = ROOT_DIR+'gsea/bin/gsea_'+phenotype+'_nocovar/perm/*.txt'
else:
originalPesScore = ROOT_DIR+'gsea/bin/gsea_'+phenotype+'_covars'+covars+'/obs/'+phenotype+'_binAssoc.perm0.assoc.logistic.adjusted.covar'+covars+'.txt.May012018_gmt-gsea.txt'
inDir = ROOT_DIR+'gsea/bin/gsea_'+phenotype+'_covars'+covars+'/perm/*.txt'
pesFiles = glob.glob(inDir)
print len(pesFiles)
def addPES(pesFile, pathwayToPes, pathwayToSize, firstTime = False):
for line in open(pesFile):
parts = line.replace('\n', '').split('\t')
pId = parts[0]
pes = float(parts[1])
size = int(parts[2])
if size < minSize or size > maxSize:
continue
if firstTime:
pathwayToSize[pId] = size
pathwayToPes[pId] = [pes]
else:
try:
pathwayToPes[pId].append(pes)
except KeyError,e:
pass
def getAllPes():
pathwayToSize = {}
pathwayToPes = {}
addPES(originalPesScore, pathwayToPes, pathwayToSize, True)
i = 0
for f in pesFiles:
print 'Analyzing', f
if i % 100 == 0:
print i
sys.stdout.flush()
i += 1
addPES(f, pathwayToPes, pathwayToSize, False)
return pathwayToPes, pathwayToSize
def getNES(pathwayToPes, pIdx, pathwayToNES, firstTime = False):
for p, pesScores in pathwayToPes.iteritems():
#print p
mean = np.mean(pesScores[1:])
std = np.std(pesScores[1:])
if pIdx >= len(pesScores):
continue;
if std == 0:
nes = 0
else:
nes = ((pesScores[pIdx] - mean) / std)
if firstTime:
#print p, nes
pathwayToNES[p] = nes
else:
pathwayToNES[p].append(nes)
pathwayToPes, pathwayToSize = getAllPes()
firstTime = True
outDir =( '%s/nes/%s/' % (GSEA_DIR,phenotype))
print 'Writing out to directory:', outDir
if not os.path.exists(outDir):
print "Out dir doesn't exist. trying to make"
os.makedirs(outDir)
stop = int(numPerm)+1
for i in range(0,stop):
pathwayToNES = {}
getNES(pathwayToPes, i, pathwayToNES, firstTime)
outFileName = outDir + ('nes-%s-%s-covars%s-%d.txt' % (phenotype,type,covars,i))
util.printMapToFile(pathwayToNES, outFileName)