-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcondense_contig_regions.py
76 lines (66 loc) · 2.01 KB
/
condense_contig_regions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
"""
Merge every bin_size consecutive records (lines) of a BED file into one bin on a single chromosome.
The score of that bin will be the maximum score found among the records in the bin.
Usage:
cat in.bed | python condense_contig_regions.py bin_size > bin-out.bed
in.bed looks like:
track name=junctions description="TSS score"
chrC 70043 70044 44 -
chrC 105671 105913 1 +
chrC 132687 132980 2 -
chrC 132735 132977 1 -
chrM 11450 11722 2 -
chr2 6149 8097 2 +
chr2 6925 8079 3 +
chr2 77246 77428 1 +
chr2 87142 87322 1 +
"""
import sys
def _write_row(row):
    """Write one output row, newline-terminated, to standard output."""
    sys.stdout.write(row + '\n')


def _format_bin(chrom, start, scores, strand):
    """Return the tab-separated output row for one bin.

    The row spans ``start`` to ``start + 1`` and carries the highest value
    found in ``scores``.
    """
    fields = [chrom, str(start), str(start + 1), str(max(scores)), strand]
    return '\t'.join(fields)


def _parse_record(raw):
    """Split one input line into ``(chrom, start, score, strand)``.

    Returns ``None`` for lines that carry no record: blank lines, ``#``
    comments, and ``track``/``browser`` header lines.

    Raises:
        ValueError: if a data line has fewer than five tab-separated fields,
            or its start/score column is not numeric.
    """
    text = raw.strip('\n\r')
    if not text.strip():
        return None
    first_word = text.split(None, 1)[0]
    if first_word in ('track', 'browser') or first_word.startswith('#'):
        return None
    fields = text.split('\t')
    if len(fields) < 5:
        raise ValueError(
            'expected at least 5 tab-separated fields, got %d: %r'
            % (len(fields), text))
    # Columns used: 0 = chromosome, 1 = start, 3 = score, 4 = strand.
    return fields[0], int(fields[1]), round(float(fields[3]), 4), fields[4]


def print_last_bin(chrom, last_pos, scores, strand, emit=None):
    """Emit the trailing, partially filled bin, if it holds any score.

    The bin is reported at ``last_pos`` (the start of the last record that
    went into it), not at the running bin offset used for complete bins.

    Args:
        chrom: chromosome name of the records in the bin.
        last_pos: start coordinate of the last record added to the bin.
        scores: scores collected so far; nothing is emitted when empty.
        strand: strand of the last record added to the bin.
        emit: callable receiving the row string; defaults to writing the
            row to standard output.
    """
    if not scores:
        return
    if emit is None:
        emit = _write_row
    emit(_format_bin(chrom, last_pos, scores, strand))


def condense(lines, binsize, emit=None):
    """Condense every ``binsize`` consecutive records into one output row.

    Each complete bin is emitted at the running offset (0, ``binsize``,
    ``2 * binsize``, ...) with the maximum score of its records and the
    chromosome and strand of its last record.  A bin left incomplete when
    the chromosome changes, or when the input ends, is emitted through
    ``print_last_bin``.

    Args:
        lines: iterable of raw input lines (for example ``sys.stdin``).
        binsize: number of records per bin; must be at least 1.
        emit: callable receiving each row string; defaults to writing the
            row to standard output.

    Raises:
        ValueError: if ``binsize`` is smaller than 1 or a data line is
            malformed.
    """
    if binsize < 1:
        raise ValueError('binsize must be at least 1')
    if emit is None:
        emit = _write_row

    scores = []
    bin_start = 0
    # Chromosome, start and strand of the last record in the open bin.
    prev_chrom, last_pos, prev_strand = None, 0, None

    for raw in lines:
        record = _parse_record(raw)
        if record is None:
            continue
        chrom, start, score, strand = record

        # Close the open bin *before* adding a record from another
        # chromosome, so scores never leak across chromosomes and the bin is
        # labelled with the chromosome it actually belongs to.
        if scores and chrom != prev_chrom:
            print_last_bin(prev_chrom, last_pos, scores, prev_strand, emit)
            scores = []

        scores.append(score)
        if len(scores) == binsize:
            emit(_format_bin(chrom, bin_start, scores, strand))
            # NOTE(review): the running offset is never reset when the
            # chromosome changes, matching the original script -- confirm
            # this is the intended coordinate system.
            bin_start += binsize
            scores = []
            continue
        prev_chrom, last_pos, prev_strand = chrom, start, strand

    # Flush whatever is left once the input is exhausted.
    print_last_bin(prev_chrom, last_pos, scores, prev_strand, emit)


def main(argv=None):
    """Run the script: read BED records from stdin, write bins to stdout.

    Args:
        argv: argument vector; defaults to ``sys.argv``.  ``argv[1]`` must be
            a positive integer bin size.

    Returns:
        Process exit status: 0 on success, 1 when the bin size is missing or
        invalid.
    """
    if argv is None:
        argv = sys.argv
    try:
        binsize = int(argv[1])
    except (IndexError, ValueError):
        binsize = 0
    if binsize < 1:
        # Usage goes to stderr so it cannot end up in a redirected BED file.
        sys.stderr.write('Incorrect argument supplied\n')
        sys.stderr.write('%s\n' % __doc__)
        return 1
    condense(sys.stdin, binsize)
    return 0


if __name__ == "__main__":
    sys.exit(main())