Skip to content

Commit

Permalink
Fix a bug related to the broad peak calling function in previous
Browse files Browse the repository at this point in the history
versions. The gaps were miscalculated, so segmented weak broad calls
may be reported, and sometimes you would see peaks with lower than
cutoff values in the output files.

"Potentially" Fixed issue macs3-project#105 on temporary files, need further followup.
  • Loading branch information
taoliu committed Dec 22, 2015
1 parent 08e297b commit db45aa0
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 25 deletions.
17 changes: 17 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
2015-12-22 Tao Liu <[email protected]>
MACS version 2.1.0 20151222 (tag:rc Dongzhi)

* Bug fixes

1) Fix a bug while dealing with some chromosomes only containing
one read (pair). The size of dup_plus/dup_minus arrays after
filtering dups should +1.

2) Fix a bug related to the broad peak calling function in
previous versions. The gaps were miscalculated, so segmented weak
broad calls may be reported, and sometimes you would see peaks
with lower than cutoff values in the output files.

3) "Potentially" Fixed issue #105, need further followup.


2015-07-31 Tao Liu <[email protected]>
MACS version 2.1.0 20150731 (tag:rc)

Expand Down
2 changes: 1 addition & 1 deletion MACS2/Constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MACS_VERSION = "2.1.0.20150731"
MACS_VERSION = "2.1.0.20151222"
#MACSDIFF_VERSION = "1.0.4 20110212 (tag:alpha)"
FILTERDUP_VERSION = "1.0.0 20140616"
RANDSAMPLE_VERSION = "1.0.0 20120703"
Expand Down
41 changes: 27 additions & 14 deletions MACS2/IO/CallPeakUnit.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2015-07-30 15:55:55 Tao Liu>
# Time-stamp: <2015-12-22 16:06:56 Tao Liu>

"""Module for Calculate Scores.
Expand Down Expand Up @@ -461,7 +461,7 @@ cdef class CallerFromAlignments:
self.chr_pos_treat_ctrl = cPickle.load( f )
f.close()
return
except IOError:
except:
temp_fd, temp_filename = mkstemp()
os.close(temp_fd)
self.pileup_data_files[ chrom ] = temp_filename
Expand Down Expand Up @@ -507,8 +507,9 @@ cdef class CallerFromAlignments:
f = file(self.pileup_data_files[ chrom ],"wb")
cPickle.dump( self.chr_pos_treat_ctrl, f , protocol=2 )
f.close()
except IOError:
pass
except:
# fail to write then remove the key in pileup_data_files
self.pileup_data_files.pop(chrom)
return

cdef list __chrom_pair_treat_ctrl ( self, treat_pv, ctrl_pv ):
Expand Down Expand Up @@ -1585,7 +1586,7 @@ cdef class CallerFromAlignments:

# get the regions with scores above cutoffs
above_cutoff = np.nonzero( apply_multiple_cutoffs(score_array_s,lvl1_cutoff_s) )[0] # this is not an optimized method. It would be better to store score array in a 2-D ndarray?
above_cutoff_index_array = np.arange(pos_array.shape[0])[above_cutoff] # indices
above_cutoff_index_array = np.arange(pos_array.shape[0],dtype="int32")[above_cutoff] # indices
above_cutoff_endpos = pos_array[above_cutoff] # end positions of regions where score is above cutoff
above_cutoff_startpos = pos_array[above_cutoff-1] # start positions of regions where score is above cutoff

Expand All @@ -1611,6 +1612,9 @@ cdef class CallerFromAlignments:
cp = ctrl_array_ptr[ ti ]

peak_content.append( ( ts, te, tp, cp, ti ) )
acs_ptr += 1 # move ptr
ace_ptr += 1
acia_ptr+= 1
lastp = te

#peak_content.append( (above_cutoff_startpos[0], above_cutoff_endpos[0], treat_array[above_cutoff_index_array[0]], ctrl_array[above_cutoff_index_array[0]], score_array_s, above_cutoff_index_array[0] ) )
Expand Down Expand Up @@ -1645,7 +1649,7 @@ cdef class CallerFromAlignments:

# get the regions with scores above cutoffs
above_cutoff = np.nonzero( apply_multiple_cutoffs(score_array_s,lvl2_cutoff_s) )[0] # this is not an optimized method. It would be better to store score array in a 2-D ndarray?
above_cutoff_index_array = np.arange(pos_array.shape[0])[above_cutoff] # indices
above_cutoff_index_array = np.arange(pos_array.shape[0],dtype="int32")[above_cutoff] # indices
above_cutoff_endpos = pos_array[above_cutoff] # end positions of regions where score is above cutoff
above_cutoff_startpos = pos_array[above_cutoff-1] # start positions of regions where score is above cutoff

Expand All @@ -1669,26 +1673,33 @@ cdef class CallerFromAlignments:
ti = acia_ptr[ 0 ]
tp = treat_array_ptr[ ti ]
cp = ctrl_array_ptr[ ti ]

peak_content.append( ( ts, te, tp, cp, ti ) )
acs_ptr += 1 # move ptr
ace_ptr += 1
acia_ptr+= 1

lastp = te
for i in range( 1, above_cutoff_startpos.size ):
ts = acs_ptr[ 0 ]
te = ace_ptr[ 0 ]
ti = acia_ptr[ 0 ]
acs_ptr += 1
# for everything above cutoff
ts = acs_ptr[ 0 ] # get the start
te = ace_ptr[ 0 ] # get the end
ti = acia_ptr[ 0 ]# get the index

acs_ptr += 1 # move ptr
ace_ptr += 1
acia_ptr+= 1
tp = treat_array_ptr[ ti ]
cp = ctrl_array_ptr[ ti ]
tl = ts - lastp
tp = treat_array_ptr[ ti ] # get the treatment pileup
cp = ctrl_array_ptr[ ti ] # get the control pileup
tl = ts - lastp # get the distance from the current point to last position of existing peak_content

if tl <= lvl2_max_gap:
# append
peak_content.append( ( ts, te, tp, cp, ti ) )
lastp = te
else:
# close
self.__close_peak_for_broad_region (peak_content, lvl2peaks, min_length, chrom, lvl2_max_gap/2, score_array_s )

peak_content = [ ( ts, te, tp, cp, ti ), ]
lastp = te

Expand Down Expand Up @@ -1743,6 +1754,8 @@ cdef class CallerFromAlignments:
fold_change = mean_from_value_length( tarray_fc, tlist_length ),
qscore = mean_from_value_length( tarray_qscore, tlist_length ),
)
#if chrom == "chr1" and peak_content[0][0] == 237643 and peak_content[-1][1] == 237935:
# print tarray_qscore, tlist_length
# start a new peak
return True

Expand Down
6 changes: 3 additions & 3 deletions MACS2/IO/Parser.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2015-06-02 23:21:25 Tao Liu>
# Time-stamp: <2015-08-14 14:29:30 Tao Liu>

"""Module for all MACS Parser classes for input.
Expand Down Expand Up @@ -1242,8 +1242,8 @@ cdef class BAMPEParser(BAMParser):
(n_cigar_op, bwflag ) = unpack( '<HH' , data[ 12:16 ] )
if bwflag & 4 or bwflag & 512 or bwflag & 1024 or bwflag & 256 or bwflag & 2048:
return ret #unmapped sequence or bad sequence or 2nd or sup alignment
if bwflag & 256 or bwflag & 2048:
return ret # secondary or supplementary alignment
#if bwflag & 256 or bwflag & 2048:
# return ret # secondary or supplementary alignment
if bwflag & 1:
# paired read. We should only keep sequence if the mate is mapped
# and if this is the left mate, all is within the flag!
Expand Down
6 changes: 3 additions & 3 deletions MACS2/callpeak_cmd.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Time-stamp: <2015-07-27 12:31:14 Tao Liu>
# Time-stamp: <2015-12-22 11:55:15 Tao Liu>

"""Description: MACS 2 main executable
Expand Down Expand Up @@ -286,9 +286,9 @@ def run( args ):
ofhd_xls.write(tagsinfo)

if options.shift > 0:
ofhd_xls.write("#2 Sequencing ends will be shifted towards 3' by %d bp(s)\n" % (options.shift))
ofhd_xls.write("# Sequencing ends will be shifted towards 3' by %d bp(s)\n" % (options.shift))
elif options.shift < 0:
ofhd_xls.write("#2 Sequencing ends will be shifted towards 5' by %d bp(s)\n" % (options.shift * -1))
ofhd_xls.write("# Sequencing ends will be shifted towards 5' by %d bp(s)\n" % (options.shift * -1))

ofhd_xls.write("# d = %d\n" % (options.d))
try:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Time-stamp: <2015-07-31 14:55:55 Tao Liu>
# Time-stamp: <2015-12-22 16:21:04 Tao Liu>

"""Description
Expand Down Expand Up @@ -57,7 +57,7 @@ def main():
]

setup(name="MACS2",
version="2.1.0.20150731",
version="2.1.0.20151222",
description="Model Based Analysis for ChIP-Seq data",
author='Tao Liu',
author_email='[email protected]',
Expand Down
4 changes: 2 additions & 2 deletions setup_w_cython.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Time-stamp: <2015-07-31 14:55:49 Tao Liu>
# Time-stamp: <2015-12-22 16:21:12 Tao Liu>

"""Description:
Expand Down Expand Up @@ -80,7 +80,7 @@ def main():
]

setup(name="MACS2",
version="2.1.0.20150731",
version="2.1.0.20151222",
description="Model Based Analysis for ChIP-Seq data",
author='Tao Liu',
author_email='[email protected]',
Expand Down

0 comments on commit db45aa0

Please sign in to comment.