From 556e2d5b7a2fb8d808f59e8ff168861e8fdb9ff2 Mon Sep 17 00:00:00 2001
From: Tao Liu <vladimir.liu@gmail.com>
Date: Wed, 30 Jul 2014 23:29:16 -0400
Subject: [PATCH] Add two new commands: cmbreps to combine two replicates;
 bdgopt to modify the score column of bedGraph file such as pscore to qscore
 conversion.

---
 MACS2/IO/cBedGraph.pyx                | 105 ++++++++++++++++++++++++--
 MACS2/IO/cParser.pyx                  |  51 +++----------
 MACS2/OptValidator.py                 |  78 ++++++++++++++++++-
 MACS2/{Poisson.pyx => Statistics.pyx} |   2 +-
 MACS2/bdgcmp.py                       |   2 +-
 MACS2/bdgopt.py                       |  71 +++++++++++++++++
 MACS2/cPoisson.h                      |   4 -
 MACS2/{cPoisson.c => cStatistics.c}   |  46 ++++++++++-
 MACS2/cStatistics.h                   |   9 +++
 MACS2/cmbreps.py                      |  54 +++++++++++++
 bin/macs2                             |  56 +++++++++++++-
 setup.py                              |   4 +-
 setup_w_cython.py                     |   8 +-
 13 files changed, 428 insertions(+), 62 deletions(-)
 rename MACS2/{Poisson.pyx => Statistics.pyx} (99%)
 create mode 100644 MACS2/bdgopt.py
 delete mode 100644 MACS2/cPoisson.h
 rename MACS2/{cPoisson.c => cStatistics.c} (73%)
 create mode 100644 MACS2/cStatistics.h
 create mode 100644 MACS2/cmbreps.py

diff --git a/MACS2/IO/cBedGraph.pyx b/MACS2/IO/cBedGraph.pyx
index 10ace005..05c034db 100644
--- a/MACS2/IO/cBedGraph.pyx
+++ b/MACS2/IO/cBedGraph.pyx
@@ -1,4 +1,4 @@
-# Time-stamp: <2014-03-03 13:56:01 Tao Liu>
+# Time-stamp: <2014-07-30 23:10:44 Tao Liu>
 
 """Module for Feature IO classes.
 
@@ -42,6 +42,17 @@ __doc__ = "bedGraphTrackI class"
 # ------------------------------------
 # Misc functions
 # ------------------------------------
+LOG10_E = 0.43429448190325176
+
+from libc.math cimport log1p, exp, log10
+
+cpdef float fisher_method_combining_two_log10pvalues ( float p1, float p2 ):
+    #x = 2 * ( p1 + p2 ) / LOG10_E
+    #return -log10( exp( -x/2 ) * ( 1 + x/2 ) )
+    return ( p1 + p2 ) - log1p( ( p1 + p2 ) / LOG10_E ) * LOG10_E
+
+cpdef float mean ( float p1, float p2 ):
+    return ( p1 + p2 ) / 2
 
 # ------------------------------------
 # Classes
@@ -700,7 +711,7 @@ cdef class bedGraphTrackI:
             ret.add_loc(chrom,0,max_p,new_value)
         return ret
 
-    def overlie (self, bdgTrack2, func=max ):
+    def overlie (self, bdgTrack2, func="max" ):
         """Calculate two bedGraphTrackI objects by letting self
         overlying bdgTrack2, with user-defined functions.
 
@@ -738,6 +749,15 @@ cdef class bedGraphTrackI:
         
         assert isinstance(bdgTrack2,bedGraphTrackI), "bdgTrack2 is not a bedGraphTrackI object"
 
+        if func == "max":
+            f = max
+        elif func == "mean":
+            f = lambda x, y: ( x + y ) / 2
+        elif func == "fisher":
+            f = lambda p1, p2: ( p1 + p2 ) - log1p( ( p1 + p2 ) / LOG10_E ) * LOG10_E
+        else:
+            raise Exception("Invalid function")
+
         ret = bedGraphTrackI()
         retadd = ret.add_loc
         
@@ -766,21 +786,21 @@ cdef class bedGraphTrackI:
                 while True:
                     if p1 < p2:
                         # clip a region from pre_p to p1, then set pre_p as p1.
-                        retadd(chrom,pre_p,p1,func(v1,v2))
+                        retadd(chrom,pre_p,p1,f(v1,v2))
                         pre_p = p1
                         # call for the next p1 and v1
                         p1 = p1n()
                         v1 = v1n()
                     elif p2 < p1:
                         # clip a region from pre_p to p2, then set pre_p as p2.
-                        retadd(chrom,pre_p,p2,func(v1,v2))
+                        retadd(chrom,pre_p,p2,f(v1,v2))
                         pre_p = p2
                         # call for the next p2 and v2
                         p2 = p2n()
                         v2 = v2n()
                     elif p1 == p2:
                         # from pre_p to p1 or p2, then set pre_p as p1 or p2.
-                        retadd(chrom,pre_p,p1,func(v1,v2))
+                        retadd(chrom,pre_p,p1,f(v1,v2))
                         pre_p = p1
                         # call for the next p1, v1, p2, v2.
                         p1 = p1n()
@@ -807,6 +827,81 @@ cdef class bedGraphTrackI:
         self.minvalue = func(self.minvalue)
         return True
 
+    def p2q ( self ):
+        """Convert pvalue scores to qvalue scores.
+
+        *Assume scores in this bedGraph are pvalue scores! Not work
+         for other type of scores.
+        """
+        cdef:
+            str chrom
+            object pos_array, pscore_array
+            dict pvalue_stat = {}
+            dict pqtable = {}
+            long n, pre_p, this_p, length, j, pre_l, l, i
+            double this_v, pre_v, v, q, pre_q, this_t, this_c
+            long N, k, this_l
+            double f
+            long nhcal = 0
+            long npcal = 0
+            list unique_values
+            double t0, t1, t 
+
+        # calculate frequencies of each p-score
+        for chrom in self.__data.keys():
+            pre_p = 0
+
+            [pos_array, pscore_array] = self.__data[ chrom ]
+
+            pn = iter(pos_array).next
+            vn = iter(pscore_array).next
+
+            for i in range( len( pos_array ) ):
+                this_p = pn()
+                this_v = vn()
+                this_l = this_p - pre_p
+                if pvalue_stat.has_key( this_v ):
+                    pvalue_stat[ this_v ] += this_l
+                else:
+                    pvalue_stat[ this_v ] = this_l
+                pre_p = this_p
+
+            nhcal += len( pos_array )
+
+        nhval = 0
+
+        N = sum(pvalue_stat.values()) # total length
+        k = 1                           # rank
+        f = -log10(N)
+        pre_v = -2147483647
+        pre_l = 0
+        pre_q = 2147483647              # save the previous q-value
+
+        # calculate qscore for each pscore
+        pqtable = {}
+        unique_values = sorted(pvalue_stat.keys(), reverse=True)
+        for i in range(len(unique_values)):
+            v = unique_values[i]
+            l = pvalue_stat[v]
+            q = v + (log10(k) + f)
+            q = max(0,min(pre_q,q))           # make q-score monotonic
+            pqtable[ v ] = q
+            pre_v = v
+            pre_q = q
+            k+=l
+            nhcal += 1
+
+        # convert pscore to qscore
+        for chrom in self.__data.keys():
+            [pos_array, pscore_array] = self.__data[ chrom ]
+
+            for i in range( len( pos_array ) ):
+                pscore_array[ i ] = pqtable[ pscore_array[ i ] ]
+
+        self.merge_regions()
+        return
+        
+
     def extract_value ( self, bdgTrack2 ):
         """It's like overlie function. THe overlapped regions between
         bdgTrack2 and self, will be recorded. The values from self in
diff --git a/MACS2/IO/cParser.pyx b/MACS2/IO/cParser.pyx
index 13029620..e2edf683 100644
--- a/MACS2/IO/cParser.pyx
+++ b/MACS2/IO/cParser.pyx
@@ -1,4 +1,4 @@
-# Time-stamp: <2014-06-06 10:24:14 Tao Liu>
+# Time-stamp: <2014-07-29 23:55:08 Tao Liu>
 
 """Module for all MACS Parser classes for input.
 
@@ -158,7 +158,7 @@ cdef class GenericParser:
         else:
             self.fhd = io.open( filename, mode='rb' ) # binary mode! I don't expect unicode here!
 
-    def tsize( self ):
+    cpdef int tsize( self ):
         """General function to detect tag size.
 
         * Although it can be used by most parsers, it must be
@@ -189,13 +189,13 @@ cdef class GenericParser:
         self.tag_size = s/n
         return self.tag_size
 
-    def __tlen_parse_line ( self, str thisline ):
+    cdef __tlen_parse_line ( self, str thisline ):
         """Abstract function to detect tag length.
         
         """
         raise NotImplemented
     
-    def build_fwtrack ( self ):
+    cpdef build_fwtrack ( self ):
         """Generic function to build FWTrackIII object. Create a new
         FWTrackIII object. If you want to append new records to an
         existing FWTrackIII object, try append_fwtrack function.
@@ -228,7 +228,7 @@ cdef class GenericParser:
         self.close()
         return fwtrack
 
-    def append_fwtrack ( self, fwtrack ):
+    cpdef append_fwtrack ( self, fwtrack ):
         """Add more records to an existing FWTrackIII object. 
 
         """
@@ -252,9 +252,7 @@ cdef class GenericParser:
         self.close()
         return fwtrack
         
-
-
-    def __fw_parse_line ( self, str thisline ):
+    cdef __fw_parse_line ( self, str thisline ):
         """Abstract function to parse chromosome, 5' end position and
         strand.
         
@@ -264,7 +262,7 @@ cdef class GenericParser:
         cdef int strand = -1
         return ( chromosome, fpos, strand )
 
-    def sniff ( self ):
+    cpdef sniff ( self ):
         """Detect whether this parser is the correct parser for input
         file.
 
@@ -288,7 +286,7 @@ cdef class GenericParser:
                 self.fhd.seek( 0 )
                 return True
             
-    def close ( self ):
+    cpdef close ( self ):
         """Run this when this Parser will be never used.
 
         Close file I/O stream.
@@ -299,7 +297,7 @@ cdef class BEDParser( GenericParser ):
     """File Parser Class for tabular File.
 
     """
-    def __tlen_parse_line ( self, str thisline ):
+    cdef __tlen_parse_line ( self, str thisline ):
         """Parse 5' and 3' position, then calculate tag length.
 
         """
@@ -313,7 +311,7 @@ cdef class BEDParser( GenericParser ):
         thisfields = thisline.split( '\t' )
         return atoi( thisfields[ 2 ] )-atoi( thisfields[ 1 ] )
     
-    def __fw_parse_line ( self, str thisline ):
+    cdef __fw_parse_line ( self, str thisline ):
         #cdef list thisfields
         cdef char * chromname
         
@@ -349,12 +347,11 @@ cdef class BEDParser( GenericParser ):
                      atoi( thisfields[ 1 ] ),
                      0 )
             
-
 cdef class ELANDResultParser( GenericParser ):
     """File Parser Class for tabular File.
 
     """
-    def __tlen_parse_line ( self, str thisline ):
+    cdef __tlen_parse_line ( self, str thisline ):
         """Parse tag sequence, then tag length.
 
         """
@@ -366,7 +363,7 @@ cdef class ELANDResultParser( GenericParser ):
         else:
             return len( thisfields[ 1 ] )
 
-    def __fw_parse_line ( self, str thisline ):
+    cdef __fw_parse_line ( self, str thisline ):
         cdef:
             str chromname, strand
             int thistaglength
@@ -702,30 +699,6 @@ cdef class BAMParser( GenericParser ):
                 return False
 
     cpdef int tsize ( self ):
-        if HAS_PYSAM:
-            return self.__tsize_w_pysam()
-        else:
-            return self.__tsize_wo_pysam()
-
-    cdef int __tsize_w_pysam( self ):
-        cdef:
-            int n = 0                   # successful read of tag size
-            double s = 0                # sum of tag sizes
-
-        if self.tag_size != -1:
-            # if we have already calculated tag size (!= -1),  return it.
-            return self.tag_size
-
-        for n in range( 1, 11, 1 ):
-            try:
-                s += self.fhd.next().qlen
-            except:
-                break
-        self.tag_size = int( s/n )
-        self.fhd.reset()
-        return self.tag_size
-
-    cdef int __tsize_wo_pysam( self ):
         """Get tag size from BAM file -- read l_seq field.
 
         Refer to: http://samtools.sourceforge.net/SAM1.pdf
diff --git a/MACS2/OptValidator.py b/MACS2/OptValidator.py
index 76f76a0a..09da2de2 100644
--- a/MACS2/OptValidator.py
+++ b/MACS2/OptValidator.py
@@ -1,4 +1,4 @@
-# Time-stamp: <2014-06-17 01:39:03 Tao Liu>
+# Time-stamp: <2014-07-30 23:05:15 Tao Liu>
 
 """Module Description
 
@@ -650,3 +650,79 @@ def opt_validate_bdgcmp ( options ):
 
     return options
 
+
+def opt_validate_cmbreps ( options ):
+    """Validate options from a OptParser object.
+
+    Ret: Validated options object.
+    """
+    # logging object
+    logging.basicConfig(level=20,
+                        format='%(levelname)-5s @ %(asctime)s: %(message)s ',
+                        datefmt='%a, %d %b %Y %H:%M:%S',
+                        stream=sys.stderr,
+                        filemode="w"
+                        )
+    
+    options.error   = logging.critical        # function alias
+    options.warn    = logging.warning
+    options.debug   = logging.debug
+    options.info    = logging.info
+
+    # methods should be valid:
+
+
+    if options.method not in [ 'fisher', 'max', 'mean']:
+        logging.error( "Invalid method: %s" % options.method )
+        sys.exit( 1 )
+
+    if len( options.ifile ) != 2:
+        logging.error("Only support two replicates!")
+        sys.exit( 1 )
+
+    # # of -i must == # of -w
+
+    # if not options.weights:
+    #     options.weights = [ 1.0 ] * len( options.ifile )
+
+    # if len( options.ifile ) != len( options.weights ):
+    #     logging.error("Must provide same number of weights as number of input files.")
+    #     sys.exit( 1 )
+
+    # if options.method == "fisher" and len( options.ifile ) > 3:
+    #     logging.error("NOT IMPLEMENTED! Can't combine more than 3 replicates using Fisher's method.")
+    #     sys.exit( 1 )
+
+    return options
+
+
+def opt_validate_bdgopt ( options ):
+    """Validate options from a OptParser object.
+
+    Ret: Validated options object.
+    """
+    # logging object
+    logging.basicConfig(level=20,
+                        format='%(levelname)-5s @ %(asctime)s: %(message)s ',
+                        datefmt='%a, %d %b %Y %H:%M:%S',
+                        stream=sys.stderr,
+                        filemode="w"
+                        )
+    
+    options.error   = logging.critical        # function alias
+    options.warn    = logging.warning
+    options.debug   = logging.debug
+    options.info    = logging.info
+
+    # methods should be valid:
+
+    if options.method.lower() not in [ 'multiply', 'add', 'p2q']:
+        logging.error( "Invalid method: %s" % options.method )
+        sys.exit( 1 )
+
+    if options.method.lower() in [ 'multiply', 'add' ] and not options.extparam:
+        logging.error( "Need EXTRAPARAM for method multiply or add!")
+        sys.exit( 1 )
+
+    return options
+
diff --git a/MACS2/Poisson.pyx b/MACS2/Statistics.pyx
similarity index 99%
rename from MACS2/Poisson.pyx
rename to MACS2/Statistics.pyx
index 937c70ba..bc439111 100644
--- a/MACS2/Poisson.pyx
+++ b/MACS2/Statistics.pyx
@@ -1,4 +1,4 @@
-cdef extern from "cPoisson.h":
+cdef extern from "cStatistics.h":
     double log10_poisson_cdf ( unsigned int n, double lam, short lower )
     double log10_poisson_cdf_P_large_lambda ( unsigned int k, double lbd )
     double log10_poisson_cdf_Q_large_lambda ( unsigned int k, double lbd )
diff --git a/MACS2/bdgcmp.py b/MACS2/bdgcmp.py
index fa64745f..fd5ef87c 100644
--- a/MACS2/bdgcmp.py
+++ b/MACS2/bdgcmp.py
@@ -1,4 +1,4 @@
-# Time-stamp: <2013-10-28 12:17:33 Tao Liu>
+# Time-stamp: <2014-07-30 02:40:36 Tao Liu>
 
 import sys
 import os
diff --git a/MACS2/bdgopt.py b/MACS2/bdgopt.py
new file mode 100644
index 00000000..64bf9998
--- /dev/null
+++ b/MACS2/bdgopt.py
@@ -0,0 +1,71 @@
+# Time-stamp: <2014-07-30 23:06:58 Tao Liu>
+
+"""Description: Modify bedGraph file
+
+Copyright (c) 2014 Tao Liu <tliu4@buffalo.edu>
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD License (see the file COPYING included with
+the distribution).
+
+@status:  experimental
+@version: $Revision$
+@author:  Tao Liu
+@contact: tliu4@buffalo.edu
+"""
+
+# ------------------------------------
+# python modules
+# ------------------------------------
+import sys
+import os
+import logging
+from MACS2.IO import cBedGraphIO
+from MACS2.OptValidator import opt_validate_bdgopt as opt_validate
+
+# ------------------------------------
+# constants
+# ------------------------------------
+logging.basicConfig(level=20,
+                    format='%(levelname)-5s @ %(asctime)s: %(message)s ',
+                    datefmt='%a, %d %b %Y %H:%M:%S',
+                    stream=sys.stderr,
+                    filemode="w"
+                    )
+
+# ------------------------------------
+# Misc functions
+# ------------------------------------
+error   = logging.critical		# function alias
+warn    = logging.warning
+debug   = logging.debug
+info    = logging.info
+# ------------------------------------
+# Classes
+# ------------------------------------
+
+# ------------------------------------
+# Main function
+# ------------------------------------
+def run( options ):
+    options = opt_validate( options )
+    info("Read and build bedGraph...")
+    bio = cBedGraphIO.bedGraphIO(options.ifile)
+    btrack = bio.build_bdgtrack(baseline_value=0)
+
+    info("Modify bedGraph...")
+    if options.method.lower() == "multiply":
+        btrack.apply_func( lambda x: x * options.extraparam)
+    elif options.method.lower() == "add":
+        btrack.apply_func( lambda x: x + options.extraparam)
+    elif options.method.lower() == "p2q":
+        btrack.p2q()
+        
+    ofile = os.path.join( options.outdir, options.ofile )
+    info("Write bedGraph of modified scores...")
+    ofhd = open(ofile,"wb")
+    btrack.write_bedGraph(ofhd,name="%s_modified_scores" % (options.method.upper()),description="Scores calculated by %s" % (options.method.upper()))
+    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
+
+
+
diff --git a/MACS2/cPoisson.h b/MACS2/cPoisson.h
deleted file mode 100644
index 1a2acb4d..00000000
--- a/MACS2/cPoisson.h
+++ /dev/null
@@ -1,4 +0,0 @@
-double log10_poisson_cdf ( unsigned int n, double lam, short lower );
-double log10_poisson_cdf_P_large_lambda ( unsigned int k, double lbd );
-double log10_poisson_cdf_Q_large_lambda ( unsigned int k, double lbd );
-
diff --git a/MACS2/cPoisson.c b/MACS2/cStatistics.c
similarity index 73%
rename from MACS2/cPoisson.c
rename to MACS2/cStatistics.c
index d5e48cef..eb7892e0 100644
--- a/MACS2/cPoisson.c
+++ b/MACS2/cStatistics.c
@@ -1,13 +1,57 @@
-//Time-stamp: <2013-09-15 21:56:36 Tao Liu>
+//Time-stamp: <2014-07-29 17:01:45 Tao Liu>
 
 #include <math.h>
 
 double log10_poisson_cdf ( unsigned int n, double lam, short lower );
 double log10_poisson_cdf_P_large_lambda ( unsigned int k, double lbd );
 double log10_poisson_cdf_Q_large_lambda ( unsigned int k, double lbd );
+double chi2_k1_cdf ( double x );
+double chi2_k2_cdf ( double x );
+double chi2_k4_cdf ( double x );
+double log10_chi2_k1_cdf ( double x );
+double log10_chi2_k2_cdf ( double x );
+double log10_chi2_k4_cdf ( double x );
 
 #define max(x, y) ((x)>(y)?(x):(y))
 #define logspace_add(x, y) ((x)>(y)?(x):(y)) + log1p ( exp( -fabs(x - y) ) )
+#define LOG10_E 0.43429448190325176
+
+double chi2_k1_cdf ( double x )
+{
+  /*CDF for chi-square distribution with degree of freedom 1.*/
+  return erf( sqrt(x/2) );
+}
+
+double log10_chi2_k1_cdf ( double x )
+{
+  /*log10 CDF for chi-square distribution with degree of freedom 1.*/
+  return log10( erf( sqrt(x/2) ) );
+}
+
+double chi2_k2_cdf ( double x )
+{
+  /*CDF for chi-square distribution with degree of freedom 2.*/
+  return 1 - exp( -x/2 );
+}
+
+double log10_chi2_k2_cdf ( double x )
+{
+  /*log10 CDF for chi-square distribution with degree of freedom 2.*/
+  return log1p( - exp( -x/2 ) ) * LOG10_E;
+}
+
+double chi2_k4_cdf ( double x )
+{
+  /*CDF for chi-square distribution with degree of freedom 4.*/
+  return 1 - exp( -x/2 ) * ( 1 + x/2 );
+}
+
+double log10_chi2_k4_CDF ( double x )
+{
+  /*log10 CDF for chi-square distribution with degree of freedom 4.*/
+  return log1p( - exp( -x/2 ) * ( 1 + x/2 ) ) * LOG10_E;
+}
+
 
 double log10_poisson_cdf ( unsigned int n, double lam, short lower )
 {
diff --git a/MACS2/cStatistics.h b/MACS2/cStatistics.h
new file mode 100644
index 00000000..b87920d5
--- /dev/null
+++ b/MACS2/cStatistics.h
@@ -0,0 +1,9 @@
+double log10_poisson_cdf ( unsigned int n, double lam, short lower );
+double log10_poisson_cdf_P_large_lambda ( unsigned int k, double lbd );
+double log10_poisson_cdf_Q_large_lambda ( unsigned int k, double lbd );
+double chi2_k1_cdf ( double x );
+double chi2_k2_cdf ( double x );
+double chi2_k4_cdf ( double x );
+double log10_chi2_k1_cdf ( double x );
+double log10_chi2_k2_cdf ( double x );
+double log10_chi2_k4_cdf ( double x );
diff --git a/MACS2/cmbreps.py b/MACS2/cmbreps.py
new file mode 100644
index 00000000..7c86c8c4
--- /dev/null
+++ b/MACS2/cmbreps.py
@@ -0,0 +1,54 @@
+# Time-stamp: <2014-07-30 15:33:55 Tao Liu>
+
+import sys
+import os
+import logging
+
+from MACS2.IO import cBedGraphIO
+from MACS2.OptValidator import opt_validate_cmbreps as opt_validate
+
+from math import log as mlog
+
+# ------------------------------------
+# constants
+# ------------------------------------
+logging.basicConfig(level=20,
+                    format='%(levelname)-5s @ %(asctime)s: %(message)s ',
+                    datefmt='%a, %d %b %Y %H:%M:%S',
+                    stream=sys.stderr,
+                    filemode="w"
+                    )
+
+# ------------------------------------
+# Misc functions
+# ------------------------------------
+error   = logging.critical		# function alias
+warn    = logging.warning
+debug   = logging.debug
+info    = logging.info
+# ------------------------------------
+# Main function
+# ------------------------------------
+
+def run( options ):
+    options = opt_validate( options )
+    #weights = options.weights
+
+    info("Read and build bedGraph for each replicate...")
+    reps = []
+    i = 1
+    for ifile in options.ifile:
+        info("Read file #%d" % i)
+        reps.append( cBedGraphIO.bedGraphIO( ifile ).build_bdgtrack( ) )
+        i += 1
+
+    # first two reps
+
+    info("combining #1 and #2 with method '%s'" % options.method)
+    cmbtrack = reps[ 0 ].overlie( reps[ 1 ], func=options.method )
+    ofile = os.path.join( options.outdir, options.ofile )
+    info("Write bedGraph of combined scores...")
+    ofhd = open(ofile,"wb")
+    cmbtrack.write_bedGraph(ofhd,name="%s_combined_scores" % (options.method.upper()),description="Scores calculated by %s" % (options.method.upper()))
+    info("Finished '%s'! Please check '%s'!" % (options.method, ofile))
+    
diff --git a/bin/macs2 b/bin/macs2
index aa188501..4b937112 100644
--- a/bin/macs2
+++ b/bin/macs2
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Time-stamp: <2014-06-17 11:50:19 Tao Liu>
+# Time-stamp: <2014-07-30 23:14:39 Tao Liu>
 
 """Description: MACS v2 main executable.
 
@@ -70,6 +70,14 @@ def main():
         # compare treatment and control to make enrichment scores
         from MACS2.bdgcmp import run
         run( args )
+    elif subcommand == "bdgopt":
+        # operations on the score column of bedGraph file
+        from MACS2.bdgopt import run
+        run( args )        
+    elif subcommand == "cmbreps":
+        # combine replicates
+        from MACS2.cmbreps import run
+        run( args )
     elif subcommand == "randsample":
         # randomly sample sequencing reads, and save as bed file
         from MACS2.randsample import run
@@ -123,6 +131,12 @@ def prepare_argparser ():
     
     # command for 'bdgcmp'
     add_bdgcmp_parser( subparsers )
+
+    # command for 'bdgopt'
+    add_bdgopt_parser( subparsers )
+    
+    # command for 'cmbreps'
+    add_cmbreps_parser( subparsers )    
     
     # command for 'bdgdiff'
     add_bdgdiff_parser( subparsers )
@@ -163,9 +177,9 @@ def add_callpeak_parser( subparsers ):
     # group for input files
     group_input = argparser_callpeak.add_argument_group( "Input files arguments" )
     group_input.add_argument( "-t", "--treatment", dest = "tfile", type = str, required = True, nargs = "+",
-                              help = "ChIP-seq treatment file. If multiple files are given as '-t A B C', then they will all be read and combined. REQUIRED." )
+                              help = "ChIP-seq treatment file. If multiple files are given as '-t A B C', then they will all be read and treated as replicates. MACS2 uses Liptak's method with weights as REQUIRED." )
     group_input.add_argument( "-c", "--control", dest = "cfile", type = str, nargs = "*",
-                                    help = "Control file. If multiple files are given as '-c A B C', then they will all be read and combined.")
+                                    help = "Control file. If multiple files are given as '-c A B C', they won't be regarded as replicates, instead they will be pooled to estimate ChIP-seq background noise.")
     group_input.add_argument( "-f", "--format", dest = "format", type = str,
                               choices = ("AUTO", "BAM", "SAM", "BED", "ELAND",
                                          "ELANDMULTI", "ELANDEXPORT", "BOWTIE",
@@ -418,7 +432,41 @@ def add_bdgcmp_parser( subparsers ):
     output_group.add_argument( "-o", "--ofile", dest = "ofile", type = str, nargs = "+",
                                help = "Output filename. Mutually exclusive with --o-prefix. The number and the order of arguments for --ofile must be the same as for -m." )
     return
-    
+
+def add_bdgopt_parser( subparsers ):
+    """Add function 'operations on score column of bedGraph' argument parsers.
+    """    
+    argparser_bdgopt = subparsers.add_parser( "bdgopt",
+                                              help = "Operations on score column of bedGraph file." )
+    argparser_bdgopt.add_argument( "-i", "--ifile", dest = "ifile", type = str, required = True,
+                                   help = "MACS score in bedGraph. Note: this must be a bedGraph file covering the ENTIRE genome. REQUIRED" )
+    argparser_bdgopt.add_argument( "-m", "--method", dest = "method", type = str,
+                                   choices = ( "multiply", "add", "p2q" ),
+                                   help = "Method to modify the score column of bedGraph file. Available choices are: multiply, add or p2q. 1) multiply, the EXTRAPARAM is required and will be multiplied to the score column. If you intend to divide the score column by X, use value of 1/X as EXTRAPARAM. 2) add, the EXTRAPARAM is required and will be added to the score column. If you intend to subtract the score column by X, use value of -X as EXTRAPARAM. 3) p2q, this will convert p-value scores to q-value scores using Benjamini-Hochberg process. The EXTRAPARAM is not required. This method assumes the scores are -log10 p-value from MACS2. Any other types of score will cause unexpected errors.", default="p2q")
+    argparser_bdgopt.add_argument( "-p", "--extra-param", dest = "extraparam", type = float,
+                                   help = "The extra parameter for METHOD. Check the detail of -m option.")
+    add_outdir_option( argparser_bdgopt )
+    argparser_bdgopt.add_argument( "-o", "--ofile", dest = "ofile", type = str,
+                                   help = "Output BEDGraph filename.", required = True )
+    return
+
+def add_cmbreps_parser( subparsers ):
+    """Add function 'combine replicates' argument parsers.
+    """
+    argparser_cmbreps = subparsers.add_parser( "cmbreps",
+                                               help = "Combine BEDGraphs of scores from replicates." )
+    argparser_cmbreps.add_argument( "-i", dest = "ifile", type = str, required = True, nargs = "+",
+                                    help = "MACS score in bedGraph for each replicate. Require exactly two files such as '-i A B'. REQUIRED" )
+    # argparser_cmbreps.add_argument( "-w", dest = "weights", type = float, nargs = "*",
+    #                                 help = "Weight for each replicate. Default is 1.0 for each. When given, require same number of parameters as IFILE." )    
+    argparser_cmbreps.add_argument( "-m", "--method", dest = "method", type = str,
+                                    choices = ( "fisher", "max", "mean" ),
+                                    help = "Method to use while combining scores from replicates. 1) fisher: Fisher's combined probability test. It requires scores in ppois form (-log10 pvalues) from bdgcmp. Other types of scores for this method may cause cmbreps unexpected errors. 2) max: take the maximum value from replicates for each genomic position. 3) mean: take the average value. Note, except for Fisher's method, max or mean will take scores AS IS which means they won't convert scores from log scale to linear scale or vice versa.", default="fisher")
+    add_outdir_option( argparser_cmbreps ) 
+    argparser_cmbreps.add_argument( "-o", "--ofile", dest = "ofile", type = str, required = True,
+                                    help = "Output BEDGraph filename for combined scores." )
+    return
+
 def add_randsample_parser( subparsers ):
     argparser_randsample = subparsers.add_parser( "randsample",
                                                   help = "Randomly sample number/percentage of total reads." )
diff --git a/setup.py b/setup.py
index 3044be41..ccbaae06 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Time-stamp: <2014-06-17 15:08:15 Tao Liu>
+# Time-stamp: <2014-07-29 17:04:00 Tao Liu>
 
 """Description
 
@@ -53,7 +53,7 @@ def main():
                    #Extension("MACS2.IO.cDiffScore", ["MACS2/IO/cDiffScore.c"], include_dirs=numpy_include_dir ),                   
                    Extension("MACS2.IO.cCallPeakUnit", ["MACS2/IO/cCallPeakUnit.c"], include_dirs=numpy_include_dir),
                    Extension("MACS2.hashtable", ["MACS2/hashtable.c"], include_dirs=["MACS2/",numpy_get_include()]),
-                   Extension("MACS2.Poisson", ["MACS2/Poisson.c", "MACS2/cPoisson.c"], libraries=["m"], include_dirs=["MACS2/",numpy_get_include()]),
+                   Extension("MACS2.Statistics", ["MACS2/Statistics.c", "MACS2/cStatistics.c"], libraries=["m"], include_dirs=["MACS2/",numpy_get_include()]),
                    ]
     
     setup(name="MACS2",
diff --git a/setup_w_cython.py b/setup_w_cython.py
index 565f370d..ef8b3c71 100644
--- a/setup_w_cython.py
+++ b/setup_w_cython.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Time-stamp: <2014-06-17 15:11:17 Tao Liu>
+# Time-stamp: <2014-07-29 17:03:44 Tao Liu>
 
 """Description: 
 
@@ -61,7 +61,7 @@ def main():
                        #Extension("MACS2.IO.cDiffScore", ["MACS2/IO/cDiffScore.pyx"], include_dirs=numpy_include_dir ),                      
                        Extension("MACS2.IO.cCallPeakUnit", ["MACS2/IO/cCallPeakUnit.pyx"], include_dirs=numpy_include_dir),
                        Extension("MACS2.hashtable", ["MACS2/hashtable.pyx"], include_dirs=["MACS2/",numpy_get_include()]),
-                       Extension("MACS2.Poisson", ["MACS2/Poisson.pyx", "MACS2/cPoisson.c"], libraries=["m"], include_dirs=["MACS2/",numpy_get_include()]),
+                       Extension("MACS2.Statistics", ["MACS2/Statistics.pyx", "MACS2/cStatistics.c"], libraries=["m"], include_dirs=["MACS2/",numpy_get_include()]),
                        ]
     else:
         ext_modules = [Extension("MACS2.cProb", ["MACS2/cProb.c"], libraries=["m"], include_dirs=numpy_include_dir ),
@@ -79,8 +79,8 @@ def main():
                        Extension("MACS2.IO.cScoreTrack", ["MACS2/IO/cScoreTrack.c"], include_dirs=numpy_include_dir),
                        #Extension("MACS2.IO.cDiffScore", ["MACS2/IO/cDiffScore.c"], include_dirs=numpy_include_dir ),                       
                        Extension("MACS2.IO.cCallPeakUnit", ["MACS2/IO/cCallPeakUnit.c"], include_dirs=numpy_include_dir),
-                       Extension("MACS2.hashtable", ["MACS2/hashtable.c"], include_dirs=["MACS2/",numpy_get_include()]), 
-                       Extension("MACS2.Poisson", ["MACS2/Poisson.c", "MACS2/cPoisson.c"], libraries=["m"], include_dirs=["MACS2/",numpy_get_include()]),
+                       Extension("MACS2.hashtable", ["MACS2/hashtable.c"], include_dirs=["MACS2/",numpy_get_include()]),
+                       Extension("MACS2.Statistics", ["MACS2/Statistics.c", "MACS2/cStatistics.c"], libraries=["m"], include_dirs=["MACS2/",numpy_get_include()]),
                        ]
 
     setup(name="MACS2",