From ea2b3a1bff15857fe7904a68f52c8eaae3c0bef1 Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Tue, 10 Jan 2012 16:59:26 -0500 Subject: [PATCH] Add a script to convert MA2C wig to bedGraph and fix overlapping problem in wig at the same time. --- README | 8 +++++ Scripts/ma2cWigToBedGraph | 64 +++++++++++++++++++++++++++++++++++++++ setup.py | 3 +- 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 Scripts/ma2cWigToBedGraph diff --git a/README b/README index 583b3a2..609e972 100644 --- a/README +++ b/README @@ -1,3 +1,11 @@ +* Attention + +This my personal code repository. I haven't tested them fully. So I can't guarantee they works... + + +* Ancient README + + * WebApp list: 1. CistromeDB diff --git a/Scripts/ma2cWigToBedGraph b/Scripts/ma2cWigToBedGraph new file mode 100644 index 0000000..b1f7988 --- /dev/null +++ b/Scripts/ma2cWigToBedGraph @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# Time-stamp: <2012-01-10 16:52:01 Tao Liu> + +# in the wiggle file from MA2C, the 1st column -- position is actually +# the center of each probe. Some probes may overlap. In order to +# convert them to appropriate bigwig, I need to create bedGraph file +# with above two problems fixed. + +import os +import sys +# ------------------------------------ +# Main function +# ------------------------------------ +def main(): + if len(sys.argv) < 2: + sys.stderr.write("need 1 paras: %s \n\n**Make sure your variableStep wig file is from MA2C!\n**In the output, gaps will be filled with 0 and if two regions are overlapped, the later value will be used.\n" % sys.argv[0]) + sys.exit(1) + + #max_span = int(sys.argv[1]) + wigfile = open(sys.argv[1],"r") + + #fs = bdgfile.readline().rstrip().split() + #prev_region = (fs[0],int(fs[1]),int(fs[2]),float(fs[3])) + prev_region = None + + + for l in wigfile: + if l.startswith("track"): + continue + if l.startswith("variableStep"): + chrom = l.rstrip().split()[1].split("=")[1] + span = int(l.rstrip().split()[2].split("=")[1]) + continue + (pos,value) = l.rstrip().split() + startpos = int(pos) - int(span/2) + endpos = startpos + span + curr_region = (chrom,startpos,endpos,float(value)) + + if not prev_region: + prev_region = curr_region + continue + + if prev_region[0] == curr_region[0]: + # only if they are in the same chromosome + right_pos_of_prev_region = min(curr_region[1],prev_region[1]+span) + if right_pos_of_prev_region > prev_region[1]: + print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3]) + # fill in gaps with 0 + if curr_region[1] > prev_region[1]+span: + print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1]+span,curr_region[1],0) + else: + right_pos_of_prev_region = prev_region[1]+span + if right_pos_of_prev_region > prev_region[1]: + print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3]) + prev_region = curr_region + + # last region + right_pos_of_prev_region = prev_region[1]+span + if right_pos_of_prev_region > prev_region[1]: + print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3]) + + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index 581f477..1e82487 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,8 @@ def main(): 'Scripts/norm.py', 'Scripts/cutoff.py', 'Scripts/ChIP-seq_Pipeline1.py', - 'Scripts/convert_gene_ids.py', + 'Scripts/convert_gene_ids.py', + 'Scripts/ma2cWigToBedGraph', # 'Scripts/hmm_conception.py', ],