-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathstep_1_mapping_atom.rb
executable file
·134 lines (110 loc) · 4.02 KB
/
step_1_mapping_atom.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
## atomic mapping wrapper for Illumina/Solexa reads
# paired or single-end reads
# batch size: single slide, <100M reads.
### author: Yufeng Shen, c2b2, Columbia
### Note: we probably want to change some bwa parameters:
## number of allowed gaps in "aln" step: -o 2 (default is 1)
## Parameter for read trimming in "aln" step: -q 5 (default is 0), it trimmed from the 3' end
## until most bases have quality > INT. argmax_x{\sum_{i=x+1}^l(INT-q_i)}
require 'getoptlong'
def main
opts = GetoptLong.new(
["--reference", "-r", GetoptLong::OPTIONAL_ARGUMENT],
["--input", "-i", GetoptLong::REQUIRED_ARGUMENT],
["--pair", "-p", GetoptLong::OPTIONAL_ARGUMENT],
["--help", "-h", GetoptLong::NO_ARGUMENT],
["--gaps", "-g", GetoptLong::OPTIONAL_ARGUMENT],
["--qualtrim", "-q", GetoptLong::OPTIONAL_ARGUMENT],
["--bwa", "-b", GetoptLong::OPTIONAL_ARGUMENT],
["--platform", "-t", GetoptLong::OPTIONAL_ARGUMENT],
["--delimiter", "-d", GetoptLong::OPTIONAL_ARGUMENT]
)
optHash = {}
opts.each do |opt, arg|
optHash[opt] = arg
end
if optHash.key?("--help")
$stderr.puts "Usage: ruby __.rb -i foo.1.fastq [-p foo.2.fastq] [-r reference.fasta] [-q quality_trim] [-g maxgaps]"
$stderr.puts "Note: mapping paired reads requires -p argument."
exit
end
if optHash.key?("--reference")
ref = optHash["--reference"]
else
ref = "/ifs/data/c2b2/ip_lab/shares/DATA/Sequencing/resources/human_g1k_v37.fasta"
end
f1 = optHash["--input"]
if optHash.key?("--pair")
f2 = optHash["--pair"]
else
f2 = nil
end
if optHash.key?("--bwa")
bwa = File.expand_path(optHash["--bwa"])
else
# bwa = "/ifs/data/c2b2/ip_lab/shares/SOFTWARE/bwa_titan/bwa"
bwa = "/ifs/scratch/c2b2/ip_lab/aps2157/ExomePipe/bwa/bwa-0.5.8c/bwa"
end
## todo: optionally provide samtools location through command line.
samtools = "/ifs/home/c2b2/ip_lab/yshen/usr/bin/samtools"
## todo: provide more options of bwa for fine-tuning.
threads = 2 ## number of threads used by bwa
options = "" # other bwa options
platform = "ILLUMINA"
if optHash.key?("--platform")
maxgaps = optHash["--platform"].to_i
end
delimiter = '_'
if optHash.key?("--delimiter")
maxgaps = optHash["--delimiter"].to_i
end
maxgaps = 2
if optHash.key?("--gaps")
maxgaps = optHash["--gaps"].to_i
end
qualtrim = 5
if optHash.key?("-qualtrim")
qualtrim = optHash["--qualtrim"].to_i
end
######### align step
cmd="#{bwa} aln -q #{qualtrim} -o #{maxgaps} -t #{threads} #{options} #{ref} #{f1} > #{f1}.sai"
system(cmd)
if f2 != nil # paired
cmd="#{bwa} aln -t #{threads} #{options} #{ref} #{f2} > #{f2}.sai"
system(cmd)
#### map
output = File.dirname(File.expand_path(f1)) + "/" + getPrefix(File.basename(f1)) + ".aligned.bam"
basename = File.basename(output)
name_array = basename.split(delimiter)
name = name_array[0] + "_" + name_array[1]
cmd = "#{bwa} sampe -i ID_PairedSample_#{name} -m SM_PairedSample_#{name} -l LIB_PairedSample_#{name} -p #{platform} #{ref} #{f1}.sai #{f2}.sai #{f1} #{f2} | #{samtools} view -bS -o #{output} - "
system(cmd)
## sort and index the BAM file
cmd= "#{samtools} sort #{output} #{output}.sorted"
system(cmd)
cmd = "#{samtools} index #{output}.sorted.bam"
system(cmd)
else # single reads
output = f1 + ".bam"
basename = File.basename(output)
name_array = basename.split(delimiter)
name = name_array[0]
cmd = "#{bwa} samse -i ID_SingleSample_#{name} -m SM_Single_Sample_#{name} -l LIB_Single_Sample_#{name} -p #{platform} #{ref} #{f1}.sai #{f1} | #{samtools} view -bS -o #{output} - "
system(cmd)
# We need to make a function for this.
cmd= "#{samtools} sort #{output} #{output}.sorted"
system(cmd)
cmd = "#{samtools} index #{output}.sorted.bam"
system(cmd)
system("rm -rf #{output}")
end
end
def sortAndIndex(bam)
#
end
def getPrefix(fname)
return fname
# $stderr.puts fname
# return fname.match(/^(.*)[\.|\-]1[\.|\-](.*)$/)[1]
end
main()