-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathresync
157 lines (132 loc) · 2.9 KB
/
resync
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/perl -w
=head1 NAME
resync
=head1 SYNOPSIS
resync --fwd=FILE --rev=FILE --write_singles
Takes two files and outputs files where only sequences with headers present in both files remain.
--fwd, --rev: fasta or fastq files
--write_singles: single reads will be written to a separate file
--help: This info.
=head1 AUTHOR
=cut
use warnings;
use strict;
use Getopt::Long;
use Pod::Usage;
my $help = 0;
my $verbose = 0;
my $fwdfile;
my $revfile;
my $singles;
GetOptions(
"help!" => \$help,
"verbose!" => \$verbose,
"write_singles" => \$singles,
"fwd=s" => \$fwdfile,
"rev=s" => \$revfile
);
pod2usage(0) if $help;
pod2usage(-msg => "Need a forward file", -exitval => 1) unless $fwdfile;
pod2usage(-msg => "Forward file must have extension fasta, fastq, fa or fq") unless ($fwdfile=~/\.f(ast)?[aq]$/);
open FWD, $fwdfile or die "Can't open $fwdfile: $!";
pod2usage(-msg => "Need a reverse file", -exitval => 1) unless $revfile;
pod2usage(-msg => "Reverse file must have extension fasta, fastq, fa or fq") unless ($revfile=~/\.f(ast)?[aq]$/);
open REV, $revfile or die "Can't open $revfile: $!";
my $max;
if ($fwdfile=~/q$/ and $revfile=~/q$/){
$max = 5;
print "\nFiles are fastq\n";
}
elsif($fwdfile=~/a$/ and $revfile=~/a$/){
$max = 3;
print "\nFiles are fasta\n";
}
else{
die "Please provide forward and reverse files of same format!";
}
my %headers;
my $count=1;
while (my $line = <FWD>){
if ($count == 1){
$line=~/^(\S+)/;
$headers{$1}=1;
$count++;
}
else{
$count++;
$count = 1 if ($count == $max);
}
}
close FWD;
$revfile=~/(\S+)\.\w+$/;
my $root=$1;
if ($max==5){
open REV_OUT, ">$root.sync.fastq";
open SINGLES, ">$root.singles.fastq" if $singles;
}
else{
open REV_OUT, ">$root.sync.fasta";
open SINGLES, ">$root.singles.fasta" if $singles;
}
$count=1;
my $paired = 'no';
while (my $line = <REV>){
if ($count == 1){
$line=~/^(\S+)/;
if (exists ($headers{$1})){
delete $headers{$1};
$paired = 'yes';
print REV_OUT $line;
}
elsif($singles){
print SINGLES $line;
}
$count++;
}
else{
print REV_OUT $line if ($paired eq 'yes');
print SINGLES $line if ($paired eq 'no' and $singles);
$count++;
if ($count == $max){
$count = 1;
$paired= 'no';
}
}
}
close REV;
close REV_OUT;
open FWD, $fwdfile;
$fwdfile=~/(\S+)\.\w+$/;
$root=$1;
if ($max==5){
open FWD_OUT, ">$root.sync.fastq";
}
else{
open FWD_OUT, ">$root.sync.fasta";
}
while (my $line = <FWD>){
if ($count == 1){
$line=~/^(\S+)/;
if (!exists ($headers{$1})){
$paired = 'yes';
print FWD_OUT $line;
}
elsif($singles){
print SINGLES $line;
}
$count++;
}
else{
print FWD_OUT $line if ($paired eq 'yes');
print SINGLES $line if ($paired eq 'no' and $singles);
$count++;
if ($count == $max){
$count = 1;
$paired = 'no';
}
}
}
close FWD;
close FWD_OUT;
close SINGLES if $singles;