-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathh3tbl2pseudo_json.pl
executable file
·98 lines (76 loc) · 2.64 KB
/
h3tbl2pseudo_json.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/perl
# Author: Luke Ulrich
# Synopsis: Convert HMMER3 results into pseudo JSON where each line consists of the results of
# the sequence id, a tab character, and then the JSON encoded coils for this
# sequence.
#
# The JSON encoded results consist of an array of arrays.
$| = 1;
use strict;
use warnings;
use FindBin '$Bin';
use lib "$Bin/lib";
use Common;
use Getopt::Long;
my $usage = <<"USAGE";
Usage: $0 [options] <HMMER3 dom table file> <fasta source file>
Available options
-----------------
-e, --error-file = string : file name to write any errors to.
USAGE
# Globals ---------------------------------------
my $g_Help;
my $g_ErrFile;
GetOptions("h|help", \$g_Help,
"e|error-file=s", \$g_ErrFile);
die $usage if ($g_Help);
my $g_DomTblFile = shift or die $usage;
my $g_FastaSourceFile = shift or die $usage;
print STDERR qq(Reading ids...);
my @ids = @{ &Common::readFastaIds($g_FastaSourceFile, $g_ErrFile) };
print STDERR qq(done\n);
my %results = ();
if (!open(IN, "< $g_DomTblFile")) {
&Common::writeErrorAndDie("Unable to open file, $g_DomTblFile: $!\n", $g_ErrFile);
}
while (<IN>) {
next if (/^#/);
chomp;
my @values = split(/\s+/);
if (@values != 23) {
&Common::writeErrorAndDie("Line is missing expected 23 values: $_\n", $g_ErrFile);
}
my $id = $values[0];
my $seq_len = int($values[2]);
my $domain_name = $values[3];
my $profile_len = int($values[5]);
my $c_evalue = $values[11] + 0;
my $i_evalue = $values[12] + 0;
my $dom_score = $values[13] + 0;
my $dom_bias = $values[14] + 0;
my $hmm_start = int($values[15]);
my $hmm_stop = int($values[16]);
my $ali_start = int($values[17]);
my $ali_stop = int($values[18]);
my $env_start = int($values[19]);
my $env_stop = int($values[20]);
my $acc = $values[21] + 0;
my $ali_extent = ($ali_start > 1) ? '.' : '[';
$ali_extent .= ($ali_stop < $seq_len) ? '.' : ']';
my $hmm_extent = ($hmm_start > 1) ? '.' : '[';
$hmm_extent .= ($hmm_stop < $profile_len) ? '.' : ']';
my $env_extent = ($env_start > 1) ? '.' : '[';
$env_extent .= ($env_stop < $seq_len) ? '.' : ']';
my @data = ($domain_name, $ali_start, $ali_stop, $ali_extent,
$dom_bias, $hmm_start, $hmm_stop, $hmm_extent,
$env_start, $env_stop, $env_extent,
$dom_score, $c_evalue, $i_evalue, $acc);
push @{$results{$id}}, \@data;
}
foreach my $id (@ids) {
my @hits = ();
if ($results{$id}) {
@hits = sort { $a->[13] <=> $b->[13] || $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] } @{ $results{$id} };
}
&Common::printJson($id, \@hits);
}