Skip to content

Commit

Permalink
+ [Jellyfish] compatibility upgrade to jellyfish-2.1.3++, which emplo…
Browse files Browse the repository at this point in the history
…ys an interactive (-i) as well as a file based query. Upgrade includes new tests and test data
  • Loading branch information
Thomas Hackl committed Jul 22, 2014
1 parent 1857408 commit 0d29447
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 65 deletions.
126 changes: 83 additions & 43 deletions lib/Jellyfish.pm
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ use Log::Log4perl qw(:no_extra_logdie_message);

use IPC::Run qw(harness pump finish start);

our $VERSION = '0.04';
our $VERSION = '1.00';

#-----------------------------------------------------------------------------#
# Globals
Expand Down Expand Up @@ -285,7 +285,8 @@ Kmers can be provided either as STRING, STRING reference or ARRAY reference.
$kmers;
# '0
# 1'
# 1
# '
my $kmers = $jf->query(
['--both-strands', 'path/to/jf_kmer_hash'],
Expand All @@ -295,7 +296,8 @@ Kmers can be provided either as STRING, STRING reference or ARRAY reference.
$kmers;
# 'ATTA 0
# TATT 1'
# TATT 1
# '
my @kmers = $jf->query(
['--both-strands', 'path/to/jf_kmer_hash'],
Expand All @@ -319,41 +321,65 @@ Kmers can be provided either as STRING, STRING reference or ARRAY reference.
=cut

sub query{
my $cmd = 'query';
my $self = shift;
my $opt = @_%2 ? shift : [];
my %p = (
table => 1,
kmers => '',
@_
my $cmd = 'query';
my $self = shift;
my $opt = @_%2 ? shift : [];
my %p = (
table => 1,
kmers => '',
@_
);

# short-cut options like --help
unless ($p{kmers}){
return $self->run([$cmd, @$opt]);
}

# non-interative options like --help or --sequence
unless ($p{kmers}){
my $re = $self->run([$cmd, @$opt]);
return $re unless $re; # short-cut empty output
return $re if $re =~ /^Usage/; # short-cut help or error

# process and return result
if(wantarray){
my @re = split(/\s/, $re);
chomp @re;
if ($p{table}){
return @re;
}else{
my $i=0;
return grep{$i++ % 2}@re;
}
}elsif(! $p{table}){
chomp $re;
my @re = split(/\s/, $re);
chomp @re;
# every other element
my $i=0;
return join("\n", grep{$i++ % 2}@re )."\n";
}else{
return $re;
}

# interactive query
}else{

# handle kmer inputs
my $kmers;
if(! ref $p{kmers}){
# make sure there is trailing "\n"
chomp $p{kmers};
$p{kmers}.="\n";
$kmers = \$p{kmers};
# make sure there is trailing "\n"
chomp $p{kmers};
$p{kmers}.="\n";
$kmers = \$p{kmers};
}elsif(ref $p{kmers} eq 'ARRAY'){
$p{kmers} = join("\n", @{$p{kmers}});
$p{kmers}.="\n";
$kmers = \$p{kmers};
$p{kmers} = join("\n", @{$p{kmers}});
$p{kmers}.="\n";
$kmers = \$p{kmers};
}elsif(ref $p{kmers} eq 'SCALAR'){
$kmers = $p{kmers};
$kmers = $p{kmers};
}else{
die 'kmers neither STRING nor SCALAR ref nor ARRAY ref'
die 'kmers neither STRING nor SCALAR ref nor ARRAY ref'
}


# DEPREACTED: $self->run([$cmd, @$opt], $kmers, \$re);

# add -i flag unless already specified in cmd
unshift (@$opt, "-i") unless grep{$_ eq "-i"}@$opt;

# compute a "id" from $@opt which will be the same as long as the same
# hash is queried with identical options
my $id = join("", @$opt);
Expand All @@ -363,27 +389,42 @@ sub query{

# query kmers
my $re = $self->_query_run($kmers);


return $$re unless $$re; # short-cut --output to file or no hits

# -i does not return kmers, only counts

# process and return result
if(wantarray){
my @re = split(/\s/, $$re);
chomp @re;
if ($p{table}){
return @re;
}else{
my $i=0;
return grep{$i++ % 2}@re;
chomp($$re);
my @re = split(/\s/, $$re);
if ($p{table}){
chomp($$kmers);
my @kmers = split("\n", $$kmers);
my @res;
for(my $i=0; $i<@kmers; $i++){
push @res, $kmers[$i], $re[$i];
}
return @res;
}else{
return @re;
}
}elsif(! $p{table}){
chomp $$re;
my @re = split(/\s/, $$re);
chomp @re;
# every other element
my $i=0;
return join("\n", grep{$i++ % 2}@re )."\n"
return $$re;
}else{
return $$re;
chomp($$kmers);
chomp($$re);
my @kmers = split("\n", $$kmers);
my @re = split(/\s/, $$re);
my $res = '';
for(my $i=0; $i<@kmers; $i++){
$res.=$kmers[$i]." ".$re[$i]."\n";
}
return $res;
}

}

}

=head2 dump
Expand Down Expand Up @@ -486,7 +527,6 @@ sub _query_init_interface{

sub _query_run{
my ($self, $kmers) = @_;
my $kmer_count = $$kmers =~ tr/\n//;
$self->{_query}{i} = $$kmers;
$self->{_query}{harness}->run;
die $self->{_query}{e} if $self->{_query}{e};
Expand Down
2 changes: 2 additions & 0 deletions t/01jellyfish.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>seq
ATTATATATCGACTAGCC
38 changes: 23 additions & 15 deletions t/01jellyfish.dmp
Original file line number Diff line number Diff line change
Expand Up @@ -163,21 +163,29 @@ AGCC 1
'GCTA', # TAGC rc
'AGCC', # GGCT
)],
# kmers --both-strands
# kmers --sequnce S t1
"ATTA 1
ATAA 0
TATA 2
ATAT 2
TATA 2
ATAT 2
GATA 0
ATCG 1
TCGA 1
CGAC 1
AGTC 0
ACTA 1
CTAG 1
GCTA 0
TTAT 1
TAGC 1
AGCC 1
",

# kmers --sequnce S t0
"1
1
1
1
",
# kmers --sequnce A t1
[qw(
ATTA 1
TTAT 1
TAGC 1
AGCC 1
)],
# kmers --sequnce A t0
[qw(
1
1
1
1
)],
4 changes: 4 additions & 0 deletions t/01jellyfish.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>seq
ATTAT
>seq2
TAGCC
Binary file added t/01jellyfish.jf
Binary file not shown.
Binary file removed t/01jellyfish.mer
Binary file not shown.
32 changes: 25 additions & 7 deletions t/01jellyfish.t
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ use strict;
use warnings;

use Test::More;
use Test::Deep;
use Data::Dumper;

use FindBin qw($RealBin);
use lib "$RealBin/../lib/";

Expand All @@ -28,7 +28,10 @@ my $Class = 'Jellyfish';
# create data file names from name of this <file>.t
(my $Dat_file = $FindBin::RealScript) =~ s/t$/dat/; # data
(my $Dmp_file = $FindBin::RealScript) =~ s/t$/dmp/; # data structure dumped
(my $Mer_file = $FindBin::RealScript) =~ s/t$/mer/; # data structure dumped
(my $Mer_file = $FindBin::RealScript) =~ s/t$/jf/; # data structure dumped
(my $Seq_file = $FindBin::RealScript) =~ s/t$/fa/; # data structure dumped

# jellyfish count -m 4 -s 1M 01jellyfish.dat -o 01jellyfish.jf

# slurp <file>.dat
#my $Dat = do { local $/; local @ARGV = $Dat_file; <> }; # slurp data to string
Expand All @@ -52,7 +55,11 @@ my %Dmp;
kmers_A_t1
kmers_A_t0
kmers_nr
kmers_nr_S
kmers_file_S_t1
kmers_file_S_t0
kmers_file_A_t1
kmers_file_A_t0
)} = do "$Dmp_file"; # read and eval the dumped structure


Expand Down Expand Up @@ -123,6 +130,16 @@ subtest '$obj->histo' => sub{
subtest '$obj->query' => sub{
can_ok($Class, 'query');
like($obj->query(['--help']), qr/^Usage:/, "query() --help");

is($obj->query(['--sequence', $Seq_file, $Mer_file]), $Dmp{kmers_file_S_t1}, "query() --sequence");

is($obj->query(['--sequence', $Seq_file, $Mer_file], table => 0), $Dmp{kmers_file_S_t0}, "query() --sequence table => 0");

my @query = $obj->query(['--sequence', $Seq_file, $Mer_file]);
cmp_deeply(\@query, $Dmp{kmers_file_A_t1}, "query() --sequence LIST table => 1");
@query = $obj->query(['--sequence', $Seq_file, $Mer_file], table => 0);
cmp_deeply(\@query, $Dmp{kmers_file_A_t0}, "query() --sequence LIST table => 0");


# kmers STRING, STRING ref, ARRAY ref
# table => 0/1
Expand All @@ -134,20 +151,21 @@ subtest '$obj->query' => sub{
my $kmers_nr_AR = $Dmp{kmers_nr};

# kmers STRING, table => 1, STRING context
is(scalar $obj->query([$Mer_file], kmers => $kmers_S), $Dmp{kmers_S_t1}, "query() STRING SCALAR table => 1");
is(scalar $obj->query(["-i", $Mer_file], kmers => $kmers_S), $Dmp{kmers_S_t1}, "query() STRING SCALAR table => 1");
my $query = $obj->{_query};
is(scalar $obj->query([$Mer_file], kmers => $kmers_S), $Dmp{kmers_S_t1}, "query() STRING SCALAR table => 1");
is($obj->{_query}, $query, "query() persistent interface");
# kmers STRING ref, table => 0, STRING context
is(scalar $obj->query([$Mer_file], kmers => $kmers_SR, table => 0), $Dmp{kmers_S_t0}, "query() STRINGREF SCALAR table => 0");
# kmers ARRAY ref, table => 1, ARRAY context
my @query = $obj->query([$Mer_file], kmers => $kmers_AR);
ok(eq_array(\@query, $Dmp{kmers_A_t1}), "query() ARRAYREF LIST table => 1");
@query = $obj->query([$Mer_file], kmers => $kmers_AR);
cmp_deeply(\@query, $Dmp{kmers_A_t1}, "query() ARRAYREF LIST table => 1");
# kmers ARRAY ref, table => 0, ARRAY context
@query = $obj->query([$Mer_file], kmers => $kmers_AR, table => 0);
ok(eq_array(\@query, $Dmp{kmers_A_t0}), "query() ARRAYREF LIST table => 0");

is(scalar $obj->query(['--both-strands', $Mer_file], kmers => $kmers_nr_AR), $Dmp{kmers_nr_S}, "query() --both-strands");
# other opt for query reinit
is(scalar $obj->query(['-o','tmp.out', $Mer_file], kmers => $kmers_nr_AR), '', "query() -o tmp.out");
isnt($obj->{_query}, $query, "query() reinit interface");


Expand Down

0 comments on commit 0d29447

Please sign in to comment.