Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BWA-MEM2 indexing tool #6823

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion tools/bwa/bwa-mem.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
<import>bwa_macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="requirements">
<requirement type="package" version="1.13">samtools</requirement>
</expand>
<expand macro="stdio"/>
<command><![CDATA[
@pipefail@
Expand Down
36 changes: 36 additions & 0 deletions tools/bwa_mem2/bwa-mem2-idx.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<tool id="bwa_mem2_idx" name="BWA-MEM2 indexer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE_VERSION@">
    <!--
        Builds a BWA-MEM2 index from a FASTA dataset in the history.
        The index files are written into the extra files directory of the
        "index" output, using the fixed prefix "reference".
    -->
    <description>Build BWA-MEM2 reference index</description>
    <macros>
        <!-- The @...@ tokens and the macros expanded below are expected to come from this import. -->
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        mkdir -p keeps the command from failing when the extra files directory
        already exists. Every step is chained with && so that a failure stops
        the job before the indexer runs.
    -->
    <command><![CDATA[
mkdir -p '$index.extra_files_path' &&
cd '$index.extra_files_path' &&
bwa-mem2 index -p 'reference' '${reference}'
    ]]></command>
    <inputs>
        <param name="reference" type="data" format="fasta,fasta.gz" label="Select a genome to index" help="Build an index for this FASTA sequence."/>
    </inputs>
    <outputs>
        <!-- The index itself lives in this dataset's extra files directory (see the command above). -->
        <data name="index" format="bwa_mem2_index"/>
    </outputs>
    <tests>
        <!-- Index the test genome and compare one of the generated index files with the stored copy. -->
        <test>
            <param name="reference" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
            <output name="index" ftype="bwa_mem2_index">
                <extra_files name="reference.0123" type="file" value="test-cache/reference.0123"/>
            </output>
        </test>
    </tests>
    <!-- Help text is reStructuredText: keep it flush left, indentation would turn it into a block quote. -->
    <help><![CDATA[
**What it does**

BWA-MEM2 is the new version of the bwa-mem algorithm in bwa. It produces alignment identical to bwa and is ~1.3-3.1x faster depending on the use-case, dataset and the running machine.
The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases.

This tool builds a reference index for the BWA-MEM2 Galaxy tool.

@info@
    ]]></help>
    <expand macro="citations" />
</tool>
25 changes: 17 additions & 8 deletions tools/bwa_mem2/bwa-mem2.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="bwa_mem2" name="BWA-MEM2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
<tool id="bwa_mem2" name="BWA-MEM2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.2">
<description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
<macros>
<import>read_group_macros.xml</import>
Expand Down Expand Up @@ -281,22 +281,31 @@ bwa-mem2 mem
</outputs>

<tests>
<test>
<param name="reference_source_selector" value="history" />
<param name="ref_file" ftype="bwa_mem2_index" class="Directory" value="test-cache"/>
<param name="fastq_input_selector" value="paired"/>
<param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
<param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
<param name="analysis_type_selector" value="illumina"/>
<output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="6" />
</test>
<test>
<param name="reference_source_selector" value="history" />
<param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
<param name="fastq_input_selector" value="paired"/>
<param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
<param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
<param name="analysis_type_selector" value="illumina"/>
<output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
<output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="6" />
</test>
<test>
<param name="reference_source_selector" value="history" />
<param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
<param name="fastq_input_selector" value="single"/>
<param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fasta1.fa"/>
<param name="analysis_type_selector" value="illumina"/>
<output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="4" />
<output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="6" />
</test>
<test>
<param name="reference_source_selector" value="history" />
Expand All @@ -305,7 +314,7 @@ bwa-mem2 mem
<param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
<param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
<param name="analysis_type_selector" value="illumina"/>
<output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
<output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="6" />
</test>
<test>
<param name="reference_source_selector" value="history" />
Expand All @@ -318,7 +327,7 @@ bwa-mem2 mem
<param name="PL" value="CAPILLARY"/>
<param name="LB" value="AARDVARK-1" />
<param name="analysis_type_selector" value="illumina"/>
<output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="4" />
<output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="6" />
</test>
<test>
<param name="reference_source_selector" value="history" />
Expand All @@ -328,7 +337,7 @@ bwa-mem2 mem
<param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
<param name="analysis_type_selector" value="illumina"/>
<param name="output_sort" value="unsorted"/>
<output name="bam_output" ftype="qname_input_sorted.bam" file="bwa-mem-test3.bam" lines_diff="4" />
<output name="bam_output" ftype="qname_input_sorted.bam" file="bwa-mem-test3.bam" lines_diff="6" />
</test>
<test>
<param name="reference_source_selector" value="history" />
Expand All @@ -338,7 +347,7 @@ bwa-mem2 mem
<param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
<param name="analysis_type_selector" value="illumina"/>
<param name="output_sort" value="name"/>
<output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="4" />
<output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="6" />
</test>
<test>
<param name="reference_source_selector" value="cached" />
Expand All @@ -347,7 +356,7 @@ bwa-mem2 mem
<param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
<param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
<param name="analysis_type_selector" value="illumina"/>
<output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
<output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="6" />
</test>
</tests>
<help><![CDATA[
Expand Down
19 changes: 12 additions & 7 deletions tools/bwa_mem2/macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

<token name="@TOOL_VERSION@">2.2.1</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@PROFILE_VERSION@">20.01</token>

<xml name="xrefs">
<xrefs>
Expand All @@ -29,10 +30,14 @@

<token name="@set_reference_fasta_filename@"><![CDATA[
#if str( $reference_source.reference_source_selector ) == "history":
#set $reference_fasta_filename = "localref.fa"
ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
bwa-mem2 index
'${reference_fasta_filename}' &&
#if $reference_source.ref_file.is_of_type("bwa_mem2_index"):
#set $reference_fasta_filename = $reference_source.ref_file.extra_files_path + "/reference"
#else
#set $reference_fasta_filename = "localref." + $reference_source.ref_file.extension
ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
bwa-mem2 index
'${reference_fasta_filename}' &&
#end if
#else:
#set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
#end if
Expand All @@ -41,7 +46,7 @@
<xml name="requirements">
<requirements>
<requirement type="package" version="@TOOL_VERSION@">bwa-mem2</requirement>
<requirement type="package" version="1.13">samtools</requirement>
<yield></yield>
</requirements>
</xml>

Expand All @@ -59,7 +64,7 @@
<conditional name="reference_source">
<param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below">
<option value="cached">Use a built-in genome index</option>
<option value="history">Use a genome from history and build index</option>
<option value="history">Use a reference from history and build index if necessary</option>
</param>
<when value="cached">
<param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
Expand All @@ -71,7 +76,7 @@
</param>
</when>
<when value="history">
<param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
<param name="ref_file" type="data" format="fasta,fasta.gz,bwa_mem2_index" label="Use the following dataset as the reference" help="You can upload a FASTA sequence to the history and use it as reference. For better performance build a reference index separately." />
</when>
</conditional>
</macro>
Expand Down
2 changes: 1 addition & 1 deletion tools/bwa_mem2/test-data/bwa_mem2_index.loc
Original file line number Diff line number Diff line change
@@ -1 +1 @@
mtgenome mtGenome Mitochondiral genome ${__HERE__}/test-cache/bwa-mem-mt-genome.fa
mtgenome mtGenome Mitochondiral genome ${__HERE__}/test-cache/reference