From 5c15bfa230414b4e3276ad5647f12395ac445677 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Tue, 26 Mar 2024 10:11:09 +1100 Subject: [PATCH 1/7] Use fixed GRIDSS container for VIRUSBreakend The BioContainers Docker image only provides BusyBox grep, which has poor performance compared to GNU grep and increases runtime by up to an hour. The current Bioconda CI infrastructure is unable to build a patched GRIDSS image at the moment, so I've made a temporary fix here instead to restore expected performance. --- modules/local/virusbreakend/Dockerfile | 43 ++++++++++++++++++++++++++ modules/local/virusbreakend/main.nf | 4 +-- 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 modules/local/virusbreakend/Dockerfile diff --git a/modules/local/virusbreakend/Dockerfile b/modules/local/virusbreakend/Dockerfile new file mode 100644 index 00000000..50fb19ff --- /dev/null +++ b/modules/local/virusbreakend/Dockerfile @@ -0,0 +1,43 @@ +FROM continuumio/miniconda3:23.5.2-0-alpine as build + +RUN \ + conda install conda-libmamba-solver + +RUN \ + echo -e > ~/.condarc '\ +solver: libmamba\n\ +channels:\n\ + - conda-forge\n\ + - bioconda\n\ + - defaults' + +RUN \ + conda create -y -p /env/ \ + 'gridss=2.13.2=h50ea8bc_3' \ + 'bash' \ + 'coreutils' \ + 'findutils' \ + 'gawk' \ + 'grep' \ + 'procps-ng' \ + 'time' \ + 'util-linux' \ + 'which' + +RUN \ + conda clean -yaf + +# Move Conda environment into distroless image +FROM gcr.io/distroless/base-debian11:latest + +COPY --from=build /env/ /env/ + +ENV PATH="/env/bin:${PATH}" +ENV LD_LIBRARY_PATH="/env/lib/:${LD_LIBRARY_PATH}" + +# Symlink system executables as required by VIRUSBreaked +SHELL ["/env/bin/bash", "-c"] +RUN \ + ln -s /env/bin/bash /bin/bash && \ + ln -s /env/bin/env /usr/bin/env && \ + ln -s /env/bin/time /usr/bin/time diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf index 64195c3c..9284e757 100644 --- a/modules/local/virusbreakend/main.nf +++ b/modules/local/virusbreakend/main.nf @@ -6,8 +6,8 @@ process VIRUSBREAKEND { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gridss:2.13.2--h50ea8bc_3': - 'quay.io/biocontainers/gridss:2.13.2--h50ea8bc_3' }" + 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/singularity/gridss:2.13.2--0' : + 'quay.io/nf-core/gridss:2.13.2--0' }" input: tuple val(meta), path(bam) From a2c6921fe7ee3d7b1f4ee100b5924878a8f996c0 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Tue, 26 Mar 2024 12:41:04 +1100 Subject: [PATCH 2/7] Apply linting --- modules/local/virusbreakend/Dockerfile | 40 +++++++++++++------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/modules/local/virusbreakend/Dockerfile b/modules/local/virusbreakend/Dockerfile index 50fb19ff..f3a41abb 100644 --- a/modules/local/virusbreakend/Dockerfile +++ b/modules/local/virusbreakend/Dockerfile @@ -1,31 +1,31 @@ FROM continuumio/miniconda3:23.5.2-0-alpine as build RUN \ - conda install conda-libmamba-solver + conda install conda-libmamba-solver RUN \ - echo -e > ~/.condarc '\ + echo -e > ~/.condarc '\ solver: libmamba\n\ channels:\n\ - - conda-forge\n\ - - bioconda\n\ - - defaults' + - conda-forge\n\ + - bioconda\n\ + - defaults' RUN \ - conda create -y -p /env/ \ - 'gridss=2.13.2=h50ea8bc_3' \ - 'bash' \ - 'coreutils' \ - 'findutils' \ - 'gawk' \ - 'grep' \ - 'procps-ng' \ - 'time' \ - 'util-linux' \ - 'which' + conda create -y -p /env/ \ + 'gridss=2.13.2=h50ea8bc_3' \ + 'bash' \ + 'coreutils' \ + 'findutils' \ + 'gawk' \ + 'grep' \ + 'procps-ng' \ + 'time' \ + 'util-linux' \ + 'which' RUN \ - conda clean -yaf + conda clean -yaf # Move Conda environment into distroless image FROM gcr.io/distroless/base-debian11:latest @@ -38,6 +38,6 @@ ENV LD_LIBRARY_PATH="/env/lib/:${LD_LIBRARY_PATH}" # Symlink system executables as required by VIRUSBreaked SHELL ["/env/bin/bash", "-c"] RUN \ - ln -s /env/bin/bash /bin/bash && \ - ln -s /env/bin/env /usr/bin/env && \ - ln -s /env/bin/time /usr/bin/time + ln -s /env/bin/bash /bin/bash && \ + ln -s /env/bin/env /usr/bin/env && \ + ln -s /env/bin/time /usr/bin/time From 4e4a6994c585609129ee61a53ab1a343cd49993a Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Tue, 26 Mar 2024 19:37:53 +1100 Subject: [PATCH 3/7] Use standard BioContainers base for VIRUSBreakend Having distroless as the base Docker image for the VIRUSBreakend container was causing RepeatMasker to crash without an error/traceback when executing a system process. Switching to `quay.io/bioconda/base-glibc-busybox-bash:2.1.0` (the standard BioContainers base image) resolves the problem. --- modules/local/virusbreakend/Dockerfile | 27 +++++++++----------------- modules/local/virusbreakend/main.nf | 4 ++-- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/modules/local/virusbreakend/Dockerfile b/modules/local/virusbreakend/Dockerfile index f3a41abb..2d0742d8 100644 --- a/modules/local/virusbreakend/Dockerfile +++ b/modules/local/virusbreakend/Dockerfile @@ -14,30 +14,21 @@ channels:\n\ RUN \ conda create -y -p /env/ \ 'gridss=2.13.2=h50ea8bc_3' \ - 'bash' \ - 'coreutils' \ - 'findutils' \ - 'gawk' \ - 'grep' \ - 'procps-ng' \ - 'time' \ - 'util-linux' \ - 'which' + 'grep' + +# Install Dfam database required for RepeatMasker +RUN \ + conda install -y curl && \ + curl -s https://www.dfam.org/releases/Dfam_3.7/families/Dfam_curatedonly.h5.gz | \ + gzip -cd > /env/share/RepeatMasker/Libraries/Dfam.h5 RUN \ conda clean -yaf -# Move Conda environment into distroless image -FROM gcr.io/distroless/base-debian11:latest +# Move Conda environment into standard BioContainers base image +FROM quay.io/bioconda/base-glibc-busybox-bash:2.1.0 COPY --from=build /env/ /env/ ENV PATH="/env/bin:${PATH}" ENV LD_LIBRARY_PATH="/env/lib/:${LD_LIBRARY_PATH}" - -# Symlink system executables as required by VIRUSBreaked -SHELL ["/env/bin/bash", "-c"] -RUN \ - ln -s /env/bin/bash /bin/bash && \ - ln -s /env/bin/env /usr/bin/env && \ - ln -s /env/bin/time /usr/bin/time diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf index 9284e757..8302a974 100644 --- a/modules/local/virusbreakend/main.nf +++ b/modules/local/virusbreakend/main.nf @@ -6,8 +6,8 @@ process VIRUSBREAKEND { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/singularity/gridss:2.13.2--0' : - 'quay.io/nf-core/gridss:2.13.2--0' }" + 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/singularity/gridss:2.13.2--1' : + 'quay.io/nf-core/gridss:2.13.2--1' }" input: tuple val(meta), path(bam) From f4703e5f7d8381b21c1e4bc4ae1c1b1fde4d9cc4 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Tue, 26 Mar 2024 20:34:14 +1100 Subject: [PATCH 4/7] Lint modules/local/virusbreakend/Dockerfile --- modules/local/virusbreakend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/virusbreakend/Dockerfile b/modules/local/virusbreakend/Dockerfile index 2d0742d8..f8cc9d06 100644 --- a/modules/local/virusbreakend/Dockerfile +++ b/modules/local/virusbreakend/Dockerfile @@ -20,7 +20,7 @@ RUN \ RUN \ conda install -y curl && \ curl -s https://www.dfam.org/releases/Dfam_3.7/families/Dfam_curatedonly.h5.gz | \ - gzip -cd > /env/share/RepeatMasker/Libraries/Dfam.h5 + gzip -cd > /env/share/RepeatMasker/Libraries/Dfam.h5 RUN \ conda clean -yaf From 7adfda2df4bb5e920b8902a8857cf628e5acdcfb Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Wed, 3 Apr 2024 20:23:46 +1100 Subject: [PATCH 5/7] Apply @maxulysse recommendations Co-authored-by: Maxime U Garcia --- modules/local/virusbreakend/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf index 8302a974..b20c3492 100644 --- a/modules/local/virusbreakend/main.nf +++ b/modules/local/virusbreakend/main.nf @@ -6,8 +6,8 @@ process VIRUSBREAKEND { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/singularity/gridss:2.13.2--1' : - 'quay.io/nf-core/gridss:2.13.2--1' }" + 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/singularity/gridss:2.13.2--1': + 'nf-core/gridss:2.13.2--1' }" input: tuple val(meta), path(bam) From ae86ba99c6d3400f0597905fd3819ad2af8cd2d4 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Wed, 3 Apr 2024 23:24:11 +1100 Subject: [PATCH 6/7] Update VIRUSBreakend process container directive --- modules/local/virusbreakend/main.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf index b20c3492..67b1a76e 100644 --- a/modules/local/virusbreakend/main.nf +++ b/modules/local/virusbreakend/main.nf @@ -5,9 +5,7 @@ process VIRUSBREAKEND { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/singularity/gridss:2.13.2--1': - 'nf-core/gridss:2.13.2--1' }" + container "quay.io/nf-core/gridss:2.13.2--1" input: tuple val(meta), path(bam) From 7432c5527eb2fe22fa202e137c6103f53d9c8d8b Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Wed, 3 Apr 2024 23:34:06 +1100 Subject: [PATCH 7/7] Do not qualify nf-core quay.io Docker URI --- modules/local/virusbreakend/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf index 67b1a76e..da93d763 100644 --- a/modules/local/virusbreakend/main.nf +++ b/modules/local/virusbreakend/main.nf @@ -5,7 +5,7 @@ process VIRUSBREAKEND { label 'process_high' conda "${moduleDir}/environment.yml" - container "quay.io/nf-core/gridss:2.13.2--1" + container "nf-core/gridss:2.13.2--1" input: tuple val(meta), path(bam)