-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #35 from phac-nml/dev
Release 0.3.0: Added database incorporation
- Loading branch information
Showing
61 changed files
with
853 additions
and
133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
name: nf-core linting | ||
# This workflow is triggered on pushes and PRs to the repository. | ||
# It runs the `nf-core lint` and markdown lint tests to ensure | ||
# It runs the `nf-core pipelines lint` and markdown lint tests to ensure | ||
# that the code meets the nf-core guidelines. | ||
on: | ||
push: | ||
|
@@ -41,17 +41,32 @@ jobs: | |
python-version: "3.12" | ||
architecture: "x64" | ||
|
||
- name: read .nf-core.yml | ||
uses: pietrobolcato/[email protected] | ||
id: read_yml | ||
with: | ||
config: ${{ github.workspace }}/.nf-core.yml | ||
|
||
- name: Install dependencies | ||
run: | | ||
python -m pip install --upgrade pip | ||
pip install nf-core | ||
pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} | ||
- name: Run nf-core pipelines lint | ||
if: ${{ github.base_ref != 'master' }} | ||
env: | ||
GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} | ||
run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md | ||
|
||
- name: Run nf-core lint | ||
- name: Run nf-core pipelines lint --release | ||
if: ${{ github.base_ref == 'master' }} | ||
env: | ||
GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} | ||
run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md | ||
run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md | ||
|
||
- name: Save PR number | ||
if: ${{ always() }} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,3 +10,4 @@ testing/ | |
testing* | ||
*.pyc | ||
bin/ | ||
tests/data/irida/sample_name_add_iridanext.output.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
process APPEND_CLUSTERS { | ||
tag "Append additional clusters from database" | ||
label 'process_single' | ||
|
||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||
'https://depot.galaxyproject.org/singularity/csvtk:0.22.0--h9ee0642_1' : | ||
'biocontainers/csvtk:0.22.0--h9ee0642_1' }" | ||
|
||
input: | ||
path(initial_clusters) | ||
path(additional_clusters) | ||
|
||
output: | ||
path("reference_clusters.tsv") | ||
|
||
script: | ||
""" | ||
# Function to get the first address line from the files, handling gzipped files | ||
get_address() { | ||
if [[ "\${1##*.}" == "gz" ]]; then | ||
zcat "\$1" | awk 'NR>1 {print \$2}' | head -n 1 | ||
else | ||
awk 'NR>1 {print \$2}' "\$1" | head -n 1 | ||
fi | ||
} | ||
# Check if two files have consistent delimiter splits in the address column | ||
init_splits=\$(get_address "${initial_clusters}" | awk -F '${params.gm_delimiter}' '{print NF}') | ||
add_splits=\$(get_address "${additional_clusters}" | awk -F '${params.gm_delimiter}' '{print NF}') | ||
if [ "\$init_splits" != "\$add_splits" ]; then | ||
echo "Error: Address levels do not match between initial_clusters and --db_clusters." | ||
exit 1 | ||
fi | ||
# Add a "source" column to differentiate the reference profiles and additional profiles | ||
csvtk mutate2 -t -n source -e " 'ref' " ${initial_clusters} > reference_clusters_source.tsv | ||
csvtk mutate2 -t -n source -e " 'db' " ${additional_clusters} > additional_clusters_source.tsv | ||
# Combine profiles from both the reference and database into a single file | ||
csvtk concat -t reference_clusters_source.tsv additional_clusters_source.tsv | csvtk sort -t -k id > combined_profiles.tsv | ||
# Calculate the frequency of each sample_id across both sources | ||
csvtk freq -t -f id combined_profiles.tsv > sample_counts.tsv | ||
# For any sample_id that appears in both the reference and database, add a 'db_' prefix to the sample_id from the database | ||
csvtk join -t -f id combined_profiles.tsv sample_counts.tsv | \ | ||
csvtk mutate2 -t -n id -e '(\$source == "db" && \$frequency > 1) ? "db_" + \$id : \$id' | \ | ||
csvtk cut -t -f id,address > reference_clusters.tsv | ||
""" | ||
} |
Oops, something went wrong.