Skip to content

Commit

Permalink
Several busco-related fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
dialvarezs committed Dec 12, 2024
1 parent f85d7b0 commit 25b72f1
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 52 deletions.
4 changes: 2 additions & 2 deletions bin/combine_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ def main(args=None):
if args.busco_summary:
busco_results = pd.read_csv(args.busco_summary, sep="\t")
if not bins.equals(
busco_results["GenomeBin"].sort_values().reset_index(drop=True)
busco_results["Input_file"].sort_values().reset_index(drop=True)
):
sys.exit("Bins in BUSCO summary do not match bins in bin depths summary!")
results = pd.merge(
results, busco_results, left_on="bin", right_on="GenomeBin", how="outer"
results, busco_results, left_on="bin", right_on="Input_file", how="outer"
) # assuming depths for all bins are given

if args.checkm_summary:
Expand Down
3 changes: 1 addition & 2 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,8 @@ process {
memory = { 16.GB * task.attempt }
}
withName: BUSCO {
cpus = { 4 * task.attempt }
cpus = { 8 * task.attempt }
memory = { 10.GB * task.attempt }
errorStrategy = { task.exitStatus in ((130..145) + 104 + 250) ? 'retry' : 'ignore' }
}

withName: MAXBIN2 {
Expand Down
4 changes: 0 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,6 @@ process {
]
}

withName: BUSCO_DB_PREPARATION {
publishDir = [path: { "${params.outdir}/GenomeBinning/QC/BUSCO" }, mode: params.publish_dir_mode, pattern: "*.tar.gz"]
}

withName: BUSCO {
ext.args = [
params.busco_db ? '--offline' : ''
Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
},
"busco/busco": {
"branch": "master",
"git_sha": "abd32eb8341f565fed86738307a39404fc1da0ce",
"git_sha": "9a19690b0a3fae05fa1a6ad90a0720d681429e31",
"installed_by": ["modules"]
},
"cat/fastq": {
Expand Down
2 changes: 1 addition & 1 deletion modules/local/combine_tsv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process COMBINE_TSV {
'biocontainers/bioawk:1.0--hed695b0_5' }"

input:
path(bin_summaries)
path(bin_summaries, stageAs: "bin_summaries/*.tsv")

output:
path("*.tsv") , emit: combined
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/busco/busco/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ params {
busco_auto_lineage_prok = false
save_busco_db = false
busco_clean = false
busco_lineage = 'auto'
checkm_download_url = "https://zenodo.org/records/7401545/files/checkm_data_2015_01_16.tar.gz"
checkm_db = null
save_checkm_data = false
Expand Down
5 changes: 5 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,11 @@
"description": "Download URL for BUSCO lineage dataset, or path to a tar.gz archive, or local directory containing already downloaded and unpacked lineage datasets.",
"help_text": "E.g. https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz or '/path/to/buscodb' (files still need to be unpacked manually). Available databases are listed here: https://busco-data.ezlab.org/v5/data/lineages/."
},
"busco_lineage": {
"type": "string",
"default": "auto",
"description": "Lineage to use for BUSCO (for example, bacteria_odb12). Additionally, supports 'auto' and 'auto_prok' for automated lineage selection."
},
"busco_auto_lineage_prok": {
"type": "boolean",
"description": "Run BUSCO with automated lineage selection, but ignoring eukaryotes (saves runtime)."
Expand Down
50 changes: 10 additions & 40 deletions subworkflows/local/busco_qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,49 +16,19 @@ workflow BUSCO_QC {
main:
if (!busco_db.isEmpty()) {
if (busco_db.extension in ['gz', 'tgz']) {
BUSCO_UNTAR(busco_db.map { db -> [[id: 'busco_db'], db] })
BUSCO_UNTAR([[id: 'busco_db'], busco_db])

busco_lineage = busco_db.getSimpleName()
if (busco_db.getSimpleName().contains('odb')) {
busco_lineage = busco_db.getSimpleName()
}
busco_db = BUSCO_UNTAR.out.untar.map { it[1] }

// Expects to be tar.gz!
// ch_db_for_busco = BUSCO_DB_PREPARATION(busco_db).db.map { meta, db ->
// def meta_new = [:]
// meta_new['id'] = meta
// meta_new['lineage'] = 'Y'
// [meta_new, db]
// }
}
// else if (busco_db.isDirectory()) {
// // Set meta to match expected channel cardinality for BUSCO
// ch_db_for_busco = Channel
// .of(busco_db)
// .map { db ->
// def meta = [:]
// meta['id'] = db.getBaseName()
// if (meta['id'].contains('odb10') == true) {
// meta['lineage'] = 'Y'
// }
// else {
// meta['lineage'] = 'N'
// }
// [meta, db]
// }
// .collect()
// }
// BUSCO_BUSCO(bins, params.busco_lineage)
else if (busco_db.isDirectory()) {
if (busco_db.name.contains('odb')) {
busco_lineage = busco_db.name
}
}
}
// else {
// Set BUSCO database to empty to allow for --auto-lineage
// ch_db_for_busco = Channel
// .of([])
// .map { empty_db ->
// def meta = [:]
// meta['lineage'] = ''
// [meta, []]
// }
// .collect()
// }

BUSCO_BUSCO( bins, 'genome', busco_lineage, busco_db, [] )

Expand All @@ -68,7 +38,7 @@ workflow BUSCO_QC {
// BUSCO_SAVE_DOWNLOAD(ch_downloads)
// }

COMBINE_BUSCO_TSV(BUSCO_BUSCO.out.batch_summary.collect())
COMBINE_BUSCO_TSV(BUSCO_BUSCO.out.batch_summary.map { it[1] }.collect())

emit:
summary = COMBINE_BUSCO_TSV.out.combined
Expand Down
2 changes: 1 addition & 1 deletion workflows/mag.nf
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ workflow MAG {
BUSCO_QC(
ch_input_bins_for_qc.groupTuple(),
ch_busco_db,
'auto_prok'
params.busco_lineage
)
ch_busco_summary = BUSCO_QC.out.summary
ch_versions = ch_versions.mix(BUSCO_QC.out.versions.first())
Expand Down

0 comments on commit 25b72f1

Please sign in to comment.