diff --git a/docs/source/genetic.md b/docs/source/genetic.md index b511e65..a6806af 100644 --- a/docs/source/genetic.md +++ b/docs/source/genetic.md @@ -11,8 +11,39 @@ Genotyped-based deconvolution leverages the unique genetic composition of indivi ## **Quick start** ```bash -cd hadge -nextflow run main.nf -profile test --mode genetic +nextflow run ${hadge_project_dir}/main.nf -profile test,conda_singularity --mode genetic +``` + +## **Example case** + +Case 1: Run the entire genotype-based mode without known donor genotype: + +```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda_singularity --outputdir ${output_dir} --mode genetic --bam ${bam_dir} --bai ${bai_dir} --barcodes ${barcodes_dir} --nsamples_genetic ${nsamples} --fasta ${fasta_dir} --fasta_index ${fasta_index_dir} --common_variants_scSplit ${common_variant_scsplit} --common_variants_souporcell ${common_variant_souporcell} --common_variants_freemuxlet ${common_variant_freemuxlet} --common_variants_cellsnp ${common_variant_cellsnp} --demuxlet False +``` + +Case 2: Skip cellSNP and run Vireo with available cell genotype file in VCF format: + +```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda --mode genetic --vireo_variant False --celldata ${cell_data_dir} +``` + +Case 3: Run Demuxlet with donor genotype: + +```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda --mode genetic --outputdir ${output_dir} --bam ${bam_dir} --bai ${bai_dir} --barcodes ${barcodes_dir} --vcf_donor ${donor_genotype_dir} +``` + +Case 4: Run scSplit without data pre-processing: + +```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda --mode genetic --scSplit_preprocess False # additional parameters as in case 1 +``` + +Case 5: Run the pipeline with different combinations of parameter values. This is only available in the single sample mode. The values should be separated by semicolons, and double-quoted if specified in a config file. 
+ + ```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda_singularity --mode genetic --alpha "0.1;0.3;0.5" # additional parameters as in case 1 ``` ## **Input data preparation** diff --git a/docs/source/hashing.md b/docs/source/hashing.md index 1c3e080..71b0c55 100644 --- a/docs/source/hashing.md +++ b/docs/source/hashing.md @@ -11,8 +11,27 @@ Cell hashing is a sample processing technique that requires processing individua ## **Quick start** ```bash -cd hadge -nextflow run main.nf -profile test --mode hashing +nextflow run ${hadge_project_dir}/main.nf -profile test,conda --mode hashing +``` + +## **Example case** + +Case 1: Run the entire hashing-based mode: + +```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda --outputdir ${output_dir} --mode hashing --hto_matrix_raw ${hto_raw_dir} --hto_matrix_filtered ${hto_filtered_dir} --rna_matrix_raw ${rna_raw_dir} --rna_matrix_filtered ${rna_filtered_dir} +``` + +Case 2: Run Multiseq with raw counts: + +```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda --outputdir ${output_dir} --mode hashing --rna_matrix_multiseq raw --hto_matrix_multiseq raw # additional parameters as in case 1 +``` + +Case 3: Run the pipeline with different combinations of parameter values. This is only available in the single sample mode. The values should be separated by semicolons, and double-quoted if specified in a config file. + +```bash +nextflow run ${hadge_project_dir}/main.nf -profile conda --mode hashing --quantile_multi "0.5;0.7" # additional parameters as in case 1 ``` ## **Input data preparation** diff --git a/docs/source/index.md b/docs/source/index.md index 103750e..871e75b 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -30,27 +30,36 @@ The hadge pipeline is implemented in Nextflow. To get started, you need to install Nextflow. Please refer to [Nextflow](https://www.nextflow.io/docs/latest/getstarted.html#installation) for more details. 
Alternatively, you can also install Nextflow via [conda](https://anaconda.org/bioconda/nextflow). -As next, please run the pipeline +## **Quick start** + +To execute the pipeline locally, start by cloning the repository into a directory, for example, named ${hadge_project_dir}. + +```bash +cd ${hadge_project_dir} && git clone https://github.com/theislab/hadge.git +nextflow run ${hadge_project_dir}/hadge/main.nf -profile conda_singularity +``` + +You can also run the pipeline from a directory outside the hadge project folder. + +Alternatively, you can also run the pipeline directly from the GitHub repository, without cloning it first: ```bash -nextflow run http://github.com/theislab/hadge -r main +nextflow run http://github.com/theislab/hadge -r main -profile conda_singularity ``` -You can also: +Please note: - Choose the mode: `--mode=` - Specify the folder name `--outdir` to save the output files. This will create a folder automatically in the project directory. -- Specify the input data for each process. +- To run the pipeline with your own dataset, specify the input data and additional parameters if needed. - The pipeline can be run either locally or on a HPC with different resource specifications. As default, the pipeline will run locally. You can also set the SLURM executor by running the pipeline with `-profile cluster`. - Please also check [](general) for more details. -## **Quick start** +To get familiar with hadge, we provide the test profile for a quick start. To access the test sample data, you can use the provided bash script to download the test data to the project directory of hadge and run the pipeline locally. 
```bash -git clone https://github.com/theislab/hadge.git -cd hadge -sh test_data/download_data.sh -nextflow run main.nf -profile test +cd ${hadge_project_dir}/hadge && sh test_data/download_data.sh +nextflow run main.nf -profile test,conda_singularity ``` ## Notebook diff --git a/modules/gene_demultiplexing.nf b/modules/gene_demultiplexing.nf index d6d4bfb..434360f 100644 --- a/modules/gene_demultiplexing.nf +++ b/modules/gene_demultiplexing.nf @@ -1,14 +1,14 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { data_preprocess } from './gene_demulti/samtools' -include { filter_variant } from './gene_demulti/bcftools' -include { variant_cellSNP } from './gene_demulti/cellsnp' -include { variant_freebayes } from './gene_demulti/freebayes' -include { demultiplex_demuxlet } from './gene_demulti/demuxlet' -include { demultiplex_freemuxlet } from './gene_demulti/freemuxlet' -include { demultiplex_scSplit } from './gene_demulti/scsplit' -include { demultiplex_souporcell } from './gene_demulti/souporcell' -include { demultiplex_vireo } from './gene_demulti/vireo' +include { data_preprocess } from "$projectDir/gene_demulti/samtools" +include { filter_variant } from "$projectDir/gene_demulti/bcftools" +include { variant_cellSNP } from "$projectDir/gene_demulti/cellsnp" +include { variant_freebayes } from "$projectDir/gene_demulti/freebayes" +include { demultiplex_demuxlet } from "$projectDir/gene_demulti/demuxlet" +include { demultiplex_freemuxlet } from "$projectDir/gene_demulti/freemuxlet" +include { demultiplex_scSplit } from "$projectDir/gene_demulti/scsplit" +include { demultiplex_souporcell } from "$projectDir/gene_demulti/souporcell" +include { demultiplex_vireo } from "$projectDir/gene_demulti/vireo" process summary { publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti", mode: 'copy' diff --git a/modules/hash_demultiplexing.nf b/modules/hash_demultiplexing.nf index 6a1c229..03938ce 100644 --- a/modules/hash_demultiplexing.nf +++ 
b/modules/hash_demultiplexing.nf @@ -1,15 +1,15 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { preprocessing_hashing as preprocessing_hashing_htodemux } from './hash_demulti/preprocess' -include { preprocessing_hashing as preprocessing_hashing_multiseq } from './hash_demulti/preprocess' -include { multiseq_hashing } from './hash_demulti/multiseq' -include { htodemux_hashing } from './hash_demulti/htodemux' -include { hash_solo_hashing } from './hash_demulti/hashsolo' -include { hashedDrops_hashing } from './hash_demulti/hashedDrops' -include { demuxem_hashing } from './hash_demulti/demuxem' -include { demuxmix_hashing } from './hash_demulti/demuxmix' -include { gmm_demux_hashing } from './hash_demulti/gmm_demux' -include { bff_hashing } from './hash_demulti/bff' +include { preprocessing_hashing as preprocessing_hashing_htodemux } from "$projectDir/hash_demulti/preprocess" +include { preprocessing_hashing as preprocessing_hashing_multiseq } from "$projectDir/hash_demulti/preprocess" +include { multiseq_hashing } from "$projectDir/hash_demulti/multiseq" +include { htodemux_hashing } from "$projectDir/hash_demulti/htodemux" +include { hash_solo_hashing } from "$projectDir/hash_demulti/hashsolo" +include { hashedDrops_hashing } from "$projectDir/hash_demulti/hashedDrops" +include { demuxem_hashing } from "$projectDir/hash_demulti/demuxem" +include { demuxmix_hashing } from "$projectDir/hash_demulti/demuxmix" +include { gmm_demux_hashing } from "$projectDir/hash_demulti/gmm_demux" +include { bff_hashing } from "$projectDir/hash_demulti/bff" process summary { publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti", mode: 'copy' diff --git a/modules/multi_demultiplexing.nf b/modules/multi_demultiplexing.nf index 24e1cf0..ee1c2c7 100644 --- a/modules/multi_demultiplexing.nf +++ b/modules/multi_demultiplexing.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { hash_demultiplexing } from './hash_demultiplexing' 
-include { gene_demultiplexing } from './gene_demultiplexing' -include { donor_match } from './donor_match' +include { hash_demultiplexing } from "$projectDir/hash_demultiplexing" +include { gene_demultiplexing } from "$projectDir/gene_demultiplexing" +include { donor_match } from "$projectDir/donor_match" process generate_data { publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/data_output", mode: 'copy' diff --git a/modules/single/gene_demultiplexing.nf b/modules/single/gene_demultiplexing.nf index 0e6f317..7a23941 100644 --- a/modules/single/gene_demultiplexing.nf +++ b/modules/single/gene_demultiplexing.nf @@ -1,15 +1,15 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { data_preprocess } from './gene_demulti/samtools' -include { filter_variant } from './gene_demulti/bcftools' -include { variant_cellSNP } from './gene_demulti/cellsnp' -include { variant_freebayes } from './gene_demulti/freebayes' -include { demultiplex_demuxlet } from './gene_demulti/demuxlet' -include { demultiplex_freemuxlet } from './gene_demulti/freemuxlet' -include { demultiplex_scSplit } from './gene_demulti/scsplit' -include { demultiplex_souporcell } from './gene_demulti/souporcell' -include { demultiplex_vireo } from './gene_demulti/vireo' +include { data_preprocess } from "$projectDir/gene_demulti/samtools" +include { filter_variant } from "$projectDir/gene_demulti/bcftools" +include { variant_cellSNP } from "$projectDir/gene_demulti/cellsnp" +include { variant_freebayes } from "$projectDir/gene_demulti/freebayes" +include { demultiplex_demuxlet } from "$projectDir/gene_demulti/demuxlet" +include { demultiplex_freemuxlet } from "$projectDir/gene_demulti/freemuxlet" +include { demultiplex_scSplit } from "$projectDir/gene_demulti/scsplit" +include { demultiplex_souporcell } from "$projectDir/gene_demulti/souporcell" +include { demultiplex_vireo } from "$projectDir/gene_demulti/vireo" def split_input(input) { if (input =~ /;/) { diff --git 
a/modules/single/hash_demultiplexing.nf b/modules/single/hash_demultiplexing.nf index 799c7b5..7d59d5f 100644 --- a/modules/single/hash_demultiplexing.nf +++ b/modules/single/hash_demultiplexing.nf @@ -1,15 +1,15 @@ #!/usr/bin/env nextflow nextflow.enable.dsl = 2 -include { preprocessing_hashing as preprocessing_hashing_htodemux } from './hash_demulti/preprocess' -include { preprocessing_hashing as preprocessing_hashing_multiseq } from './hash_demulti/preprocess' -include { multiseq_hashing } from './hash_demulti/multiseq' -include { htodemux_hashing } from './hash_demulti/htodemux' -include { hash_solo_hashing } from './hash_demulti/hashsolo' -include { hashedDrops_hashing } from './hash_demulti/hashedDrops' -include { demuxem_hashing } from './hash_demulti/demuxem' -include { demuxmix_hashing } from './hash_demulti/demuxmix' -include { gmm_demux_hashing } from './hash_demulti/gmm_demux' -include { bff_hashing } from './hash_demulti/bff' +include { preprocessing_hashing as preprocessing_hashing_htodemux } from "$projectDir/hash_demulti/preprocess" +include { preprocessing_hashing as preprocessing_hashing_multiseq } from "$projectDir/hash_demulti/preprocess" +include { multiseq_hashing } from "$projectDir/hash_demulti/multiseq" +include { htodemux_hashing } from "$projectDir/hash_demulti/htodemux" +include { hash_solo_hashing } from "$projectDir/hash_demulti/hashsolo" +include { hashedDrops_hashing } from "$projectDir/hash_demulti/hashedDrops" +include { demuxem_hashing } from "$projectDir/hash_demulti/demuxem" +include { demuxmix_hashing } from "$projectDir/hash_demulti/demuxmix" +include { gmm_demux_hashing } from "$projectDir/hash_demulti/gmm_demux" +include { bff_hashing } from "$projectDir/hash_demulti/bff" process summary { publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti", mode: 'copy' diff --git a/nextflow.config b/nextflow.config index f6af465..ec1b796 100644 --- a/nextflow.config +++ b/nextflow.config @@ -212,7 +212,7 @@ params { r2_info = 
"R2" min_mac = 1 min_callrate = 0.50 - alpha = "0.5" // must be string, multiple values in a single run should be comma separated + alpha = 0.5 doublet_prior = 0.5 demuxlet_out = "demuxlet_res" @@ -290,7 +290,7 @@ params { report_all_haplotype_alleles = "False" report_monomorphic = "False" pvar = 0.0 - strict_vcf = "False" + strict_vcf = "False" theta = 0.001 pooled_discrete = "False" diff --git a/test.config b/test.config index 5a60ee6..5c74183 100644 --- a/test.config +++ b/test.config @@ -12,7 +12,7 @@ params { barcodes = "$projectDir/test_data/barcodes.tsv" fasta = "$projectDir/test_data/genome_chr1.fa" fasta_index = "$projectDir/test_data/genome_chr1.fa.fai" - nsample = 2 + nsamples_genetic = 2 common_variants_scSplit = "$projectDir/test_data/common_variants_hg19_list.vcf" common_variants_souporcell = "$projectDir/test_data/common_variants_hg19.vcf" common_variants_freemuxlet = "$projectDir/test_data/jurkat_293t_exons_only.vcf.withAF.vcf.gz"