diff --git a/bin/demuxem.py b/bin/demuxem.py
index 3924f04..f3e69ef 100755
--- a/bin/demuxem.py
+++ b/bin/demuxem.py
@@ -31,16 +31,8 @@ if __name__ == '__main__':
     output_name = args.outputdir + "/" + args.objectOutDemuxem
 
     # load input rna data
-    #data = io.read_input(args.rna_matrix_dir, modality="rna")
     rna_data = sc.read_10x_mtx(args.rna_matrix_dir)
     hashing_data = sc.read_10x_mtx(args.hto_matrix_dir,gex_only=False)
-    #data.subset_data(modality_subset=['rna'])
-    #data.concat_data() # in case of multi-organism mixing data
-    # load input hashing data
-    #data.update(io.read_input(args.hto_matrix_dir, modality="hashing"))
-    # Extract rna and hashing data
-    #rna_data = data.get_data(modality="rna")
-    #hashing_data = data.get_data(modality="hashing")
     filter = ""
     if args.filter_demuxem.lower() in ['true', 't', 'yes', 'y', '1']:
         filter = True
@@ -96,7 +88,7 @@ pg.write_output(mudata, output_name + ".out.demuxEM.zarr.zip")
     print("\nSummary statistics:")
     print("total\t{}".format(rna_data.shape[0]))
-    for name, value in rna_data.obs["demux_type"].value_counts().iteritems():
+    for name, value in rna_data.obs["demux_type"].value_counts().items():
         print("{}\t{}".format(name, value))
     summary = rna_data.obs["demux_type"].value_counts().rename_axis('classification').reset_index(name='counts')
     total = ["total", rna_data.shape[0]]
diff --git a/bin/generate_data.py b/bin/generate_data.py
index 96fc494..7139cd3 100755
--- a/bin/generate_data.py
+++ b/bin/generate_data.py
@@ -3,7 +3,6 @@
 import os
 import scanpy as sc
 import argparse
-import muon as mu
 
 parser = argparse.ArgumentParser(description="Parameters for generating anndata and mudata")
 parser.add_argument("--assignment", help="Folder which contains cSV file with demultiplexing assignment", default=None)
diff --git a/bin/summary_gene.py b/bin/summary_gene.py
index 3564494..7e50812 100755
--- a/bin/summary_gene.py
+++ b/bin/summary_gene.py
@@ -1,9 +1,11 @@
 #!/usr/bin/env python
-import pandas as pd
 import os
-import scanpy as sc
 import argparse
-import muon as mu
+import numpy as np
+import scanpy as sc
+import pandas as pd
+from mudata import MuData
+
 
 parser = argparse.ArgumentParser(description="Parameters for summary process")
 parser.add_argument("--demuxlet", help="Folder containing output files of Demuxlet", default=None)
@@ -15,7 +17,6 @@ parser.add_argument("--generate_mudata", help="Generate mudata", action='store_true')
 parser.add_argument("--read_rna_mtx", help="10x-Genomics-formatted mtx directory for gene expression", default=None)
 parser.add_argument("--read_hto_mtx", help="10x-Genomics-formatted mtx directory for HTO expression", default=None)
-#parser.add_argument("--sampleId", help="sampleID if multiple samples are demultiplexed", default=None)
 
 args = parser.parse_args()
@@ -26,14 +27,12 @@ def demuxlet_summary(demuxlet_res, raw_adata, raw_mudata):
         obs_res_dir = [file for file in os.listdir(x) if file.endswith('.best')][0]
         obs_res = pd.read_csv(os.path.join(x, obs_res_dir), sep='\t')
         obs_res = obs_res.iloc[:, [1, 4, 5]]
-        obs_res['Assignment'] = obs_res['BEST.GUESS'].apply(lambda x: x.split(',')[1] if x.split(',')[0] == x.split(',')[1] else "NA")
-        obs_res.loc[obs_res['Assignment'] == "NA", 'Assignment'] = "doublet"
-        obs_res.loc[obs_res['DROPLET.TYPE'] == "AMB", 'Assignment'] = "negative"
-        obs_res.rename(columns={'BARCODE': 'Barcode'}, inplace=True)
-        demuxlet_assign = obs_res[['Barcode', 'Assignment']]
-        demuxlet_assign.index = demuxlet_assign.Barcode
-        demuxlet_assign = demuxlet_assign.drop(columns=['Barcode'])
-        demuxlet_assign.rename(columns={'Assignment': os.path.basename(x)}, inplace=True)
+        obs_res['Assignment'] = np.where(obs_res['BEST.GUESS'].str.split(',').str[0] == obs_res['BEST.GUESS'].str.split(',').str[1],
+                                         obs_res['BEST.GUESS'].str.split(',').str[0], "doublet")
+        obs_res['Assignment'] = np.where(obs_res['DROPLET.TYPE'] == 'AMB', 'negative', obs_res['Assignment'])
+        obs_res.rename(columns={"BARCODE": "Barcode", "Assignment": os.path.basename(x)}, inplace=True)
+        obs_res.set_index('Barcode', inplace=True)
+        demuxlet_assign = obs_res[[os.path.basename(x)]]
 
         if raw_adata is not None:
             adata = raw_adata.copy()
@@ -53,7 +52,7 @@ def demuxlet_summary(demuxlet_res, raw_adata, raw_mudata):
     assign.to_csv("genetic_summary/demuxlet_assignment.csv", quoting=False)
 
     classi = assign.copy()
-    classi[(classi != "negative") & (classi != "doublet")] = "singlet"
+    classi[~classi.isin(["doublet", "negative"])] = "singlet"
     classi.to_csv("genetic_summary/demuxlet_classification.csv", quoting=False)
 
     params = pd.concat(params, axis=1)
@@ -67,14 +66,12 @@ def freemuxlet_summary(freemuxlet_res, raw_adata, raw_mudata):
         obs_res_dir = [file for file in os.listdir(x) if file.endswith('.clust1.samples.gz')][0]
         obs_res = pd.read_csv(os.path.join(x, obs_res_dir), sep='\t')
         obs_res = obs_res.iloc[:, [1, 4, 5]]
-        obs_res['Assignment'] = obs_res['BEST.GUESS'].apply(lambda x: x.split(',')[1] if x.split(',')[0] == x.split(',')[1] else "NA")
-        obs_res.loc[obs_res['Assignment'] == "NA", 'Assignment'] = "doublet"
-        obs_res.loc[obs_res['DROPLET.TYPE'] == "AMB", 'Assignment'] = "negative"
-        obs_res.rename(columns={'BARCODE': 'Barcode'}, inplace=True)
-        freemuxlet_assign = obs_res[['Barcode', 'Assignment']]
-        freemuxlet_assign.index = freemuxlet_assign.Barcode
-        freemuxlet_assign = freemuxlet_assign.drop(columns=['Barcode'])
-        freemuxlet_assign.rename(columns={'Assignment': os.path.basename(x)}, inplace=True)
+        obs_res['Assignment'] = np.where(obs_res['BEST.GUESS'].str.split(',').str[0] == obs_res['BEST.GUESS'].str.split(',').str[1],
+                                         obs_res['BEST.GUESS'].str.split(',').str[0], "doublet")
+        obs_res['Assignment'] = np.where(obs_res['DROPLET.TYPE'] == 'AMB', 'negative', obs_res['Assignment'])
+        obs_res.rename(columns={"BARCODE": "Barcode", "Assignment": os.path.basename(x)}, inplace=True)
+        obs_res.set_index('Barcode', inplace=True)
+        freemuxlet_assign = obs_res[[os.path.basename(x)]]
 
         if raw_adata is not None:
             adata = raw_adata.copy()
@@ -94,7 +91,7 @@ def freemuxlet_summary(freemuxlet_res, raw_adata, raw_mudata):
     assign.to_csv("genetic_summary/freemuxlet_assignment.csv", quoting=False)
 
     classi = assign.copy()
-    classi[(classi != "negative") & (classi != "doublet")] = "singlet"
+    classi[~classi.isin(["doublet", "negative"])] = "singlet"
     classi.to_csv("genetic_summary/freemuxlet_classification.csv", quoting=False)
 
     params = pd.concat(params, axis=1)
@@ -112,11 +109,9 @@ def souporcell_summary(souporcell_res, raw_adata, raw_mudata):
         obs_res = obs_res.iloc[:, 0:3]
         obs_res.loc[obs_res['status'] == 'doublet', 'assignment'] = 'doublet'
         obs_res.loc[obs_res['status'] == 'unassigned', 'assignment'] = 'negative'
-        obs_res = obs_res.drop('status', axis=1)
-        obs_res.rename(columns={'barcode': 'Barcode'}, inplace=True)
-        obs_res.index = obs_res.Barcode
-        obs_res = obs_res.drop(columns=['Barcode'])
-        obs_res.columns = [os.path.basename(x)]
+        obs_res.rename(columns={'barcode': 'Barcode', 'assignment': os.path.basename(x)}, inplace=True)
+        obs_res.set_index('Barcode', inplace=True)
+        obs_res = obs_res[[os.path.basename(x)]]
 
         if raw_adata is not None:
             adata = raw_adata.copy()
@@ -136,7 +131,7 @@ def souporcell_summary(souporcell_res, raw_adata, raw_mudata):
     assign.to_csv("genetic_summary/souporcell_assignment.csv", quoting=False)
 
     classi = assign.copy()
-    classi[(classi != "negative") & (classi != "doublet")] = "singlet"
+    classi[~classi.isin(["doublet", "negative"])] = "singlet"
     classi.to_csv("genetic_summary/souporcell_classification.csv", quoting=False)
 
     params = pd.concat(params, axis=1)
@@ -152,12 +147,11 @@ def vireo_summary(vireo_res, raw_adata, raw_mudata):
             if "donor_ids.tsv" in files:
                 obs_res_dir = os.path.join(root, "donor_ids.tsv")
         obs_res = pd.read_csv(os.path.join(x, obs_res_dir), sep='\t')
-        obs_res = obs_res.iloc[:, [0, 1]]
+        obs_res = obs_res.iloc[:, [0, 1]]
         obs_res[obs_res == "unassigned"] = "negative"
-        obs_res.rename(columns={'cell': 'Barcode'}, inplace=True)
-        obs_res.index = obs_res.Barcode
-        obs_res = obs_res.drop(columns=['Barcode'])
-        obs_res.columns = [os.path.basename(x)]
+        obs_res.rename(columns={'cell': 'Barcode', 'donor_id': os.path.basename(x)}, inplace=True)
+        obs_res.set_index('Barcode', inplace=True)
+        obs_res = obs_res[[os.path.basename(x)]]
 
         if raw_adata is not None:
             adata = raw_adata.copy()
@@ -177,7 +171,7 @@ def vireo_summary(vireo_res, raw_adata, raw_mudata):
     assign.to_csv("genetic_summary/vireo_assignment.csv", quoting=False)
 
     classi = assign.copy()
-    classi[(classi != "negative") & (classi != "doublet")] = "singlet"
+    classi[~classi.isin(["doublet", "negative"])] = "singlet"
    classi.to_csv("genetic_summary/vireo_classification.csv", quoting=False)
 
     params = pd.concat(params, axis=1)
@@ -188,17 +182,13 @@ def scsplit_summary(scsplit_res, raw_adata, raw_mudata):
     params = []
 
     for x in scsplit_res:
-        obs_res_dir = ""
-        for root, dirs, files in os.walk(x):
-            if "scSplit_result.csv" in files:
-                obs_res_dir = os.path.join(root, "scSplit_result.csv")
+        obs_res_dir = next((os.path.join(root, "scSplit_result.csv") for root, dirs, files in os.walk(x) if "scSplit_result.csv" in files),"")
         obs_res = pd.read_table(obs_res_dir)
         obs_res['Assignment'] = obs_res['Cluster'].str.split('-').str[1]
         obs_res['Classification'] = obs_res['Cluster'].str.split('-').str[0]
-        obs_res = obs_res.drop('Cluster', axis=1)
         obs_res.loc[obs_res['Classification'] == 'DBL', 'Assignment'] = 'doublet'
-        obs_res.index = obs_res.Barcode
-        obs_res = obs_res.drop(columns=['Barcode', 'Classification'])
+        obs_res = obs_res.drop(columns=['Cluster', 'Classification'])
+        obs_res.set_index('Barcode', inplace=True)
         obs_res.columns = [os.path.basename(x)]
 
         if raw_adata is not None:
@@ -236,9 +226,11 @@ def scsplit_summary(scsplit_res, raw_adata, raw_mudata):
     adata = sc.read_10x_mtx(args.read_rna_mtx)
 
 if args.generate_mudata is True:
-    # TODO
-    os.mkdir("genetic_summary/mudata")
-    pass
+    if not os.path.exists("genetic_summary/mudata"):
+        os.mkdir("genetic_summary/mudata")
+    rna_data = sc.read_10x_mtx(args.read_rna_mtx)
+    hto_data = sc.read_10x_mtx(args.read_hto_mtx, gex_only=False)
+    mudata = MuData({"rna": rna_data, "hto": hto_data })
 
 if args.demuxlet is not None:
     demuxlet_res = args.demuxlet.split(':')
diff --git a/bin/summary_hash.py b/bin/summary_hash.py
index f43603a..11d1e91 100755
--- a/bin/summary_hash.py
+++ b/bin/summary_hash.py
@@ -19,7 +19,6 @@ parser.add_argument("--generate_mudata", help="Generate mudata", action='store_true')
 parser.add_argument("--read_rna_mtx", help="10x-Genomics-formatted mtx directory for gene expression", default=None)
 parser.add_argument("--read_hto_mtx", help="10x-Genomics-formatted mtx directory for HTO expression", default=None)
-parser.add_argument("--sampleId", help="sampleID if multiple samples are demultiplexed", default=None)
 
 args = parser.parse_args()
@@ -30,13 +29,10 @@ def demuxem_summary(demuxem_res, raw_adata, raw_mudata):
     for x in demuxem_res:
         obs_res_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("_obs.csv")][0])
         obs_res = pd.read_csv(obs_res_dir)
-        obs_res.rename(columns={obs_res.columns[0]: "Barcode"}, inplace=True)
-        demuxem_assign = obs_res[["Barcode", "assignment"]]
-        demuxem_assign.columns = ["Barcode", os.path.basename(x)]
-        demuxem_assign.loc[:, "Barcode"] = demuxem_assign["Barcode"].apply(lambda x: x + "-1")
-        #demuxem_assign["Barcode"] = demuxem_assign["Barcode"].astype(str) + "-1"
-        demuxem_assign.index = demuxem_assign.Barcode
-        demuxem_assign = demuxem_assign.drop(columns=['Barcode'])
+        obs_res.loc[:, "barcodekey"] = obs_res["barcodekey"].apply(lambda x: x + "-1")
+        demuxem_assign = obs_res[["barcodekey", "assignment"]]
+        demuxem_assign.columns = ["Barcode",os.path.basename(x)]
+        demuxem_assign.set_index("Barcode", inplace=True)
         assign.append(demuxem_assign)
 
         if raw_adata is not None:
@@ -56,21 +52,17 @@ def demuxem_summary(demuxem_res, raw_adata, raw_mudata):
             mudata.update()
             mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu")
 
-        demuxem_classi = obs_res[["Barcode", "demux_type"]]
+        demuxem_classi = obs_res[["barcodekey", "demux_type"]]
         demuxem_classi.columns = ["Barcode", os.path.basename(x)]
         demuxem_classi = demuxem_classi.replace("unknown", "negative")
-        demuxem_classi.loc[:, "Barcode"] = demuxem_classi["Barcode"].apply(lambda x: x + '-1')
-        demuxem_classi.index = demuxem_classi.Barcode
-        demuxem_classi = demuxem_classi.drop(columns=['Barcode'])
+        demuxem_classi.set_index("Barcode", inplace=True)
         classi.append(demuxem_classi)
 
         params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0])
         params_res = pd.read_csv(params_dir, keep_default_na=False, index_col=0)
-        #params_res.rename(columns={params_res.columns[1]: os.path.basename(x)}, inplace=True)
         params_res.columns = [os.path.basename(x)]
         params.append(params_res)
-
     assign = pd.concat(assign, axis=1)
     assign.to_csv("hash_summary/demuxem_assignment.csv", quoting=False)
@@ -113,13 +105,9 @@ def hashsolo_summary(hashsolo_res, raw_adata, raw_mudata):
             mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu")
 
         hashsolo_classi = obs_res[["most_likely_hypothesis"]]
-        hashsolo_classi_copy = hashsolo_classi.copy()
-        hashsolo_classi_copy.loc[hashsolo_classi_copy["most_likely_hypothesis"] == 0.0 , "most_likely_hypothesis"] = "negative"
-        hashsolo_classi_copy.loc[hashsolo_classi_copy["most_likely_hypothesis"] == 1.0 , "most_likely_hypothesis"] = "singlet"
-        hashsolo_classi_copy.loc[hashsolo_classi_copy["most_likely_hypothesis"] == 2.0 , "most_likely_hypothesis"] = "doublet"
-
-        hashsolo_classi_copy.columns = [os.path.basename(x)]
-        classi.append(hashsolo_classi_copy)
+        hashsolo_classi.loc[:, "most_likely_hypothesis"] = hashsolo_classi["most_likely_hypothesis"].replace({0.0: "negative", 1.0: "singlet", 2.0: "doublet"})
+        hashsolo_classi.columns = [os.path.basename(x)]
+        classi.append(hashsolo_classi)
 
         params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0])
         params_res = pd.read_csv(params_dir, keep_default_na=False, index_col=0)
@@ -170,7 +158,6 @@ def hasheddrops_summary(hasheddrops_res, raw_adata, raw_mudata):
mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu") - hasheddrops_classi = obs_res[["Barcode", "Classification"]] hasheddrops_classi = hasheddrops_classi.rename(columns={"Classification": os.path.basename(x)}) classi.append(hasheddrops_classi) @@ -196,10 +183,8 @@ def multiseq_summary(multiseq_res, raw_adata, raw_mudata): obs_res_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("_res.csv")][0]) multiseq_assign = pd.read_csv(obs_res_dir) multiseq_assign.columns = ["Barcode", os.path.basename(x)] - multiseq_assign.index = multiseq_assign.Barcode - multiseq_assign = multiseq_assign.drop(columns=['Barcode']) - multiseq_assign.replace("Doublet", "doublet", inplace=True) - multiseq_assign.replace("Negative", "negative", inplace=True) + multiseq_assign.set_index("Barcode", inplace=True) + multiseq_assign.replace({"Doublet": "doublet", "Negative": "negative"}, inplace=True) assign.append(multiseq_assign) if raw_adata is not None: @@ -219,7 +204,6 @@ def multiseq_summary(multiseq_res, raw_adata, raw_mudata): mudata.update() mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu") - params_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("params.csv")][0]) params_res = pd.read_csv(params_dir, usecols=[1, 2], keep_default_na=False, index_col=0) params_res.columns = [os.path.basename(x)] @@ -229,7 +213,7 @@ def multiseq_summary(multiseq_res, raw_adata, raw_mudata): assign.to_csv("hash_summary/multiseq_assignment.csv", quoting=False) classi = assign.copy() - classi[(classi != "doublet") & (classi != "negative")] = "singlet" + classi[~classi.isin(["doublet", "negative"])] = "singlet" classi.to_csv("hash_summary/multiseq_classification.csv", quoting=False) params = pd.concat(params, axis=1) @@ -243,10 +227,8 @@ def htodemux_summary(htodemux_res, raw_adata, raw_mudata): obs_res_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("_assignment_htodemux.csv")][0]) htodemux_assign = pd.read_csv(obs_res_dir) htodemux_assign.columns = ["Barcode", os.path.basename(x)] - htodemux_assign.replace("Doublet", "doublet", inplace=True) - htodemux_assign.replace("Negative", "negative", inplace=True) - htodemux_assign.index = htodemux_assign.Barcode - htodemux_assign = htodemux_assign.drop(columns=['Barcode']) + htodemux_assign.replace({"Doublet": "doublet", "Negative": "negative"}, inplace=True) + htodemux_assign.set_index("Barcode", inplace=True) assign.append(htodemux_assign) if raw_adata is not None: @@ -266,18 +248,13 @@ def htodemux_summary(htodemux_res, raw_adata, raw_mudata): mudata.update() mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu") - obs_res_dir = os.path.join(x, [filename for filename in os.listdir(x) if filename.endswith("_classification_htodemux.csv")][0]) htodemux_classi = pd.read_csv(obs_res_dir) htodemux_classi.columns = ["Barcode", os.path.basename(x)] - htodemux_classi.replace("Singlet", "singlet", inplace=True) - htodemux_classi.replace("Doublet", "doublet", inplace=True) - htodemux_classi.replace("Negative", "negative", inplace=True) - htodemux_classi.index = htodemux_classi.Barcode - htodemux_classi = htodemux_classi.drop(columns=['Barcode']) + htodemux_classi.replace({"Doublet": "doublet", "Negative": "negative", "Singlet": "singlet"}, inplace=True) + htodemux_classi.set_index("Barcode", inplace=True) classi.append(htodemux_classi) - params_dir = os.path.join(x, [filename for filename in os.listdir(x) 
if filename == "params.csv"][0]) params_res = pd.read_csv(params_dir, usecols=[1, 2], keep_default_na=False, index_col=0) params_res.columns = [os.path.basename(x)] @@ -345,10 +322,10 @@ def demuxmix_summary(demuxmix_res,raw_adata, raw_mudata): classi_df.to_csv("hash_summary" + "/demuxmix_classification.csv",index=False) assign_df = pd.concat(assign, axis=1, join="outer") - assign_df.to_csv("hash_summary" +"/demuxmix_assignment.csv",index=False) + assign_df.to_csv("hash_summary" + "/demuxmix_assignment.csv",index=False) params = pd.concat(params, axis=1) - params.to_csv("hash_summary" +"/demuxmix_params.csv",index=False) + params.to_csv("hash_summary" + "/demuxmix_params.csv",index=False) else: print("No results found for Demuxmix") @@ -473,7 +450,7 @@ def bff_summary(bff_res,raw_adata, raw_mudata): mudata.update() mudata.write("hash_summary/mudata/mudata_with_mudata_"+ os.path.basename(x)+".h5mu") - + dt_classi = data_bff.copy() column_names_class = ["bff_raw","bff_cluster","consensuscall"] for column in column_names_class: @@ -507,13 +484,14 @@ def bff_summary(bff_res,raw_adata, raw_mudata): os.mkdir("hash_summary") if args.generate_anndata is True: - os.mkdir("hash_summary/adata") + if not os.path.exists("hash_summary/adata"): + os.mkdir("hash_summary/adata") adata = sc.read_10x_mtx(args.read_rna_mtx) if args.generate_mudata is True: - os.mkdir("hash_summary/mudata") + if not os.path.exists("hash_summary/mudata"): + os.mkdir("hash_summary/mudata") rna_data = sc.read_10x_mtx(args.read_rna_mtx) - path_hto = args.read_hto_mtx hto_data = sc.read_10x_mtx(args.read_hto_mtx, gex_only=False) mudata = MuData({"rna": rna_data, "hto": hto_data }) @@ -549,7 +527,6 @@ def bff_summary(bff_res,raw_adata, raw_mudata): bff_res = args.bff.split(':') bff_summary(bff_res, adata, mudata) - # Read and combine assignment files assignment = [file for file in os.listdir("hash_summary") if file.endswith("_assignment.csv")] assignment_all = pd.read_csv(os.path.join("hash_summary", assignment[0])) diff --git a/main.nf b/main.nf index 897cb4c..547e332 100644 --- a/main.nf +++ b/main.nf @@ -9,7 +9,7 @@ process summary_all{ publishDir "$projectDir/$params.outdir/$params.mode", mode: 'copy' label 'small_mem' - conda "python=3.9 'pandas<2.0' scanpy muon numpy" + conda "pandas scanpy mudata" input: path gene_demulti_result @@ -26,7 +26,7 @@ process summary_all{ process generate_data{ publishDir "$projectDir/$params.outdir/$params.mode/data_output", mode: 'copy' - conda "python=3.9 'pandas<2.0' scanpy muon numpy" + conda "pandas scanpy mudata" input: path assignment diff --git a/modules/gene_demultiplexing.nf b/modules/gene_demultiplexing.nf index e52d19d..dbf5c2a 100644 --- a/modules/gene_demultiplexing.nf +++ b/modules/gene_demultiplexing.nf @@ -14,7 +14,7 @@ process summary{ publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/gene_demulti", mode: 'copy' label 'small_mem' - conda "python=3.9 'pandas<2.0' scanpy muon numpy" + conda "pandas scanpy mudata" input: tuple val(sampleId), path(hto_matrix, stageAs: 'hto_data'), path(rna_matrix, stageAs: 'rna_data') diff --git a/modules/hash_demulti/demuxem.nf b/modules/hash_demulti/demuxem.nf index 42e4106..65b5033 100644 --- a/modules/hash_demulti/demuxem.nf +++ b/modules/hash_demulti/demuxem.nf @@ -5,7 +5,7 @@ process demuxem{ publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti/demuxem", mode:'copy' label 'small_mem' - conda "python=3.9 bioconda::pegasuspy pandas<2.0.0 demuxEM" + conda "python bioconda::pegasuspy pandas scanpy demuxEM" input: 
        tuple val(sampleId), path(raw_hto_matrix_dir, stageAs: "hto_data_${params.hto_matrix_demuxem}"),
diff --git a/modules/hash_demultiplexing.nf b/modules/hash_demultiplexing.nf
index 41649ae..9ae2a9e 100644
--- a/modules/hash_demultiplexing.nf
+++ b/modules/hash_demultiplexing.nf
@@ -15,7 +15,7 @@ process summary{
     publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/hash_demulti", mode: 'copy'
     label 'small_mem'
 
-    conda "python=3.9 'pandas<2.0' scanpy muon numpy"
+    conda "pandas scanpy mudata"
 
    input:
        tuple val(sampleId), path(hto_matrix, stageAs: 'hto_data'), path(rna_matrix, stageAs: 'rna_data')
@@ -94,7 +94,7 @@ process summary{
    }
 
    """
-    summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $demuxmix_files $gmmDemux_files $bff_files $generate_adata $generate_mdata --sampleId $sampleId
+    summary_hash.py $demuxem_files $htodemux_files $multiseq_files $hashedDrops_files $hashsolo_files $demuxmix_files $gmmDemux_files $bff_files $generate_adata $generate_mdata
    """
 }
diff --git a/modules/multi_demultiplexing.nf b/modules/multi_demultiplexing.nf
index 757cf20..cfbeafb 100644
--- a/modules/multi_demultiplexing.nf
+++ b/modules/multi_demultiplexing.nf
@@ -8,7 +8,7 @@ process generate_data{
     publishDir "$projectDir/$params.outdir/$sampleId/$params.mode/data_output", mode: 'copy'
     label 'small_mem'
 
-    conda "python=3.9 'pandas<2.0' scanpy muon numpy"
+    conda "pandas scanpy mudata"
 
    input:
        tuple val(sampleId), val(hto_matrix), val(rna_matrix), path(assignment)
@@ -48,7 +48,7 @@ process summary_all{
     publishDir "$projectDir/$params.outdir/$sampleId/$params.mode", mode: 'copy'
     label 'small_mem'
 
-    conda "python=3.9 'pandas<2.0' scanpy muon numpy"
+    conda "pandas scanpy mudata"
 
    input:
        tuple val(sampleId), path(gene_demulti_result), path(hash_demulti_result)
diff --git a/modules/single/gene_demultiplexing.nf b/modules/single/gene_demultiplexing.nf
index 2254bed..20d5bd4 100644
--- a/modules/single/gene_demultiplexing.nf
+++ b/modules/single/gene_demultiplexing.nf
@@ -24,7 +24,7 @@ process summary{
     publishDir "$projectDir/$params.outdir/$params.mode/gene_demulti", mode: 'copy'
     label 'small_mem'
 
-    conda "python=3.9 'pandas<2.0' scanpy muon numpy"
+    conda "pandas scanpy mudata"
 
    input:
        val demuxlet_result
diff --git a/modules/single/hash_demulti/demuxem.nf b/modules/single/hash_demulti/demuxem.nf
index ff872fa..5e1f734 100644
--- a/modules/single/hash_demulti/demuxem.nf
+++ b/modules/single/hash_demulti/demuxem.nf
@@ -5,7 +5,7 @@ process demuxem{
     publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti/demuxem", mode:'copy'
     label 'small_mem'
 
-    conda "python=3.9 bioconda::pegasuspy 'pandas<2.0.0' demuxEM"
+    conda "python bioconda::pegasuspy pandas demuxEM scanpy"
 
    input:
        path raw_rna_matrix_dir, stageAs: "rna_data_${params.rna_matrix_demuxem}"
diff --git a/modules/single/hash_demultiplexing.nf b/modules/single/hash_demultiplexing.nf
index 6a109ec..bde9466 100644
--- a/modules/single/hash_demultiplexing.nf
+++ b/modules/single/hash_demultiplexing.nf
@@ -15,7 +15,7 @@ process summary{
     publishDir "$projectDir/$params.outdir/$params.mode/hash_demulti", mode: 'copy'
     label 'small_mem'
 
-    conda "python=3.9 'pandas<2.0' scanpy muon numpy"
+    conda "pandas scanpy mudata"
 
    input:
        val demuxem_result
diff --git a/nextflow.config b/nextflow.config
index 5f59f4e..60866af 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -2,7 +2,7 @@ params {
    outdir = "result"
    mode = "rescue"
    generate_anndata = "True"
-    generate_mudata = "False"
+    generate_mudata = "True"
    multi_input = null  // run only one sample
 
    // hashing-based deconvolution
@@ -247,7 +247,7 @@ params {
    vireo_out = "vireo_out"
 
    // souporcell
-    souporcell = "False"
+    souporcell = "True"
    souporcell_preprocess = "False"
    threads = 5
    ploidy = 2
@@ -437,7 +437,7 @@ profiles {
    singularity {
        singularity.enabled = true
        singularity.runOptions = "--bind $PWD"
-        singularity.cacheDir = "$PWD"
+        singularity.cacheDir = "$PWD"
        conda.enabled = false
        docker.enabled = false
        podman.enabled = false
@@ -447,10 +447,17 @@ profiles {
    }
    test {
        includeConfig 'test.config'
+    }
+    conda_singularity {
        singularity.enabled = true
        singularity.runOptions = "--bind $PWD"
-        singularity.cacheDir = "$PWD"
-        conda.enabled = true
+        singularity.cacheDir = "$PWD"
+        conda.enabled = true
+        docker.enabled = false
+        podman.enabled = false
+        shifter.enabled = false
+        charliecloud.enabled = false
+        apptainer.enabled = false
    }
 }
diff --git a/test_data/temporary_hto/matrix.mtx b/test_data/temporary_hto/matrix.mtx
deleted file mode 100644
index e3e87d9..0000000
--- a/test_data/temporary_hto/matrix.mtx
+++ /dev/null
@@ -1,1002 +0,0 @@
-%%MatrixMarket matrix coordinate integer general
-2 500 1000
[... 1000 coordinate entries of the deleted matrix.mtx ...]
diff --git a/test_data/temporary_hto/matrix_hto611905d177.mtx.gz b/test_data/temporary_hto/matrix_hto611905d177.mtx.gz
deleted file mode 100644
index cc368ff..0000000
Binary files a/test_data/temporary_hto/matrix_hto611905d177.mtx.gz and /dev/null differ