Skip to content

Commit

Permalink
Merge pull request #35 from rajewsky-lab/dev
Browse files Browse the repository at this point in the history
v0.5.5
  • Loading branch information
sztankatt authored Jun 30, 2022
2 parents e1b9c37 + f4f3066 commit ff9060f
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 55 deletions.
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

project = 'spacemake'
copyright = '2021, Rajewsky lab'
author = 'Tamas Ryszard Sztanka-Toth, Nikolaos Karaiskos, Marvin Jens, Nikolaus Rajewsky'
author = 'Tamas Ryszard Sztanka-Toth, Marvin Jens, Nikos Karaiskos, Nikolaus Rajewsky'

version = '0.5.4'
version = '0.5.5'
release = version

# -- General configuration
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ snakemake>=5.32.0,<6.5.0
pysam
pandas
scanpy
leidenalg
numpy
more-itertools
biopython
Expand All @@ -19,3 +18,4 @@ nbsphinx>=0.4
spacemake
novosparc
ipython
importlib-metadata
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[metadata]
name = spacemake
version = 0.5.4
author = Nikolaos Karaiskos Tamas Sztanka-Toth Marvin Jens
version = 0.5.5
author = Tamas Ryszard Sztanka-Toth, Marvin Jens, Nikos Karaiskos, Nikolaus Rajewsky
author_email = [email protected]
description = A bioinformatic pipeline for the analysis of spatial transcriptomic data
long_description = file: README.md
Expand Down
1 change: 0 additions & 1 deletion spacemake/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

logger_name = "spacemake.config"


def get_puck_parser(required=True):
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
parser.add_argument("--name", help="name of the puck", type=str, required=True)
Expand Down
84 changes: 53 additions & 31 deletions spacemake/project_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ class ProjectDF:

# default values of the project dataframe columns
project_df_default_values = {
"puck_barcode_file_id": "no_spatial_data",
"puck_barcode_file_id": ["no_spatial_data"],
"sample_sheet": None,
"species": None,
"demux_barcode_mismatch": 1,
Expand All @@ -597,6 +597,29 @@ class ProjectDF:
"dge": None,
}

# pandas dtype of every project dataframe column (same keys as
# project_df_default_values). The mapping is handed over as `dtype=` when
# the project file at `file_path` is loaded, and create_empty_df() applies
# it through DataFrame.astype().
# "object" is used for puck_barcode_file_id, whose default value is a list;
# presumably the other "object" columns are list-valued too -- TODO confirm.
project_df_dtypes = {
"puck_barcode_file_id": "object",
"sample_sheet": "str",
"species": "str",
"demux_barcode_mismatch": "int64",
"demux_dir": "str",
"basecalls_dir": "str",
"R1": "object",
"R2": "object",
"longreads": "str",
"longread_signature": "str",
"investigator": "str",
"sequencing_date": "str",
"experiment": "str",
"puck_barcode_file": "object",
"run_mode": "object",
"barcode_flavor": "str",
"is_merged": "bool",
"merged_from": "object",
"puck": "str",
"dge": "str",
}

def __init__(self, file_path, config: ConfigFile = None):
"""__init__.
Expand All @@ -617,6 +640,7 @@ def __init__(self, file_path, config: ConfigFile = None):
file_path,
index_col=["project_id", "sample_id"],
na_values=["None", "none"],
dtype = self.project_df_dtypes
)
failed=True
except pd.errors.EmptyDataError as e:
Expand All @@ -630,25 +654,25 @@ def __init__(self, file_path, config: ConfigFile = None):
failed=True

if self.df.empty:
index = pd.MultiIndex(
names=["project_id", "sample_id"], levels=[[], []], codes=[[], []]
)
self.df = pd.DataFrame(
columns=self.project_df_default_values.keys(), index=index
)
self.create_empty_df()
else:
# 'fix' the dataframe if there are inconsistencies
self.fix()
else:
index = pd.MultiIndex(
names=["project_id", "sample_id"], levels=[[], []], codes=[[], []]
)
self.df = pd.DataFrame(
columns=self.project_df_default_values.keys(), index=index
)
self.create_empty_df()

self.logger = logging.getLogger(logger_name)

def create_empty_df(self):
    """Reset ``self.df`` to an empty project dataframe.

    The resulting frame has no rows, an empty ``(project_id, sample_id)``
    MultiIndex, one column per key of ``project_df_default_values`` and
    the column dtypes listed in ``project_df_dtypes``.
    """
    index = pd.MultiIndex(
        names=["project_id", "sample_id"], levels=[[], []], codes=[[], []]
    )

    # Only the column *names* come from the defaults. Passing the defaults
    # dict itself as data asks pandas to fit its values into zero rows, and
    # a list-valued default (one element) does not match the empty index.
    self.df = pd.DataFrame(
        columns=list(self.project_df_default_values), index=index
    )

    # An all-empty frame starts out as object columns; cast to the declared
    # dtypes so later concatenation/IO sees consistent column types.
    self.df = self.df.astype(self.project_df_dtypes)

def compute_max_barcode_mismatch(self, indices: List[str]) -> int:
"""compute_max_barcode_mismatch.
Expand Down Expand Up @@ -852,8 +876,11 @@ def is_spatial(
return False

def fix(self):
# replacing NaN with None
import numpy as np
# convert types
self.df = self.df.where(pd.notnull(self.df), None)
self.df = self.df.replace({np.nan: None})
# replacing NaN with None

# rename puck_id to puck_barcode_file_id, for backward
# compatibility
Expand Down Expand Up @@ -894,13 +921,14 @@ def fix(self):
# the name of the puck, and the puck_barcode_file was set to None.
# Here we populate the puck_barcode_file into the path to the actual
# file so that no errors are caused downstream.
if row['puck_barcode_file'] is None:
if (row['puck_barcode_file'] is None and
row['puck_barcode_file_id'] is not None):
if len(row['puck_barcode_file_id']) > 1:
raise SpacemakeError('When no barcode file provided, there ' +
'only should be one id available')

pbf_id = row['puck_barcode_file_id'][0]
if pbf_id != self.project_df_default_values['puck_barcode_file_id']:
if pbf_id not in self.project_df_default_values['puck_barcode_file_id']:
puck = self.config.get_puck(pbf_id)

row['puck_barcode_file'] = [puck.variables['barcodes']]
Expand All @@ -918,7 +946,7 @@ def get_puck_barcode_file(
sample_id: str,
puck_barcode_file_id: str
) -> str:
if (puck_barcode_file_id == self.project_df_default_values['puck_barcode_file_id']):
if (puck_barcode_file_id in self.project_df_default_values['puck_barcode_file_id']):
# if sample is not spatial, or we request the non-spatial puck
return None
else:
Expand Down Expand Up @@ -947,18 +975,14 @@ def get_puck_barcode_ids_and_files(self,
project_id = project_id,
sample_id = sample_id)

puck_barcode_files = [self.get_puck_barcode_file(
project_id = project_id,
sample_id = sample_id,
puck_barcode_file_id = pbf_id)
for pbf_id in puck_barcode_file_ids]
puck_barcode_files = self.get_metadata('puck_barcode_file')

out_puck_barcode_files = []
out_puck_barcode_file_ids = []

# return only id-file pairs, for which file is not none
for pbf_id, pbf in zip(puck_barcode_file_ids, puck_barcode_files):
if pbf is not None:
if puck_barcode_files is not None:
for pbf_id, pbf in zip(puck_barcode_file_ids, puck_barcode_files):
out_puck_barcode_files.append(pbf)
out_puck_barcode_file_ids.append(pbf_id)

Expand All @@ -973,15 +997,15 @@ def get_matching_puck_barcode_file_ids(self,
sample_id = sample_id)

if not os.path.isfile(summary_file):
return [self.project_df_default_values['puck_barcode_file_id']]
return self.project_df_default_values['puck_barcode_file_id']

df = pd.read_csv(summary_file)

df = df.loc[(df.n_matching > 500) & (df.matching_ratio > 0.1)]

pdf_ids = df.puck_barcode_file_id.to_list()

pdf_ids.append(self.project_df_default_values['puck_barcode_file_id'])
pdf_ids.append(self.project_df_default_values['puck_barcode_file_id'][0])

return pdf_ids

Expand Down Expand Up @@ -1298,7 +1322,6 @@ def add_update_sample(
puck_barcode_file = [puck_barcode_file]

# if there are duplicates, raise error
print(puck_barcode_file)
if len(puck_barcode_file) != len(set(puck_barcode_file)):
raise SpacemakeError('Duplicate files provided for '
+ '--puck_barcode_file. \n'
Expand Down Expand Up @@ -1343,10 +1366,7 @@ def add_update_sample(
kwargs['puck_barcode_file'] = puck_barcode_file

else:
# put the default value in a list
kwargs['puck_barcode_file_id'] = [
self.project_df_default_values['puck_barcode_file_id']
]
# if no puck barcode files are provided, we check if the puck has barcodes
puck_name = kwargs.get('puck', None)

if puck_name is not None:
Expand All @@ -1365,7 +1385,9 @@ def add_update_sample(
new_project = pd.Series(self.project_df_default_values)
new_project.name = ix
new_project.update(kwargs)
#before addition

#after addition
self.df = pd.concat([self.df, pd.DataFrame(new_project).T], axis=0)

if return_series:
Expand Down
7 changes: 4 additions & 3 deletions spacemake/smk.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import scanpy as sc
import pandas as pd
import anndata
import importlib.metadata

from shutil import copyfile
from spacemake.project_df import ProjectDF, get_project_sample_parser
Expand Down Expand Up @@ -575,12 +574,15 @@ def spacemake_run(pdf, args):
parser_spatial = setup_spatial_parser(spmk, parser_main_subparsers)

def cmdline():
import importlib.metadata
"""cmdline."""
args = parser_main.parse_args()

if args.version:
if args.version and args.subcommand is None:
print(importlib.metadata.version('spacemake'))
return 0
else:
del args.version

parser_dict = {
"init": parser_init,
Expand All @@ -590,7 +592,6 @@ def cmdline():
"main": parser_main,
"spatial": parser_spatial
}

# get the function to be run
if "func" in args:
func = args.func
Expand Down
23 changes: 8 additions & 15 deletions spacemake/snakemake/scripts/snakemake_helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,32 +47,25 @@ def get_output_files(
# and sample is external, skip
continue


# get puck files
puck_barcode_file_ids = row['puck_barcode_file_id']


non_spatial_pbf_id = project_df.project_df_default_values[
'puck_barcode_file_id']

if non_spatial_pbf_id not in puck_barcode_file_ids:
puck_barcode_file_ids.append(non_spatial_pbf_id)

if puck_barcode_file_matching_type == 'none':
# reset to empty string
puck_barcode_file_ids = []
elif puck_barcode_file_matching_type == 'all':
# add non-spatial as well to every sample
pass
elif puck_barcode_file_matching_type == 'spatial':
# get only spatial
puck_barcode_file_ids = project_df.get_puck_barcode_file_ids_and_files(
puck_barcode_file_ids = project_df.get_puck_barcode_ids_and_files(
project_id, sample_id)[0]
elif puck_barcode_file_matching_type == 'spatial_matching':
puck_barcode_file_ids = project_df.get_matching_puck_barcode_file_ids(
project_id = project_id,
sample_id = sample_id)

# add the non spatial barcode by default
non_spatial_pbf_id = project_df.project_df_default_values[
'puck_barcode_file_id'][0]

if non_spatial_pbf_id not in puck_barcode_file_ids:
puck_barcode_file_ids.append(non_spatial_pbf_id)

for run_mode in row["run_mode"]:
run_mode_variables = project_df.config.get_run_mode(run_mode).variables

Expand Down
5 changes: 5 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ spacemake projects add_sample --project_id test \
--puck visium \
--puck_barcode_file spacemake/data/test/test_bc1.csv spacemake/data/test/test_bc2.csv

# update sample
spacemake projects update_sample --project_id test \
--sample_id two_bc_files \
--investigator Test

spacemake projects merge_samples --merged_project_id test \
--merged_sample_id test_merged \
--project_id_list test \
Expand Down

0 comments on commit ff9060f

Please sign in to comment.