From 4e4ced7bdd909f8f45ff594522880cf0df37872c Mon Sep 17 00:00:00 2001
From: Pablo Moreno
Date: Wed, 19 Feb 2025 14:27:50 +0000
Subject: [PATCH] Make sure that replicates use different cells within samples

---
 .../decoupler/decoupler_pseudobulk.py                | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/tertiary-analysis/decoupler/decoupler_pseudobulk.py b/tools/tertiary-analysis/decoupler/decoupler_pseudobulk.py
index 6abb44ed..d48eb47a 100644
--- a/tools/tertiary-analysis/decoupler/decoupler_pseudobulk.py
+++ b/tools/tertiary-analysis/decoupler/decoupler_pseudobulk.py
@@ -74,12 +74,15 @@ def create_pseudo_replicates(adata, sample_key, num_replicates, seed=None):
     adata.obs[new_sample_key] = adata.obs[sample_key].astype(str)
 
     for sample in adata.obs[sample_key].unique():
-        sample_indices = adata.obs[adata.obs[sample_key] == sample].index
+        # pandas Index is immutable, so shuffle a NumPy copy of the labels
+        sample_indices = adata.obs[
+            adata.obs[sample_key] == sample].index.to_numpy(copy=True)
+        np.random.shuffle(sample_indices)  # Shuffle the indices to randomize
+        replicate_size = int(len(sample_indices) / num_replicates)
         for i in range(num_replicates):
-            replicate_indices = np.random.choice(
-                sample_indices, size=int(len(sample_indices) / num_replicates),
-                replace=False
-            )
+            start_idx = i * replicate_size
+            end_idx = start_idx + replicate_size
+            replicate_indices = sample_indices[start_idx:end_idx]
             adata.obs.loc[replicate_indices, new_sample_key] = (
                 adata.obs.loc[replicate_indices, new_sample_key] + f"_rep{i+1}"
             )