Skip to content

Commit

Permalink
Merge pull request #95 from aertslab/dev
Browse files: browse the repository at this point in the history
top_n regions option for tfmodisco
  • Loading branch information
nkempynck authored Jan 22, 2025
2 parents d177e81 + dfcd56f commit e5ecc0c
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/crested/tl/modisco/_tfmodisco.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def tfmodisco(
fdr: float = 0.05,
sliding_window_size: int = 20,
flank_size: int = 5,
top_n_regions: int | None = None,
):
"""
Run tf-modisco on one-hot encoded sequences and contribution scores stored in .npz files.
Expand Down Expand Up @@ -71,6 +72,8 @@ def tfmodisco(
Sliding window size for seqlet finding in tfmodiscolite.
flank_size
Flank size of seqlets.
top_n_regions
If set, run modisco on only the first `top_n_regions` regions of the one-hot encoded sequences and contribution scores (capped at the number of available regions). If None, all regions are used.
See Also
--------
Expand Down Expand Up @@ -139,6 +142,12 @@ def tfmodisco(
sequences = one_hot_seqs[:, :, start:end]
attributions = contribution_scores[:, :, start:end]

if top_n_regions:
top_n = top_n_regions if top_n_regions < len(sequences) else len(sequences)
top_n = max(top_n,1) # avoid faulty inputs
sequences = sequences[:top_n]
attributions = attributions[:top_n]

sequences = sequences.transpose(0, 2, 1)
attributions = attributions.transpose(0, 2, 1)

Expand Down

0 comments on commit e5ecc0c

Please sign in to comment.