Skip to content

Commit

Permalink
v0.2.4
Browse files Browse the repository at this point in the history
  • Loading branch information
xinehc committed Dec 8, 2024
1 parent eddce72 commit bbbe15c
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 5 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# Changelog
## [0.2.4] - 2024-12-08
### Added
- Add `-g` to control the minimum number of unique marker genes (default: 1) required for a species to report its genome copies. Increasing `-g` (1 -> 2) lowers recall (detection limit: 0.125 -> 0.25) but improves precision.


## [0.2.3] - 2024-12-01
### Fixed
- Fix a bug introduced in v0.2.2 that caused ties not to be resolved properly. Results should be identical to v0.2.1.
Expand Down
2 changes: 1 addition & 1 deletion src/melon/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = '0.2.3'
__version__ = '0.2.4'

from .melon import GenomeProfiler
10 changes: 9 additions & 1 deletion src/melon/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ def cli(argv=sys.argv):
default=0.9,
help='Min. secondary-to-primary score ratio to report secondary alignments (-p in minimap2). [0.9]')

additional.add_argument(
'-g',
metavar='INT',
type=int,
choices=range(1, 9),
default=1,
help='Min. number of unique marker genes required for a species to report its genome copies. [1]')

additional_em.add_argument(
'-a',
metavar='INT',
Expand Down Expand Up @@ -186,7 +194,7 @@ def run(opt):
GenomeProfiler(file, opt.db, opt.output, opt.threads).run(
db_kraken=opt.db_kraken, skip_profile=opt.skip_profile, skip_clean=opt.skip_clean,
max_target_seqs=opt.m, evalue=opt.e, identity=opt.i, subject_cover=opt.s,
secondary_num=opt.n, secondary_ratio=opt.p,
secondary_num=opt.n, secondary_ratio=opt.p, min_markers=opt.g,
max_iterations=opt.a, epsilon=opt.c)

if index == len(opt.FILE) - 1:
Expand Down
11 changes: 8 additions & 3 deletions src/melon/melon.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def run_em(self, max_iterations=1000, epsilon=1e-10):

def run(self, db_kraken=None, skip_profile=False, skip_clean=False,
max_target_seqs=25, evalue=1e-15, identity=0, subject_cover=75,
secondary_num=2147483647, secondary_ratio=0.9,
secondary_num=2147483647, secondary_ratio=0.9, min_markers=1,
max_iterations=1000, epsilon=1e-10):
'''
Run the pipeline.
Expand Down Expand Up @@ -326,10 +326,15 @@ def run(self, db_kraken=None, skip_profile=False, skip_clean=False,
) for kingdom, replacement in replacements.items()}

## count assigned taxonomic labels
self.hits = [[*hit, self.assignments.get(hit[0], replacements.get(hit[1]))] for hit in self.hits]
counts, total_counts, lineage2identity = defaultdict(lambda: 0), defaultdict(lambda: 0), defaultdict(list)
lineage2rpg = defaultdict(set)
for hit in self.hits:
hit.append(self.assignments.get(hit[0], replacements.get(hit[1])))
lineage2rpg[hit[-1]].add(hit[2])

counts, total_counts, lineage2identity = defaultdict(lambda: 0), defaultdict(lambda: 0), defaultdict(list)
for hit in self.hits:
if len(lineage2rpg.get(hit[-1])) < min_markers:
hit[-1] = replacements.get(hit[1])
total_counts[hit[1]] += 1
counts[(hit[-1], hit[1])] += 1
lineage2identity[hit[-1]].append(self.identities.get((hit[0], hit[-1]), (0, 0)))
Expand Down

0 comments on commit bbbe15c

Please sign in to comment.