Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] master from biopython:master #54

Merged
merged 1 commit into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions Bio/Align/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3723,7 +3723,7 @@ def substitutions(self):
start1, start2 = end1, end2
return m

def counts(self, substitution_matrix=None, gaps_only=False):
def counts(self, substitution_matrix=None, ignore_sequences=False):
"""Count the number of identities, mismatches, and gaps of an alignment.

Arguments:
Expand All @@ -3733,12 +3733,13 @@ def counts(self, substitution_matrix=None, gaps_only=False):
(typically from the ``Bio.Align.substitution_matrices``
submodule) to also calculate the number of positive
matches in an amino acid alignment.
- gaps_only - If True, do not calculate the number of identities,
- ignore_sequences - If True, do not calculate the number of identities,
positives, and mismatches, but only calculate the
number of gaps. This will speed up the calculation.
number of aligned letters and number of gaps
to speed up the calculation.
Default value: False.

A ValueError is raised if gaps_only is True and substitution_matrix is not None.
A ValueError is raised if ignore_sequences is True and substitution_matrix is not None.

>>> aligner = PairwiseAligner(mode='global', match_score=2, mismatch_score=-1)
>>> for alignment in aligner.align("TACCG", "ACG"):
Expand Down Expand Up @@ -3793,16 +3794,18 @@ def counts(self, substitution_matrix=None, gaps_only=False):
right_insertions = right_deletions = 0
internal_insertions = internal_deletions = 0
aligned = 0
if gaps_only:
if ignore_sequences:
identities = None
mismatches = None
else:
identities = 0
mismatches = 0
if substitution_matrix is None:
positives = None
elif gaps_only:
raise ValueError("gaps_only cannot be True if substitution_matrix is used")
elif ignore_sequences:
raise ValueError(
"ignore_sequences cannot be True if substitution_matrix is used"
)
else:
positives = 0
sequences = [None] * len(self.sequences)
Expand All @@ -3813,7 +3816,7 @@ def counts(self, substitution_matrix=None, gaps_only=False):
for i, sequence in enumerate(self.sequences):
start = min(coordinates[i, :])
end = max(coordinates[i, :])
if not gaps_only:
if not ignore_sequences:
try:
sequence = sequence[start:end]
except ValueError:
Expand All @@ -3823,10 +3826,10 @@ def counts(self, substitution_matrix=None, gaps_only=False):
if sum(aligned_steps > 0) > sum(aligned_steps < 0):
coordinates[i, :] = coordinates[i, :] - start
else:
if not gaps_only:
if not ignore_sequences:
sequence = reverse_complement(sequence)
coordinates[i, :] = end - coordinates[i, :]
if gaps_only:
if ignore_sequences:
sequences[i] = None
else:
try:
Expand Down
27 changes: 24 additions & 3 deletions Doc/Tutorial/chapter_align.rst
Original file line number Diff line number Diff line change
Expand Up @@ -581,9 +581,10 @@ alignment are indicated by -1:
Counting identities, mismatches, and gaps
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The ``counts`` method counts the number of identities, mismatches, and gaps
(insertions and deletions) of an alignment. The return value is an
``AlignmentCounts`` object, from which the counts can be obtained as properties.
The ``counts`` method counts the number of identities, mismatches, aligned
letters, and gaps (insertions and deletions) of an alignment. The return
value is an ``AlignmentCounts`` object, from which the counts can be obtained
as properties.

.. cont-doctest

Expand Down Expand Up @@ -656,6 +657,26 @@ number of gaps (= insertions + deletions):
>>> counts.internal_gaps
2

To speed up the calculation, you can use ``ignore_sequences=True`` to skip
counting the number of matches and mismatches (this will still calculate the
number of aligned letters):

.. cont-doctest

.. code:: pycon

>>> counts = alignment.counts(ignore_sequences=True)
>>> counts.aligned
16
>>> print(counts.identities)
None
>>> print(counts.mismatches)
None
>>> counts.insertions
1
>>> counts.deletions
5

For protein alignments, in addition to the number of identities and mismatches,
you can also count the number of positive matches by supplying a substitution
matrix (see Chapter :ref:`sec:substitution_matrices`):
Expand Down
6 changes: 4 additions & 2 deletions Tests/test_Align_Alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2376,13 +2376,15 @@ def test_counts(self):
str(counts),
"AlignmentCounts(left_insertions=0, left_deletions=0, internal_insertions=0, internal_deletions=0, right_insertions=80, right_deletions=4, aligned=3084, identities=3020, mismatches=64, positives=None)",
)
counts = alignment.counts(gaps_only=True)
counts = alignment.counts(ignore_sequences=True)
self.assertEqual(
str(counts),
"AlignmentCounts(left_insertions=0, left_deletions=0, internal_insertions=0, internal_deletions=0, right_insertions=80, right_deletions=4, aligned=3084, identities=None, mismatches=None, positives=None)",
)
with self.assertRaises(ValueError):
alignment.counts(substitution_matrix=substitution_matrix, gaps_only=True)
alignment.counts(
substitution_matrix=substitution_matrix, ignore_sequences=True
)
for i, sequence in enumerate(alignment.sequences):
length = len(sequence)
alignment.sequences[i] = Seq(None, length)
Expand Down