Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] master from biopython:master #55

Merged
merged 2 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions Bio/Align/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3723,7 +3723,7 @@ def substitutions(self):
start1, start2 = end1, end2
return m

def counts(self, substitution_matrix=None, ignore_sequences=False):
def counts(self, substitution_matrix=None, wildcard=None, ignore_sequences=False):
"""Count the number of identities, mismatches, and gaps of an alignment.

Arguments:
Expand All @@ -3733,6 +3733,10 @@ def counts(self, substitution_matrix=None, ignore_sequences=False):
(typically from the ``Bio.Align.substitution_matrices``
submodule) to also calculate the number of positive
matches in an amino acid alignment.
- wildcard - The wildcard character. This character is
ignored in the calculation of the number of
matches, mismatches, and positives.
Default value: None.
- ignore_sequences - If True, do not calculate the number of identities,
positives, and mismatches, but only calculate the
number of aligned sequences and number of gaps
Expand Down Expand Up @@ -3790,6 +3794,8 @@ def counts(self, substitution_matrix=None, ignore_sequences=False):
- internal_gaps - the number of gaps in the interior of the alignment;
- gaps - the total number of gaps in the alignment;
"""
if wildcard is not None:
wildcard = ord(wildcard)
left_insertions = left_deletions = 0
right_insertions = right_deletions = 0
internal_insertions = internal_deletions = 0
Expand Down Expand Up @@ -3872,7 +3878,9 @@ def counts(self, substitution_matrix=None, ignore_sequences=False):
for c1, c2 in zip(
sequence1[start1:end1], sequence2[start2:end2]
):
if c1 == c2:
if c1 == wildcard or c2 == wildcard:
pass
elif c1 == c2:
identities += 1
else:
mismatches += 1
Expand All @@ -3881,7 +3889,9 @@ def counts(self, substitution_matrix=None, ignore_sequences=False):
for c1, c2 in zip(
sequence1[start1:end1], sequence2[start2:end2]
):
if c1 == c2:
if c1 == wildcard or c2 == wildcard:
pass
elif c1 == c2:
identities += 1
else:
mismatches += 1
Expand Down
6 changes: 6 additions & 0 deletions Doc/Tutorial/chapter_align.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@ An ``Alignment`` object created by the parser in ``Bio.Align`` may have
additional attributes, depending on the alignment file format from which
the alignment was read.

.. _`subsec:slicing-indexing-alignment`:

Slicing and indexing an alignment
---------------------------------

Expand Down Expand Up @@ -613,6 +615,10 @@ as properties.
>>> counts.right_deletions
2

Use the ``wildcard`` argument to specify a letter that should be ignored when
counting identities, positives, and mismatches; common choices are
``wildcard="?"`` and ``wildcard="N"``. Positions containing the wildcard are
still included in ``counts.aligned``.

For an alignment of more than two sequences, the numbers of identities,
mismatches, and gaps are calculated and summed over all pairs of sequences in
the alignment.
Expand Down
11 changes: 11 additions & 0 deletions Doc/Tutorial/chapter_pairwise.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,17 @@ alignments:
query 0 G-A-T 3
<BLANKLINE>

Use an integer index to get one of the aligned sequences, including its gaps (see :ref:`subsec:slicing-indexing-alignment`):

.. cont-doctest

.. code:: pycon

>>> alignment[0]
'GAACT'
>>> alignment[1]
'G-A-T'

Each alignment stores the alignment score:

.. cont-doctest
Expand Down
160 changes: 160 additions & 0 deletions Tests/test_pairwise_aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,14 @@ def test_needlemanwunsch_simple1(self):
self.assertTrue(
np.array_equal(alignment.aligned, np.array([[[0, 4]], [[0, 4]]]))
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 1)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 0)
alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
self.assertEqual(len(alignments), 1)
alignment = alignments[0]
Expand All @@ -588,6 +596,14 @@ def test_needlemanwunsch_simple1(self):
self.assertTrue(
np.array_equal(alignment.aligned, np.array([[[0, 4]], [[4, 0]]]))
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 1)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 0)
seq2 = "GAXT"
aligner.wildcard = "X"
score = aligner.score(seq1, seq2)
Expand All @@ -610,6 +626,14 @@ def test_needlemanwunsch_simple1(self):
self.assertTrue(
np.array_equal(alignment.aligned, np.array([[[0, 4]], [[0, 4]]]))
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 1)
counts = alignment.counts(wildcard="X")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 0)
alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
self.assertEqual(len(alignments), 1)
alignment = alignments[0]
Expand All @@ -626,6 +650,14 @@ def test_needlemanwunsch_simple1(self):
self.assertTrue(
np.array_equal(alignment.aligned, np.array([[[0, 4]], [[4, 0]]]))
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 1)
counts = alignment.counts(wildcard="X")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 3)
self.assertEqual(counts.mismatches, 0)
aligner.wildcard = None
score = aligner.score(seq1, seq2)
self.assertAlmostEqual(score, 2.0)
Expand Down Expand Up @@ -693,6 +725,14 @@ def test_needlemanwunsch_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
self.assertEqual(len(alignments), 1)
alignment = alignments[0]
Expand All @@ -712,6 +752,14 @@ def test_needlemanwunsch_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
seq1 = "GAXAT"
seq2 = "GAAXT"
aligner.wildcard = "X"
Expand All @@ -738,6 +786,14 @@ def test_needlemanwunsch_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
self.assertEqual(len(alignments), 1)
alignment = alignments[0]
Expand All @@ -757,6 +813,14 @@ def test_needlemanwunsch_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)

def test_fogsaa_simple2(self):
seq1 = "GA?AT"
Expand Down Expand Up @@ -787,6 +851,14 @@ def test_fogsaa_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
self.assertEqual(len(alignments), 1)
alignment = alignments[0]
Expand All @@ -806,6 +878,14 @@ def test_fogsaa_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="?")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
seq1 = "GAXAT"
seq2 = "GAAXT"
aligner.wildcard = "X"
Expand All @@ -832,6 +912,14 @@ def test_fogsaa_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="X")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
alignments = aligner.align(seq1, reverse_complement(seq2), strand="-")
self.assertEqual(len(alignments), 1)
alignment = alignments[0]
Expand All @@ -851,6 +939,14 @@ def test_fogsaa_simple2(self):
np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]),
)
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)
counts = alignment.counts(wildcard="X")
self.assertEqual(counts.aligned, 4)
self.assertEqual(counts.identities, 4)
self.assertEqual(counts.mismatches, 0)


class TestPairwiseOpenPenalty(unittest.TestCase):
Expand Down Expand Up @@ -5306,6 +5402,14 @@ def test_alignment_wildcard(self):
""",
)
self.assertEqual(alignment.shape, (2, 17))
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(
alignment.format("psl"),
"""\
Expand Down Expand Up @@ -5336,6 +5440,14 @@ def test_alignment_wildcard(self):
""",
)
self.assertEqual(alignment.shape, (2, 17))
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(
alignment.format("psl"),
"""\
Expand Down Expand Up @@ -5367,6 +5479,14 @@ def test_alignment_wildcard(self):
""",
)
self.assertEqual(alignment.shape, (2, 17))
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(
alignment.format("psl"),
"""\
Expand Down Expand Up @@ -5398,6 +5518,14 @@ def test_alignment_wildcard(self):
query 22 ACGATCGAGCNGCTACG 5
""",
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(alignment.shape, (2, 17))
self.assertEqual(
alignment.format("psl"),
Expand Down Expand Up @@ -5432,6 +5560,14 @@ def test_alignment_wildcard(self):
query 0 ------ACGATCGAGCNGCTACGCCCNC 22
""",
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(alignment.shape, (2, 28))
self.assertEqual(
alignment.format("psl"),
Expand Down Expand Up @@ -5462,6 +5598,14 @@ def test_alignment_wildcard(self):
query 22 ------ACGATCGAGCNGCTACGCCCNC 0
""",
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(alignment.shape, (2, 28))
self.assertEqual(
alignment.format("psl"),
Expand Down Expand Up @@ -5493,6 +5637,14 @@ def test_alignment_wildcard(self):
query 0 ------ACGATCGAGCNGCTACGCCCNC 22
""",
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(alignment.shape, (2, 28))
self.assertEqual(
alignment.format("psl"),
Expand Down Expand Up @@ -5525,6 +5677,14 @@ def test_alignment_wildcard(self):
query 22 ------ACGATCGAGCNGCTACGCCCNC 0
""",
)
counts = alignment.counts()
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 2)
counts = alignment.counts(wildcard="N")
self.assertEqual(counts.aligned, 17)
self.assertEqual(counts.identities, 15)
self.assertEqual(counts.mismatches, 1)
self.assertEqual(alignment.shape, (2, 28))
self.assertEqual(
alignment.format("psl"),
Expand Down