From 650c2677f56ecdf70ae8e6b0e22b98acd7e80b31 Mon Sep 17 00:00:00 2001 From: mdehoon Date: Fri, 7 Feb 2025 11:00:34 +0900 Subject: [PATCH 1/2] update (#4930) Co-authored-by: Michiel de Hoon --- Doc/Tutorial/chapter_align.rst | 2 ++ Doc/Tutorial/chapter_pairwise.rst | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/Doc/Tutorial/chapter_align.rst b/Doc/Tutorial/chapter_align.rst index 869380a36d8..efe5ec3cf03 100644 --- a/Doc/Tutorial/chapter_align.rst +++ b/Doc/Tutorial/chapter_align.rst @@ -278,6 +278,8 @@ An ``Alignment`` object created by the parser in ``Bio.Align`` may have additional attributes, depending on the alignment file format from which the alignment was read. +.. _`subsec:slicing-indexing-alignment`: + Slicing and indexing an alignment --------------------------------- diff --git a/Doc/Tutorial/chapter_pairwise.rst b/Doc/Tutorial/chapter_pairwise.rst index 227832456dc..eeed02745c7 100644 --- a/Doc/Tutorial/chapter_pairwise.rst +++ b/Doc/Tutorial/chapter_pairwise.rst @@ -91,6 +91,17 @@ alignments: query 0 G-A-T 3 +Use indices to get the aligned sequence (see :ref:`subsec:slicing-indexing-alignment`): + +.. cont-doctest + +.. code:: pycon + + >>> alignment[0] + 'GAACT' + >>> alignment[1] + 'G-A-T' + Each alignment stores the alignment score: .. 
cont-doctest From 6adade16fbf55debabeab77a0afa16a677126d9e Mon Sep 17 00:00:00 2001 From: mdehoon Date: Fri, 7 Feb 2025 14:46:30 +0900 Subject: [PATCH 2/2] wildcard (#4931) Co-authored-by: Michiel Jan Laurens de Hoon --- Bio/Align/__init__.py | 16 +++- Doc/Tutorial/chapter_align.rst | 4 + Tests/test_pairwise_aligner.py | 160 +++++++++++++++++++++++++++++++++ 3 files changed, 177 insertions(+), 3 deletions(-) diff --git a/Bio/Align/__init__.py b/Bio/Align/__init__.py index c7fe0b7d530..1926c2d9e3d 100644 --- a/Bio/Align/__init__.py +++ b/Bio/Align/__init__.py @@ -3723,7 +3723,7 @@ def substitutions(self): start1, start2 = end1, end2 return m - def counts(self, substitution_matrix=None, ignore_sequences=False): + def counts(self, substitution_matrix=None, wildcard=None, ignore_sequences=False): """Count the number of identities, mismatches, and gaps of an alignment. Arguments: @@ -3733,6 +3733,10 @@ def counts(self, substitution_matrix=None, ignore_sequences=False): (typically from the ``Bio.Align.substitution_matrices`` submodule) to also calculate the number of positive matches in an amino acid alignment. + - wildcard - The wildcard character. This character is + ignored in the calculation of the number of + identities, mismatches, and positives. + Default value: None. 
- ignore_sequences - If True, do not calculate the number of identities, positives, and mismatches, but only calculate the number of aligned sequences and number of gaps @@ -3790,6 +3794,8 @@ def counts(self, substitution_matrix=None, ignore_sequences=False): - internal_gaps - the number of gaps in the interior of the alignment; - gaps - the total number of gaps in the alignment; """ + if wildcard is not None: + wildcard = ord(wildcard) left_insertions = left_deletions = 0 right_insertions = right_deletions = 0 internal_insertions = internal_deletions = 0 @@ -3872,7 +3878,9 @@ def counts(self, substitution_matrix=None, ignore_sequences=False): for c1, c2 in zip( sequence1[start1:end1], sequence2[start2:end2] ): - if c1 == c2: + if c1 == wildcard or c2 == wildcard: + pass + elif c1 == c2: identities += 1 else: mismatches += 1 @@ -3881,7 +3889,9 @@ def counts(self, substitution_matrix=None, ignore_sequences=False): for c1, c2 in zip( sequence1[start1:end1], sequence2[start2:end2] ): - if c1 == c2: + if c1 == wildcard or c2 == wildcard: + pass + elif c1 == c2: identities += 1 else: mismatches += 1 diff --git a/Doc/Tutorial/chapter_align.rst b/Doc/Tutorial/chapter_align.rst index efe5ec3cf03..1030a52f122 100644 --- a/Doc/Tutorial/chapter_align.rst +++ b/Doc/Tutorial/chapter_align.rst @@ -615,6 +615,10 @@ as properties. >>> counts.right_deletions 2 +Use the ``wildcard`` argument to specify a letter that should be ignored when +counting identities, positives, and mismatches (``wildcard="?"`` and +``wildcard="N"`` are common choices). + For an alignment of more than two sequences, the number of identities, mismatches, and gaps are calculated and summed for all pairs of sequences in the alignment. 
diff --git a/Tests/test_pairwise_aligner.py b/Tests/test_pairwise_aligner.py index 8dcd514d19d..1fcbe3bb54e 100644 --- a/Tests/test_pairwise_aligner.py +++ b/Tests/test_pairwise_aligner.py @@ -572,6 +572,14 @@ def test_needlemanwunsch_simple1(self): self.assertTrue( np.array_equal(alignment.aligned, np.array([[[0, 4]], [[0, 4]]])) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 1) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 0) alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") self.assertEqual(len(alignments), 1) alignment = alignments[0] @@ -588,6 +596,14 @@ def test_needlemanwunsch_simple1(self): self.assertTrue( np.array_equal(alignment.aligned, np.array([[[0, 4]], [[4, 0]]])) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 1) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 0) seq2 = "GAXT" aligner.wildcard = "X" score = aligner.score(seq1, seq2) @@ -610,6 +626,14 @@ def test_needlemanwunsch_simple1(self): self.assertTrue( np.array_equal(alignment.aligned, np.array([[[0, 4]], [[0, 4]]])) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 1) + counts = alignment.counts(wildcard="X") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 0) alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") self.assertEqual(len(alignments), 1) alignment = alignments[0] @@ -626,6 +650,14 @@ def test_needlemanwunsch_simple1(self): self.assertTrue( 
np.array_equal(alignment.aligned, np.array([[[0, 4]], [[4, 0]]])) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 1) + counts = alignment.counts(wildcard="X") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 3) + self.assertEqual(counts.mismatches, 0) aligner.wildcard = None score = aligner.score(seq1, seq2) self.assertAlmostEqual(score, 2.0) @@ -693,6 +725,14 @@ def test_needlemanwunsch_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") self.assertEqual(len(alignments), 1) alignment = alignments[0] @@ -712,6 +752,14 @@ def test_needlemanwunsch_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) seq1 = "GAXAT" seq2 = "GAAXT" aligner.wildcard = "X" @@ -738,6 +786,14 @@ def test_needlemanwunsch_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) alignments = 
aligner.align(seq1, reverse_complement(seq2), strand="-") self.assertEqual(len(alignments), 1) alignment = alignments[0] @@ -757,6 +813,14 @@ def test_needlemanwunsch_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) def test_fogsaa_simple2(self): seq1 = "GA?AT" @@ -787,6 +851,14 @@ def test_fogsaa_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") self.assertEqual(len(alignments), 1) alignment = alignments[0] @@ -806,6 +878,14 @@ def test_fogsaa_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="?") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) seq1 = "GAXAT" seq2 = "GAAXT" aligner.wildcard = "X" @@ -832,6 +912,14 @@ def test_fogsaa_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[0, 2], [2, 3], [4, 5]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="X") + 
self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) alignments = aligner.align(seq1, reverse_complement(seq2), strand="-") self.assertEqual(len(alignments), 1) alignment = alignments[0] @@ -851,6 +939,14 @@ def test_fogsaa_simple2(self): np.array([[[0, 2], [3, 4], [4, 5]], [[5, 3], [3, 2], [1, 0]]]), ) ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) + counts = alignment.counts(wildcard="X") + self.assertEqual(counts.aligned, 4) + self.assertEqual(counts.identities, 4) + self.assertEqual(counts.mismatches, 0) class TestPairwiseOpenPenalty(unittest.TestCase): @@ -5306,6 +5402,14 @@ def test_alignment_wildcard(self): """, ) self.assertEqual(alignment.shape, (2, 17)) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual( alignment.format("psl"), """\ @@ -5336,6 +5440,14 @@ def test_alignment_wildcard(self): """, ) self.assertEqual(alignment.shape, (2, 17)) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual( alignment.format("psl"), """\ @@ -5367,6 +5479,14 @@ def test_alignment_wildcard(self): """, ) self.assertEqual(alignment.shape, (2, 17)) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + 
self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual( alignment.format("psl"), """\ @@ -5398,6 +5518,14 @@ def test_alignment_wildcard(self): query 22 ACGATCGAGCNGCTACG 5 """, ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual(alignment.shape, (2, 17)) self.assertEqual( alignment.format("psl"), @@ -5432,6 +5560,14 @@ def test_alignment_wildcard(self): query 0 ------ACGATCGAGCNGCTACGCCCNC 22 """, ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual(alignment.shape, (2, 28)) self.assertEqual( alignment.format("psl"), @@ -5462,6 +5598,14 @@ def test_alignment_wildcard(self): query 22 ------ACGATCGAGCNGCTACGCCCNC 0 """, ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual(alignment.shape, (2, 28)) self.assertEqual( alignment.format("psl"), @@ -5493,6 +5637,14 @@ def test_alignment_wildcard(self): query 0 ------ACGATCGAGCNGCTACGCCCNC 22 """, ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + 
self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual(alignment.shape, (2, 28)) self.assertEqual( alignment.format("psl"), @@ -5525,6 +5677,14 @@ def test_alignment_wildcard(self): query 22 ------ACGATCGAGCNGCTACGCCCNC 0 """, ) + counts = alignment.counts() + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 2) + counts = alignment.counts(wildcard="N") + self.assertEqual(counts.aligned, 17) + self.assertEqual(counts.identities, 15) + self.assertEqual(counts.mismatches, 1) self.assertEqual(alignment.shape, (2, 28)) self.assertEqual( alignment.format("psl"),