diff --git a/fgpyo/sam/__init__.py b/fgpyo/sam/__init__.py index 1d80beb..a8d465f 100644 --- a/fgpyo/sam/__init__.py +++ b/fgpyo/sam/__init__.py @@ -677,33 +677,48 @@ def from_recs( # noqa: C901 # `from_recs` is too complex (11 > 10) return PairOrientation.RF -def isize(r1: AlignedSegment, r2: Optional[AlignedSegment] = None) -> int: - """Computes the insert size for a pair of records.""" - if r2 is None: - r2_is_unmapped = r1.mate_is_unmapped - r2_reference_id = r1.next_reference_id +def isize(rec1: AlignedSegment, rec2: Optional[AlignedSegment] = None) -> int: + """Computes the insert size ("template length" or "TLEN") for a pair of records. + + Args: + rec1: The first record in the pair. + rec2: The second record in the pair. If None, then mate info on `rec1` will be used. + + """ + if rec2 is None: + rec2_is_unmapped = rec1.mate_is_unmapped + rec2_reference_id = rec1.next_reference_id else: - r2_is_unmapped = r2.is_unmapped - r2_reference_id = r2.reference_id + rec2_is_unmapped = rec2.is_unmapped + rec2_reference_id = rec2.reference_id - if r1.is_unmapped or r2_is_unmapped or r1.reference_id != r2_reference_id: + if rec1.is_unmapped or rec2_is_unmapped or rec1.reference_id != rec2_reference_id: return 0 - if r2 is None: - if not r1.has_tag("MC"): - raise ValueError('Cannot determine proper pair status without R2\'s cigar ("MC")!') - r2_cigar = Cigar.from_cigarstring(str(r1.get_tag("MC"))) - r2_is_reverse = r1.mate_is_reverse - r2_reference_start = r1.next_reference_start - r2_reference_end = r1.next_reference_start + r2_cigar.length_on_target() + if rec2 is None: + rec2_is_forward = rec1.mate_is_forward + rec2_reference_start = rec1.next_reference_start else: - r2_is_reverse = r2.is_reverse - r2_reference_start = r2.reference_start - r2_reference_end = r2.reference_end + rec2_is_forward = rec2.is_forward + rec2_reference_start = rec2.reference_start - r1_pos = r1.reference_end if r1.is_reverse else r1.reference_start - r2_pos = r2_reference_end if r2_is_reverse else r2_reference_start - return r2_pos - r1_pos + if rec1.is_forward and rec2_is_forward: + return rec2_reference_start - rec1.reference_start + if rec1.is_reverse and rec2_is_forward: + return rec2_reference_start - rec1.reference_end + + if rec2 is None: + if not rec1.has_tag("MC"): + raise ValueError('Cannot determine proper pair status without a mate cigar ("MC")!') + rec2_cigar = Cigar.from_cigarstring(str(rec1.get_tag("MC"))) + rec2_reference_end = rec1.next_reference_start + rec2_cigar.length_on_target() + else: + rec2_reference_end = rec2.reference_end + + if rec1.is_forward: + return rec2_reference_end - rec1.reference_start + else: + return rec2_reference_end - rec1.reference_end DefaultProperlyPairedOrientations: set[PairOrientation] = {PairOrientation.FR} diff --git a/tests/fgpyo/sam/test_sam.py b/tests/fgpyo/sam/test_sam.py index acbba6c..b5b6fc9 100755 --- a/tests/fgpyo/sam/test_sam.py +++ b/tests/fgpyo/sam/test_sam.py @@ -594,7 +594,9 @@ def test_isize_raises_when_r2_not_provided_and_no_mate_cigar_tag() -> None: builder = SamBuilder() r1, _ = builder.add_pair(chrom="chr1", start1=100, cigar1="115M", start2=250, cigar2="40M") r1.set_tag("MC", None) - with pytest.raises(ValueError, match="Cannot determine proper pair status without R2's cigar"): + with pytest.raises( + ValueError, match="Cannot determine proper pair status without a mate cigar" + ): sam.isize(r1) @@ -769,7 +771,7 @@ def test_set_mate_info_one_unmapped() -> None: def test_set_mate_info_both_mapped() -> None: """Test set_mate_info sets mate info for two mapped records.""" builder = SamBuilder() - r1, r2 = builder.add_pair(chrom="chr1", start1=200, start2=300) + r1, r2 = builder.add_pair(chrom="chr1", start1=200, start2=301) assert r1.is_mapped is True assert r2.is_mapped is True @@ -789,11 +791,11 @@ def test_set_mate_info_both_mapped() -> None: assert rec.is_proper_pair is True assert r1.reference_start == 200 - assert r1.next_reference_start == 300 - assert r2.reference_start == 300 + assert r1.next_reference_start == 301 + assert r2.reference_start == 301 assert r2.next_reference_start == 200 - assert r1.template_length == 200 - assert r2.template_length == -200 + assert r1.template_length == 201 + assert r2.template_length == -201 assert r1.is_forward is True assert r2.is_reverse is True assert r1.mate_is_reverse is True