Skip to content

Commit

Permalink
Add tabix indexing of gzipped VCFs in VcfBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
tfenne committed Jan 17, 2025
1 parent b0b4227 commit 24122f4
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
6 changes: 5 additions & 1 deletion fgpyo/vcf/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import Tuple
from typing import Union

import pysam
from pysam import VariantHeader
from pysam import VariantRecord

Expand Down Expand Up @@ -279,6 +280,9 @@ def to_path(self, path: Optional[Path] = None) -> Path:
for variant in self.to_sorted_list():
writer.write(variant)

if str(path.suffix) == ".gz":
pysam.tabix_index(str(path), preset="vcf")

return path

@staticmethod
Expand All @@ -291,7 +295,7 @@ def _to_vcf_path(path: Optional[Path]) -> Path:
path: optionally the path to the VCF, or a directory to create a temporary VCF.
"""
if path is None:
with NamedTemporaryFile(suffix=".vcf", delete=False) as fp:
with NamedTemporaryFile(suffix=".vcf.gz", delete=False) as fp:

Check warning on line 298 in fgpyo/vcf/builder.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/vcf/builder.py#L298

Added line #L298 was not covered by tests
path = Path(fp.name)
assert path.is_file()
return path
Expand Down
20 changes: 20 additions & 0 deletions tests/fgpyo/vcf/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pysam
import pytest
from pysam.libcbcf import VariantFile

from fgpyo.vcf import reader as vcf_reader
from fgpyo.vcf.builder import VariantBuilder
Expand Down Expand Up @@ -251,6 +252,25 @@ def test_zero_sample_vcf_round_trip(
_assert_equal(expected_value=builder_record, actual_value=vcf_record)


def test_indexing_gzipped_vcf(temp_path: Path) -> None:
vcf = temp_path / "test.vcf.gz"
builder = VariantBuilder()
_add_headers(builder)
builder.add(contig="chr1", pos=1000, ref="A", alts="G")
builder.add(contig="chr1", pos=2000, ref="A", alts="G")
builder.add(contig="chr1", pos=3000, ref="A", alts="G")
builder.add(contig="chr2", pos=1000, ref="A", alts="G")
builder.to_path(vcf)

reader: VariantFile
with vcf_reader(vcf) as reader:
assert len(list(reader.fetch(contig="chr1", start=900, end=1100))) == 1
assert len(list(reader.fetch(contig="chr1", start=900, end=2100))) == 2
assert len(list(reader.fetch(contig="chr1", start=900, end=3100))) == 3
assert len(list(reader.fetch(contig="chr2", start=900, end=1100))) == 1
assert len(list(reader.fetch(contig="chr2", start=5000, end=6000))) == 0


def _add_random_genotypes(
random_generator: random.Random,
record_input: Mapping[str, Any],
Expand Down

0 comments on commit 24122f4

Please sign in to comment.