Skip to content

Commit

Permalink
feat(IPVC-2229): add utility to download from eutils (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
sptaylor authored Mar 20, 2024
1 parent 0c23e73 commit 3dece55
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/uta/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ class InvalidIntervalError(UTAError):
class InvalidHGVSVariantError(UTAError):
    """UTAError subclass named for invalid HGVS variants; adds no state or behavior."""


class EutilsDownloadError(UTAError):
    """Raised when a download from the NCBI eutils endpoint fails.

    Derives from UTAError, like the other exceptions in this module, so
    that callers handling the package-wide base error also see download
    failures.
    """

# <LICENSE>
# Copyright 2014 UTA Contributors (https://bitbucket.org/biocommons/uta)
##
Expand Down
35 changes: 35 additions & 0 deletions src/uta/tools/eutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from enum import Enum

import requests

from uta import EutilsDownloadError


class NcbiFileFormatEnum(str, Enum):
    """File formats that can be requested from the NCBI efetch endpoint.

    Each member's value is the string passed as the ``rettype`` query
    parameter. Members are also ``str`` instances, so they compare equal
    to their raw value (``NcbiFileFormatEnum.FASTA == "fasta"``).
    """

    FASTA = "fasta"
    GENBANK = "gb"

    def __str__(self) -> str:
        # The default str()/format() output of a str-mixin Enum member
        # differs between Python versions (3.11 changed format() to match
        # str()). Returning the raw value makes str() and f-strings render
        # "fasta"/"gb" on every version.
        return self.value


def download_from_eutils(
    accession: str,
    file_format: NcbiFileFormatEnum,
    output_file: str,
    timeout=None,
) -> None:
    """
    Download a file from NCBI using the eutils endpoint.

    Args:
    - accession: NCBI accession ID
    - file_format: File format to download ("fasta" or "gb")
    - output_file: Path to the file where the downloaded content will be saved
    - timeout: Optional timeout in seconds forwarded to ``requests.get``.
      The default (None) sends the request without a timeout argument,
      exactly as before.

    Raises:
    - EutilsDownloadError: if the endpoint answers with a status code
      other than 200. Nothing is written to ``output_file`` in that case.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

    # Resolve the format to its plain string once. Formatting an Enum member
    # directly in an f-string renders differently depending on the Python
    # version, which made the error message below unstable.
    rettype = file_format.value if isinstance(file_format, Enum) else str(file_format)

    params = {
        "db": "nuccore",
        "id": accession,
        "retmode": "text",
        "rettype": rettype,
    }

    # Only forward ``timeout`` when the caller supplied one, so the default
    # call to requests.get is unchanged.
    request_kwargs = {"params": params}
    if timeout is not None:
        request_kwargs["timeout"] = timeout
    response = requests.get(base_url, **request_kwargs)

    if response.status_code != 200:
        raise EutilsDownloadError(
            f"Failed to download {rettype} file for {accession}. "
            f"HTTP status code: {response.status_code}"
        )

    # Explicit encoding: the platform default may be unable to represent
    # every character of the decoded response text.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(response.text)
47 changes: 47 additions & 0 deletions tests/test_uta_tools_eutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import os
import unittest
from unittest.mock import Mock, patch

from uta import EutilsDownloadError
from uta.tools.eutils import download_from_eutils, NcbiFileFormatEnum


class TestEutils(unittest.TestCase):
    """Tests for download_from_eutils with ``requests.get`` mocked out."""

    URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'

    def setUp(self):
        # Imported locally so the class needs no new module-level import.
        import tempfile

        # Write into a per-test temporary directory instead of the current
        # working directory; the directory (and any file in it) is removed
        # automatically once the test finishes.
        tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(tmp_dir.cleanup)
        self.output_file = os.path.join(tmp_dir.name, 'test_output.fa')

    @patch('requests.get')
    def test_successful_download(self, mock_get):
        """A 200 response is written verbatim to the output file."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = 'file content'
        mock_get.return_value = mock_response

        download_from_eutils('accession', NcbiFileFormatEnum.FASTA, self.output_file)

        mock_get.assert_called_once_with(
            self.URL,
            params={
                'db': 'nuccore',
                'id': 'accession',
                'retmode': 'text',
                'rettype': 'fasta',
            },
        )

        with open(self.output_file, 'r') as file:
            self.assertEqual(file.read(), 'file content')

    @patch('requests.get')
    def test_unsuccessful_download(self, mock_get):
        """A non-200 response raises EutilsDownloadError and writes nothing."""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        with self.assertRaises(EutilsDownloadError):
            download_from_eutils('accession', NcbiFileFormatEnum.FASTA, self.output_file)

        # The failure path must not leave an output file behind.
        self.assertFalse(os.path.exists(self.output_file))

0 comments on commit 3dece55

Please sign in to comment.