Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download all files from a Catalog at once #48 #57

Merged
merged 11 commits into from
Jun 13, 2024
61 changes: 60 additions & 1 deletion sospice/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
import matplotlib.colors as mcolors
import pandas as pd
import numpy as np

import warnings
from parfive import Downloader
from astropy.utils.data import download_file

from .release import Release

from .file_metadata import FileMetadata, required_columns


Expand Down Expand Up @@ -451,3 +453,60 @@ def plot_fov(self, ax, **kwargs):
fontsize="xx-large",
borderaxespad=2,
)

def download_files(
    self,
    base_dir,
    base_url=None,
    keep_tree=True,
    downloader=None,
    max_download=1000,
):
    """
    Download all files from Catalog.

    Parameters
    ----------
    base_dir: Path or str
        Base directory to download files to
    base_url: str
        Base URL for files
    keep_tree: bool
        Keep tree directory structure (by level and date)
    downloader: parfive.Downloader
        If provided, enqueue files for download instead of downloading them.
        To download enqueued files, run `downloader.download()`
    max_download: int
        Maximum number of files to process (default 1000): only the first
        `max_download` rows of the catalog are used, the remaining rows
        are ignored. A `UserWarning` is issued for values above 1000.

    Return
    ------
    parfive.Results or pandas object
        If no `downloader` was provided, the value returned by
        `Downloader.download()`. Otherwise the files have only been
        enqueued, and the result of applying
        `FileMetadata.download_file` to each processed row is returned.
    """
    # NOTE(review): rows are taken from `self.read_catalog()` rather than
    # from `self` directly -- confirm that this honours any selection
    # already applied to this Catalog object.
    catalog = self.read_catalog()
    if max_download > 1000:
        warnings.warn(
            "You are overriding the default max_download: This might cause performance issues.",
            UserWarning,
            # Attribute the warning to the caller's line, not to this module.
            stacklevel=2,
        )
    # The download is only run here when this method created the
    # downloader; a caller-provided downloader is left for the caller to run.
    do_download = downloader is None
    if do_download:
        downloader = Downloader(overwrite=False)

    processed_downloads = catalog.iloc[:max_download].apply(
        lambda row: FileMetadata(row).download_file(
            base_dir=base_dir,
            base_url=base_url,
            keep_tree=keep_tree,
            downloader=downloader,
        ),
        axis=1,
    )
    if do_download:
        return downloader.download()

    return processed_downloads
32 changes: 31 additions & 1 deletion sospice/catalog/tests/test_catalog.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import pytest
from datetime import datetime
import pandas as pd

from pathlib import Path
import shutil
from parfive import Downloader

from ..catalog import Catalog

Expand All @@ -11,6 +13,11 @@ def catalog2():
return Catalog(release_tag="2.0")


@pytest.fixture
def filename():  # noqa: F811
    """Provide the FITS file name string used by the tests."""
    fits_name = "solo_L2_spice-n-exp_20220305T072522_V01_100663707-014.fits"
    return fits_name


@pytest.fixture
def catalog3():
    """Provide a Catalog built with release tag "3.0"."""
    release_3_catalog = Catalog(release_tag="3.0")
    return release_3_catalog
Expand All @@ -26,6 +33,11 @@ def catalog_empty():
return Catalog()


@pytest.fixture
def max_download():
    """Provide the number of files the download test asks for."""
    n_files = 2
    return n_files


@pytest.fixture
def catalog_df():
df = pd.DataFrame(
Expand Down Expand Up @@ -189,3 +201,21 @@ def test_mid_time(self, catalog2):
).total_seconds()
< 1 # noqa: W503
)

def test_download_files(self, catalog2, max_download, filename):  # noqa: F811
    """
    Check `Catalog.download_files` in direct-download and enqueue-only modes.

    First downloads `max_download` files without the directory tree and
    checks how many were downloaded and where the first one landed. Then
    passes an explicit downloader and checks that the file was only queued.
    """
    base_dir = Path("./local/test_download_file")
    # Start from a clean directory so files left by earlier runs are removed.
    if base_dir.exists():
        shutil.rmtree(base_dir)
    # Use the fixture value so the request and the assertion cannot diverge.
    result = catalog2.download_files(
        base_dir, max_download=max_download, keep_tree=False
    )
    assert len(result) == max_download

    if len(result) > 0:
        # With keep_tree=False the file must sit directly in base_dir;
        # comparing Path objects avoids depending on the path separator.
        assert Path(result[0]).parent == base_dir

    downloader = Downloader(overwrite=False)
    catalog2.download_files(base_dir, max_download=1, downloader=downloader)

    assert downloader.queued_downloads == 1