Skip to content

Commit

Permalink
add --force tag for biobricks install, otherwise only download outs n…
Browse files Browse the repository at this point in the history
…ot in cache
  • Loading branch information
tomlue committed Sep 14, 2024
1 parent 5b824a9 commit 14fcf38
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 15 deletions.
19 changes: 11 additions & 8 deletions biobricks/brick.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ def get_dvc_lock(self):
with open(self.path() / "dvc.lock") as f:
return yaml.safe_load(f)

def install(self):
def install(self, force_redownload=False):
"install this brick"
logger.info(f"running checks on brick")

if bblib(self.commit).exists():
if bblib(self.commit).exists() and not force_redownload:
logger.info(f"\033[91m{self.url}\033[0m already exists in BioBricks library.")
return True

Expand All @@ -133,13 +133,16 @@ def install(self):

cmd = functools.partial(run,shell=True,stdout=DEVNULL,stderr=DEVNULL)

logger.info(f"git clone {self.remote} {self._relpath()} in {bblib()}")
cmd(f"git clone {self.remote} {self._relpath()}", cwd = bblib())
cmd(f"git checkout {self.commit}", cwd = self.path())

DVCFetcher().fetch_outs(self)
if not self.path().exists() or force_redownload:
logger.info(f"git clone {self.remote} {self._relpath()} in {bblib()}")
if self.path().exists():
shutil.rmtree(self.path())
cmd(f"git clone {self.remote} {self._relpath()}", cwd = bblib())
cmd(f"git checkout {self.commit}", cwd = self.path())

DVCFetcher().fetch_outs(self, force_redownload=force_redownload)

logger.info(f"\033[94m{self.url()}\033[0m succesfully downloaded to BioBricks library.")
logger.info(f"\033[94m{self.url()}\033[0m successfully downloaded to BioBricks library.")
return self

def assets(self):
Expand Down
15 changes: 12 additions & 3 deletions biobricks/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,18 @@ def configure(bblib, token, overwrite, interactive):
click.echo(click.style(msg, fg="green"))

@cli.command(help="Install a data dependency", section=Sect.GLOBAL)
@click.argument("ref",type=str)
def install(ref):
return Brick.Resolve(ref, force_remote=True).install()
@click.argument("ref", type=str)
@click.option('--force', is_flag=True, help="Force redownload of the brick and all its assets")
def install(ref, force):
try:
brick = Brick.Resolve(ref, force_remote=True)
result = brick.install(force_redownload=force)
if result is True:
click.echo(f"Brick '{ref}' is already installed.")
else:
click.echo(f"Successfully installed brick '{ref}'.")
except Exception as e:
click.echo(f"Error occurred while installing '{ref}': {e}", err=True)

@cli.command(help="Uninstall a data dependency", section=Sect.GLOBAL)
@click.argument("ref", type=str)
Expand Down
18 changes: 15 additions & 3 deletions biobricks/dvc_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def _link_cache_to_brick(self, cache_path, brick_path):
else:
os.symlink(cache_path, brick_path)

def fetch_outs(self, brick, prefixes=['brick/']) -> tuple[list[dict], int]:
def fetch_outs(self, brick, prefixes=['brick/'], force_redownload=False) -> tuple[list[dict], int]:
dvc_lock = brick.get_dvc_lock()
stages = [stage for stage in dvc_lock.get('stages', []).values()]
all_outs = [out for stage in stages for out in stage.get('outs', [])]
Expand All @@ -177,8 +177,20 @@ def fetch_outs(self, brick, prefixes=['brick/']) -> tuple[list[dict], int]:
# download files
cache_paths = [self._remote_url_to_cache_path(url) for url in urls]
downloader = DownloadManager()
downloader.download_files(urls, cache_paths, total_size)


if force_redownload:
# If force_redownload is True, download all files
to_download = list(zip(urls, cache_paths))
else:
# Check which files need to be downloaded
to_download = [(url, cache_path) for url, cache_path in zip(urls, cache_paths) if not cache_path.exists()]

if to_download:
download_urls, download_paths = zip(*to_download) if to_download else ([], [])
downloader.download_files(download_urls, download_paths, total_size)
else:
logger.info("All files already exist in cache. No downloads needed.")

# build a symlink between each cache_path and its corresponding path
brick_paths = [brick.path() / path for path in paths]
for cache_path, brick_path in zip(cache_paths, brick_paths):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "biobricks"
version = "0.3.8"
version = "0.3.9"
authors = [
{ name="Thomas Luechtefeld", email="[email protected]" },
{ name="Jose A. Jaramillo", email="[email protected]" }
Expand Down
35 changes: 35 additions & 0 deletions tests/test_bricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,40 @@ def test_install_without_symlink(self, mocked_can_symlink):
self.assertTrue(full_path.exists())
self.assertFalse(full_path.is_symlink(), f"{path} should not be a symlink")

def test_force_redownload_hello_brick(self):
brick = Brick.Resolve("hello-brick")

# Initial install
brick.install()
initial_mtime = (brick.path() / "brick/iris.sqlite").stat().st_mtime

# Wait a second to ensure the modification time will be different if files are redownloaded
import time
time.sleep(1)

# Regular install (should not redownload)
brick.install()
regular_mtime = (brick.path() / "brick/iris.sqlite").stat().st_mtime

# Check that the file was not redownloaded
self.assertEqual(initial_mtime, regular_mtime, "File was redownloaded when it shouldn't have been")

# Force redownload
brick.install(force_redownload=True)
force_mtime = (brick.path() / "brick/iris.sqlite").stat().st_mtime

# Check if the file was actually redownloaded (modification time should be different)
self.assertNotEqual(initial_mtime, force_mtime, "File was not redownloaded when force_redownload was True")

# Check if all expected files still exist after force redownload
paths = [
"brick/iris.sqlite",
"brick/mtcars.parquet",
"brick/rtbls/iris.parquet",
"brick/rtbls/mtcars.parquet"
]
for path in paths:
self.assertTrue((brick.path() / path).exists(), f"{path} should exist after force redownload")

if __name__ == '__main__':
unittest.main()

0 comments on commit 14fcf38

Please sign in to comment.