Skip to content

Commit

Permalink
Merge pull request #47 from ImageMarkup/isic-116-batched-image-downloads
Browse files (browse the repository at this point in the history)
  • Loading branch information
danlamanna authored Apr 24, 2023
2 parents aff160d + a527e76 commit bb6b147
Showing 1 changed file with 10 additions and 15 deletions.
25 changes: 10 additions & 15 deletions isic_cli/cli/image.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,7 @@
import click
from click.types import IntRange
from humanize import intcomma
from more_itertools.more import chunked
from rich.console import Console
from rich.progress import Progress

Expand Down Expand Up @@ -98,28 +99,22 @@ def download(
archive_num_images = get_num_images(ctx.session, search, collections)
download_num_images = archive_num_images if limit == 0 else min(archive_num_images, limit)
nice_num_images = intcomma(download_num_images)

task1 = progress.add_task(
f"Downloading image information ({nice_num_images} total)",
task = progress.add_task(
f"Downloading images (and metadata) ({nice_num_images} total)",
total=download_num_images,
)
task2 = progress.add_task(
f"Downloading image files ({nice_num_images} total)", total=download_num_images
)
# the futures ThreadPoolExecutor doesn't allow one to easily Ctrl-c
thread_pool = ThreadPool(max(10, os.cpu_count() or 10))
images_iterator = itertools.islice(
get_images(ctx.session, search, collections), download_num_images
)
images = []

# See comment above _extract_metadata for why this is necessary
for image in images_iterator:
images.append(image)
progress.update(task1, advance=1)

# the futures ThreadPoolExecutor doesn't allow one to easily Ctrl-c
thread_pool = ThreadPool(max(10, os.cpu_count() or 10))
func = functools.partial(download_image, to=outdir, progress=progress, task=task2)
thread_pool.map(func, images)
images = []
func = functools.partial(download_image, to=outdir, progress=progress, task=task)
for image_chunk in chunked(images_iterator, 100):
images.extend(image_chunk)
thread_pool.map(func, image_chunk)

headers, records = _extract_metadata(images)
with (outdir / "metadata.csv").open("w", encoding="utf8") as outfile:
Expand Down

0 comments on commit bb6b147

Please sign in to comment.