diff --git a/.github/workflows/auto_update_main.yaml b/.github/workflows/auto_update_main.yaml index 49b7207c59..279915b189 100644 --- a/.github/workflows/auto_update_main.yaml +++ b/.github/workflows/auto_update_main.yaml @@ -69,6 +69,8 @@ jobs: lxml requests typer + pooch + pillow - name: update external resources id: update_external shell: bash -l {0} diff --git a/.github/workflows/validate_resources.yaml b/.github/workflows/validate_resources.yaml index d52a11ae42..c878b5ac66 100644 --- a/.github/workflows/validate_resources.yaml +++ b/.github/workflows/validate_resources.yaml @@ -56,6 +56,8 @@ jobs: lxml packaging typer + pooch + pillow - name: update RDFs id: update_rdfs shell: bash -l {0} diff --git a/scripts/prepare_to_deploy.py b/scripts/prepare_to_deploy.py index 4ca2a89d1f..5356739821 100644 --- a/scripts/prepare_to_deploy.py +++ b/scripts/prepare_to_deploy.py @@ -4,9 +4,8 @@ from typing import Any, Dict, List import typer -from packaging.version import Version - from bioimageio.spec.shared import yaml +from packaging.version import Version from utils import iterate_known_resource_versions @@ -74,9 +73,11 @@ def main( updated_rdf_gh_pages_path.parent.mkdir(exist_ok=True, parents=True) shutil.copy(str(updated_rdf_path), str(updated_rdf_gh_pages_path)) - updated_rdf_deploy_path = dist / "rdfs" / updated_rdf_path.relative_to(static_validation_artifact_dir) - updated_rdf_deploy_path.parent.mkdir(exist_ok=True, parents=True) - shutil.move(str(updated_rdf_path), str(updated_rdf_deploy_path)) + updated_rdf_deploy_dir = (dist / "rdfs" / updated_rdf_path.relative_to(static_validation_artifact_dir)).parent + updated_rdf_deploy_dir.mkdir(exist_ok=True, parents=True) + for p in updated_rdf_path.parent.glob("*"): + # move all files (rdf.yaml + any thumbnails) + shutil.move(p, updated_rdf_deploy_dir / p.name) for krv in iterate_known_resource_versions( collection=collection, gh_pages=gh_pages, resource_id=resource_id_pattern, status="accepted" diff --git 
a/scripts/run_main_ci_equivalent_local.py b/scripts/run_main_ci_equivalent_local.py index 0c7fe91bd4..19b4d82ff2 100644 --- a/scripts/run_main_ci_equivalent_local.py +++ b/scripts/run_main_ci_equivalent_local.py @@ -9,10 +9,8 @@ import requests import typer - from bare_utils import GH_API_URL, GITHUB_REPOSITORY_OWNER from dynamic_validation import main as dynamic_validation_script -from generate_collection_rdf import main as generate_collection_rdf_script from prepare_to_deploy import main as prepare_to_deploy_script from static_validation import main as static_validation_script from update_external_resources import main as update_external_resources_script @@ -20,6 +18,8 @@ from update_rdfs import main as update_rdfs_script from utils import iterate_over_gh_matrix +from scripts.generate_collection_rdf_and_thumbnails import main as generate_collection_rdf_script + def download_from_gh(owner: str, repo: str, branch: str, folder: Path): r = requests.get( diff --git a/scripts/utils.py b/scripts/utils.py index eebd7e4c8a..576491bbba 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -2,6 +2,7 @@ import dataclasses import json import pathlib +import shutil import warnings from hashlib import sha256 from itertools import product @@ -10,9 +11,6 @@ import numpy import requests -from marshmallow import missing -from ruamel.yaml import YAML, comments - from bare_utils import DEPLOYED_BASE_URL, GH_API_URL from bioimageio.spec import ( load_raw_resource_description, @@ -21,6 +19,7 @@ from bioimageio.spec.collection.v0_2.raw_nodes import Collection from bioimageio.spec.collection.v0_2.utils import resolve_collection_entries from bioimageio.spec.partner.utils import enrich_partial_rdf_with_imjoy_plugin +from ruamel.yaml import YAML, comments # todo: use MyYAML from bioimageio.spec. 
see comment below
@@ -268,7 +267,7 @@ def write_rdfs_for_resource(resource: dict, dist: Path, only_for_version_id: Opt
         rdf["rdf_source"] = f"{DEPLOYED_BASE_URL}/rdfs/{resource_id}/{version_id}/rdf.yaml"
         rdf.pop("root_path", None)
 
-        assert missing not in rdf.values(), rdf
+        deploy_thumbnails(rdf, dist, resource_id, version_id)
 
         # sort rdf to avoid random diffs
         rdf = rec_sort(rdf)
@@ -385,3 +384,95 @@ def load_yaml_dict(path: Path, raise_missing_keys: Sequence[str]) -> Optional[Di
         raise KeyError(f"Expected missing keys {missing} in {path}")
 
     return data
+
+
+def downsize_image(image_path: Path, dist: Path, size: Tuple[int, int]):
+    """Store a downsized PNG version of an image in `dist`.
+
+    The image is shrunk with `PIL.Image.Image.thumbnail` to fit within `size`
+    and written to `dist` as `<stem>.png`. If that fails for any reason, a
+    warning is issued and the file is copied to `dist` unchanged, keeping its
+    original name.
+
+    Args:
+        image_path: image file to downsize
+        dist: directory the result is written to
+        size: maximum (width, height) of the downsized image
+
+    Returns:
+        Path of the file written to (or already present in) `dist`.
+    """
+    from PIL import Image
+
+    output_path = dist / f"{image_path.stem}.png"
+    try:
+        with Image.open(image_path) as img:
+            img.thumbnail(size)
+            img.save(output_path, "PNG")
+    except Exception as e:
+        warnings.warn(str(e))
+        output_path = output_path.with_name(image_path.name)
+        try:
+            shutil.copy(image_path, output_path)
+        except shutil.SameFileError:
+            # `image_path` already lives in `dist` (e.g. a badge icon downloaded
+            # straight into `dist`), so there is nothing left to copy
+            pass
+
+    return output_path
+
+
+def deploy_thumbnails(rdf_like: Dict[str, Any], dist: Path, resource_id: str, version_id: str) -> None:
+    """Cache downsized covers and zenodo badge icons alongside the deployed rdf.
+
+    Covers not yet served from DEPLOYED_BASE_URL and badge icons stored on
+    zenodo are downloaded, downsized and written to
+    `dist`/rdfs/`resource_id`/`version_id`; the matching `rdf_like` entries are
+    updated in place. Failed downloads only warn and keep the original entry.
+
+    Args:
+        rdf_like: rdf data; its "covers" and "badges" entries may be modified
+        dist: root of the deployment folder
+        resource_id: id of the resource `rdf_like` describes
+        version_id: id of the resource version `rdf_like` describes
+    """
+    import pooch
+
+    dist /= f"rdfs/{resource_id}/{version_id}"
+    dist.mkdir(exist_ok=True, parents=True)
+    covers: Union[Any, List[Any]] = rdf_like.get("covers")
+    if isinstance(covers, list):
+        for i, cover_url in enumerate(covers):
+            if not isinstance(cover_url, str) or cover_url.startswith(DEPLOYED_BASE_URL):
+                continue  # invalid or already cached
+
+            try:
+                downloaded_cover = Path(pooch.retrieve(cover_url, None))  # type: ignore
+            except Exception as e:
+                warnings.warn(str(e))
+                continue
+
+            resized_cover = downsize_image(downloaded_cover, dist, size=(600, 340))
+
+            rdf_like["covers"][i] = f"{DEPLOYED_BASE_URL}/rdfs/{resource_id}/{version_id}/{resized_cover.name}"
+
+    badges: Union[Any, List[Union[Any, Dict[Any, Any]]]] = rdf_like.get("badges")
+    if isinstance(badges, list):
+        for i, badge in enumerate(badges):
+            if not isinstance(badge, dict):
+                continue
+
+            icon = badge.get("icon")
+            if not isinstance(icon, str) or not icon.startswith("https://zenodo.org/api/files/"):
+                # only cache badges stored on zenodo
+                continue
+
+            try:
+                downloaded_icon = Path(pooch.retrieve(icon, None, path=dist))  # type: ignore
+            except Exception as e:
+                warnings.warn(str(e))
+                continue
+
+            resized_icon = downsize_image(downloaded_icon, dist, size=(320, 320))
+
+            rdf_like["badges"][i]["icon"] = f"{DEPLOYED_BASE_URL}/rdfs/{resource_id}/{version_id}/{resized_icon.name}"