From 133c9fb2f27ce364634d2df6c817d7e67b0417ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Simon=20W=C3=B6rpel?=
Date: Wed, 16 Oct 2024 14:28:17 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20(sync/aleph)=20Cache=20par?=
 =?UTF-8?q?ent=20folders?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (placed after the three-dash line, so they are not part of the
commit message):

* get_parent_cache_key: the submitted hunk used `parts += prefix`. `prefix`
  is a str, and list += str extends the list with the individual characters,
  so a prefix "foo" produced the key suffix "<key>/f/o/o". The hunk below
  uses `parts.append(prefix)` so the prefix stays one path segment.
* NOTE(review): when no prefix is given, get_parent_cache_key(key) returns the
  same string as get_upload_cache_key(file) for key == file.key. Confirm the
  parent cache and the upload cache cannot collide on that key.
* NOTE(review): whitespace-only context lines were restored from the hunk
  line counts; the blob ids on the "index" line describe the original commit.

 leakrfc/sync/aleph.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/leakrfc/sync/aleph.py b/leakrfc/sync/aleph.py
index 446355d..6eb592b 100644
--- a/leakrfc/sync/aleph.py
+++ b/leakrfc/sync/aleph.py
@@ -16,14 +16,28 @@ from leakrfc.worker import DatasetWorker
 
 
 
-def get_upload_cache_key(self: "AlephUploadWorker", file: OriginalFile) -> str:
+def _make_cache_key(self: "AlephUploadWorker", *parts: str) -> str:
     host = urlparse(self.host).netloc
-    return f"aleph/upload/{host}/{self.dataset.name}/{file.key}"
+    base = f"aleph/upload/{host}/{self.dataset.name}/"
+    return base + "/".join(parts)
+
+
+def get_upload_cache_key(self: "AlephUploadWorker", file: OriginalFile) -> str:
+    return _make_cache_key(self, file.key)
+
+
+def get_parent_cache_key(
+    self: "AlephUploadWorker", key: str, prefix: str | None = None
+) -> str:
+    parts = [key]
+    if prefix:
+        parts.append(prefix)
+    return _make_cache_key(self, *parts)
 
 
 class AlephUploadWorker(DatasetWorker):
     """
-    Sync leakrfc to an Aleph instance
+    Sync leakrfc dataset to an Aleph instance
     """
 
     def __init__(
@@ -46,6 +60,7 @@ def __init__(
         self.prefix = prefix
         self.consumer_threads = min(10, self.consumer_threads)  # urllib connection pool
 
+    @anycache(key_func=get_parent_cache_key)
     def get_parent(self, key: str, prefix: str | None = None) -> dict[str, str] | None:
         with self.lock:
             p = Path(key)