Skip to content

Commit

Permalink
Enhance scene downloading by implementing repeated attempts, as for burst downloads

Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey Pechnikov committed Jan 13, 2025
1 parent 16c8eed commit cdda568
Showing 1 changed file with 23 additions and 6 deletions.
29 changes: 23 additions & 6 deletions pygmtsar/pygmtsar/ASF.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def download(self, basedir, scenes_or_bursts, subswaths=None, polarization='VV',
return pd.concat(results)

def download_scenes(self, basedir, scenes, subswaths, polarization='VV', session=None,
n_jobs=4, joblib_backend='loky', skip_exist=True, debug=False):
n_jobs=4, joblib_backend='loky', skip_exist=True,
retries=30, timeout_second=3, debug=False):
"""
Downloads the specified subswaths extracted from Sentinel-1 SLC scenes.
Expand Down Expand Up @@ -122,6 +123,7 @@ def download_scenes(self, basedir, scenes, subswaths, polarization='VV', session
import re
import glob
from datetime import datetime, timedelta
import time
import warnings
# supress asf_search 'UserWarning: File already exists, skipping download'
warnings.filterwarnings("ignore", category=UserWarning)
Expand Down Expand Up @@ -228,11 +230,26 @@ def download_scene(scene, subswaths, polarization, basedir, session):
print ('Note: sequential joblib processing is applied when "n_jobs" is None or "debug" is True.')
joblib_backend = 'sequential'

def download_scene_with_retry(scene, subswaths, polarization, basedir, session, retries, timeout_second):
    """
    Attempt to download a single scene, retrying on failure.

    Tries up to ``retries`` times, sleeping ``timeout_second`` seconds
    between failed attempts. Returns True on success and False when every
    attempt failed. Also returns False when ``retries`` <= 0 (no attempt
    is made) instead of falling through and returning None, which the
    caller's ``statuses.count(False)`` check would miscount as success.
    """
    for retry in range(retries):
        try:
            download_scene(scene, subswaths, polarization, basedir, session)
            return True
        except Exception as e:
            print(f'ERROR: download attempt {retry+1} failed for {scene}: {e}')
            if retry + 1 == retries:
                return False
            # wait before the next attempt; skipped after the final failure above
            time.sleep(timeout_second)
    # retries <= 0: the loop body never ran, report an explicit failure
    return False

# download scenes
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 SLC:', total=len(scenes_missed))) as progress_bar:
joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(joblib.delayed(download_scene)\
(scene, subswaths, polarization, basedir, session) for scene in scenes_missed)
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 SLC Scenes:', total=len(scenes_missed))) as progress_bar:
statuses = joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(joblib.delayed(download_scene_with_retry)\
(scene, subswaths, polarization, basedir, session,
retries=retries, timeout_second=timeout_second) for scene in scenes_missed)

failed_count = statuses.count(False)
if failed_count > 0:
raise Exception(f'Scenes downloading failed for {failed_count} items.')
# parse processed scenes and convert to dataframe
#print ('scenes', len(scenes))
scenes_downloaded = pd.DataFrame(scenes_missed, columns=['scene'])
Expand Down Expand Up @@ -621,13 +638,13 @@ def download_burst_with_retry(result, basedir, session, retries, timeout_second)
time.sleep(timeout_second)

# download bursts
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 Bursts', total=len(bursts_missed))) as progress_bar:
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 SLC Bursts', total=len(bursts_missed))) as progress_bar:
statuses = joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(joblib.delayed(download_burst_with_retry)\
(result, basedir, session, retries=retries, timeout_second=timeout_second) for result in results)

failed_count = statuses.count(False)
if failed_count > 0:
raise Exception(f'Bursts downloading failed for {failed_count} bursts.')
raise Exception(f'Bursts downloading failed for {failed_count} items.')
# parse processed bursts and convert to dataframe
bursts_downloaded = pd.DataFrame(bursts_missed, columns=['burst'])
# return the results in a user-friendly dataframe
Expand Down

0 comments on commit cdda568

Please sign in to comment.