Skip to content

Commit

Permalink
Enhance scene downloading by implementing repeated attempts, as for burst downloads

Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey Pechnikov committed Jan 13, 2025
1 parent 16c8eed commit cdda568
Showing 1 changed file with 23 additions and 6 deletions.
29 changes: 23 additions & 6 deletions pygmtsar/pygmtsar/ASF.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def download(self, basedir, scenes_or_bursts, subswaths=None, polarization='VV',
return pd.concat(results)

def download_scenes(self, basedir, scenes, subswaths, polarization='VV', session=None,
n_jobs=4, joblib_backend='loky', skip_exist=True, debug=False):
n_jobs=4, joblib_backend='loky', skip_exist=True,
retries=30, timeout_second=3, debug=False):
"""
Downloads the specified subswaths extracted from Sentinel-1 SLC scenes.
Expand Down Expand Up @@ -122,6 +123,7 @@ def download_scenes(self, basedir, scenes, subswaths, polarization='VV', session
import re
import glob
from datetime import datetime, timedelta
import time
import warnings
# supress asf_search 'UserWarning: File already exists, skipping download'
warnings.filterwarnings("ignore", category=UserWarning)
Expand Down Expand Up @@ -228,11 +230,26 @@ def download_scene(scene, subswaths, polarization, basedir, session):
print ('Note: sequential joblib processing is applied when "n_jobs" is None or "debug" is True.')
joblib_backend = 'sequential'

def download_scene_with_retry(scene, subswaths, polarization, basedir, session, retries, timeout_second):
    """
    Attempt to download a single scene, retrying on failure.

    Tries up to ``retries`` times, sleeping ``timeout_second`` seconds
    between failed attempts. Returns True on success and False when every
    attempt failed. Also returns False when ``retries`` <= 0 (no attempt
    is made) instead of falling through and returning None, which the
    caller's ``statuses.count(False)`` check would miscount as success.
    """
    for retry in range(retries):
        try:
            download_scene(scene, subswaths, polarization, basedir, session)
            return True
        except Exception as e:
            print(f'ERROR: download attempt {retry+1} failed for {scene}: {e}')
            if retry + 1 == retries:
                return False
            # wait before the next attempt; skipped after the final failure above
            time.sleep(timeout_second)
    # retries <= 0: the loop body never ran, report an explicit failure
    return False

# download scenes
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 SLC:', total=len(scenes_missed))) as progress_bar:
joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(joblib.delayed(download_scene)\
(scene, subswaths, polarization, basedir, session) for scene in scenes_missed)
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 SLC Scenes:', total=len(scenes_missed))) as progress_bar:
statuses = joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(joblib.delayed(download_scene_with_retry)\
(scene, subswaths, polarization, basedir, session,
retries=retries, timeout_second=timeout_second) for scene in scenes_missed)

failed_count = statuses.count(False)
if failed_count > 0:
raise Exception(f'Scenes downloading failed for {failed_count} items.')
# parse processed scenes and convert to dataframe
#print ('scenes', len(scenes))
scenes_downloaded = pd.DataFrame(scenes_missed, columns=['scene'])
Expand Down Expand Up @@ -621,13 +638,13 @@ def download_burst_with_retry(result, basedir, session, retries, timeout_second)
time.sleep(timeout_second)

# download bursts
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 Bursts', total=len(bursts_missed))) as progress_bar:
with self.tqdm_joblib(tqdm(desc='ASF Downloading Sentinel-1 SLC Bursts', total=len(bursts_missed))) as progress_bar:
statuses = joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(joblib.delayed(download_burst_with_retry)\
(result, basedir, session, retries=retries, timeout_second=timeout_second) for result in results)

failed_count = statuses.count(False)
if failed_count > 0:
raise Exception(f'Bursts downloading failed for {failed_count} bursts.')
raise Exception(f'Bursts downloading failed for {failed_count} items.')
# parse processed bursts and convert to dataframe
bursts_downloaded = pd.DataFrame(bursts_missed, columns=['burst'])
# return the results in a user-friendly dataframe
Expand Down

0 comments on commit cdda568

Please sign in to comment.