Skip to content

Commit

Permalink
add in shell and python scripts to run automatically to update s3 bucket
Browse files Browse the repository at this point in the history
  • Loading branch information
jacquelynsmale committed Nov 21, 2023
1 parent 983d3d3 commit baacff9
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
77 changes: 77 additions & 0 deletions image_services/opera/update_opera_uris.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import argparse
import csv
import os
import requests
import boto3
from pathlib import Path
import logging
import json


S3_CLIENT = boto3.client('s3')
log = logging.getLogger(__name__)


def query_cmr(params, suffix: str = 'VV.tif'):
    """Return GDAL /vsis3/ URIs for CMR granules whose S3 URL ends with ``suffix``.

    Pages through the CMR granule search endpoint, following the
    ``CMR-Search-After`` response header until no further pages remain.

    Args:
        params: Query parameters for the CMR granule search
            (e.g. short_name, attribute filters, page_size).
        suffix: Keep only S3 URLs ending with this suffix. Defaults to
            'VV.tif', which preserves the previous hard-coded behavior.
            NOTE(review): main() queries CMR by a polarization taken from
            config (which could be VH); callers handling non-VV products
            should pass a matching suffix.

    Returns:
        List of matching URLs with the 's3://' scheme rewritten to the
        GDAL-style '/vsis3/' prefix.
    """
    session = requests.Session()
    search_url = 'https://cmr.earthdata.nasa.gov/search/granules.umm_json'
    headers = {}
    vsis3_uris = []
    while True:
        response = session.get(search_url, params=params, headers=headers)
        response.raise_for_status()
        for granule in response.json()['items']:
            # Keep at most one matching URL per granule (first match wins).
            for url in granule['umm']['RelatedUrls']:
                if url['URL'].startswith('s3://') and url['URL'].endswith(suffix):
                    vsis3_uris.append(url['URL'].replace('s3://', '/vsis3/'))
                    break
        if 'CMR-Search-After' not in response.headers:
            break
        headers['CMR-Search-After'] = response.headers['CMR-Search-After']
    return vsis3_uris


def upload_file_to_s3(path_to_file: Path, bucket: str, prefix: str = ''):
    """Upload a local file to S3 at ``prefix/<filename>`` in ``bucket``."""
    local_path = Path(path_to_file)
    # Object key is the file's basename placed under the (possibly empty) prefix.
    key = str(Path(prefix) / local_path.name)

    log.info(f'Uploading s3://{bucket}/{key}')
    S3_CLIENT.upload_file(str(local_path), bucket, key)


def main():
    """Query CMR for OPERA RTC-S1 product URIs and publish them as a CSV in S3.

    Reads a JSON config file (positional argument) to determine the
    polarization and target bucket, queries CMR for matching granules,
    writes the /vsis3/ URIs to a CSV in the working directory, and uploads
    that CSV to s3://<bucket>/opera-uris/.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--working-directory', default=os.getcwd())
    parser.add_argument('config_file')
    args = parser.parse_args()

    os.environ['AWS_PROFILE'] = 'hyp3'

    with open(args.config_file) as f:
        config = json.load(f)

    # e.g. s3_suffix '_VV.tif' -> polarization 'VV' (characters 1-2)
    polarization = config['s3_suffix'][1:3]
    # overview_path is expected to look like '/vsis3/<bucket>/<key>/...';
    # element index 2 of the split is the bucket name.
    _, _, bucket, _, _ = config['overview_path'].split('/')
    # Bug fix: honor --working-directory; previously this was hard-coded to
    # os.getcwd(), silently ignoring the parsed argument. The default is
    # os.getcwd(), so existing invocations behave identically.
    csv_file = Path(args.working_directory) / f'opera_vsis3_{polarization}.csv'

    params = {
        'short_name': 'OPERA_L2_RTC-S1_V1',
        'attribute[]': f'string,POLARIZATION,{polarization}',
        'page_size': 2000,
    }

    log.info(f'Querying CMR for OPERA {polarization} products')
    vsis3_uris = query_cmr(params)

    # One URI per row; newline='' prevents csv from writing blank lines on Windows.
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        for uri in vsis3_uris:
            writer.writerow([uri])

    upload_file_to_s3(csv_file, bucket, 'opera-uris')


if __name__ == '__main__':
    main()
12 changes: 12 additions & 0 deletions image_services/opera/update_opera_uris.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# wrapper script to run update_opera_uris.py via a cron schedule
# usage: update_opera_uris.sh <working-directory> <config-file>
# example crontab entry:
# 0 8 * * * /home/arcgis/gis-services/image_services/opera/update_opera_uris.sh /home/arcgis/gis-services/image_services/opera /home/arcgis/gis-services/image_services/opera/opera.json >> /home/arcgis/gis-services/image_services/opera/update_opera_uris.log 2>&1
# NOTE(review): the previous example here was copied from the rtc_services
# make_rtc_service.sh wrapper (arcpy/script -qef) and did not apply to this script.

set -e
source /home/arcgis/mambaforge/etc/profile.d/conda.sh
conda activate url-query
# Quote positional args so paths containing spaces are passed intact.
python /home/arcgis/gis-services/image_services/opera/update_opera_uris.py \
    --working-directory "$1" \
    "$2"

0 comments on commit baacff9

Please sign in to comment.