From 28b66120c5357a650af91fbf2ece3083dfb31581 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Fri, 12 Jan 2024 13:34:13 +0100 Subject: [PATCH 01/22] Refactor flask app and config --- .env.sample | 43 ------- .gitignore | 3 +- isimip_files_api/__init__.py | 2 +- isimip_files_api/app.py | 24 +++- isimip_files_api/cdo.py | 15 ++- isimip_files_api/{settings.py => config.py} | 107 +++++++++++------ isimip_files_api/jobs.py | 15 ++- isimip_files_api/logging.py | 40 +++++++ isimip_files_api/nco.py | 5 +- isimip_files_api/netcdf.py | 6 +- isimip_files_api/responses.py | 24 ++++ isimip_files_api/scripts.py | 124 -------------------- isimip_files_api/tasks.py | 9 +- isimip_files_api/utils.py | 29 +---- isimip_files_api/validators.py | 28 +++-- isimip_files_api/worker.py | 14 +-- pyproject.toml | 28 +++-- run.sh | 23 ++++ 18 files changed, 256 insertions(+), 283 deletions(-) delete mode 100644 .env.sample rename isimip_files_api/{settings.py => config.py} (53%) create mode 100644 isimip_files_api/logging.py create mode 100644 isimip_files_api/responses.py delete mode 100644 isimip_files_api/scripts.py create mode 100755 run.sh diff --git a/.env.sample b/.env.sample deleted file mode 100644 index 5673beb..0000000 --- a/.env.sample +++ /dev/null @@ -1,43 +0,0 @@ -FLASK_APP=isimip_files_api.app - -# FLASK_ENV=production -# FLASK_ENV=development - -# LOG_LEVEL=ERROR -# LOG_LEVEL=DEBUG - -# LOG_FILE=log -# LOG_FILE=/var/log/isimip-cutout - -# BASE_URL=http://127.0.0.1:5000 -# OUTPUT_URL=http://127.0.0.1/api/output/ - -# INPUT_PATH=/path/to/the/isimip/data -# OUTPUT_PATH=/path/to/the/output/directory -# OUTPUT_PREFIX=isimip-download- - -# CDO_BIN=/usr/bin/cdo -# NCKS_BIN=/usr/bin/ncks - -# COUNTRYMASKS_FILE_PATH=/path/to/countrymasks.nc -# LANDSEAMASK_FILE_PATH=/path/to/landseamask.nc - -# CORS=False -# GLOBAL=_global_ -# MAX_FILES=32 - -# WORKER_TIMEOUT=180 -# WORKER_LOG_FILE=worker.log -# WORKER_LOG_LEVEL=DEBUG -# WORKER_TTL=86400 -# WORKER_FAILURE_TTL=86400 -# 
WORKER_RESULT_TTL=604800 - -# gunicorn configuration -# GUNICORN_BIN=/path/to/api/env/bin/gunicorn -# GUNICORN_WORKER=3 -# GUNICORN_PORT=9002 -# GUNICORN_TIMEOUT=120 -# GUNICORN_PID_FILE=/run/gunicorn/api/pid -# GUNICORN_ACCESS_LOG_FILE=/var/log/gunicorn/api/access.log -# GUNICORN_ERROR_LOG_FILE=/var/log/gunicorn/api/error.log diff --git a/.gitignore b/.gitignore index 6fb7823..6e073c8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ __pycache__/ /dist /*.egg-info /env -/.env /log /*.log +/*.toml +!/pyproject.toml diff --git a/isimip_files_api/__init__.py b/isimip_files_api/__init__.py index e5ea9e1..04af162 100644 --- a/isimip_files_api/__init__.py +++ b/isimip_files_api/__init__.py @@ -1 +1 @@ -VERSION = __version__ = '1.1.0' +VERSION = __version__ = '2.0.0' diff --git a/isimip_files_api/app.py b/isimip_files_api/app.py index 67df03b..445473c 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -1,22 +1,31 @@ -import logging from collections import defaultdict from flask import Flask, request + +import tomli from flask_cors import CORS as FlaskCORS from .jobs import create_job, delete_job, fetch_job -from .settings import CORS, LOG_FILE, LOG_LEVEL -from .utils import get_errors_response +from .logging import configure_logging +from .responses import get_errors_response from .validators import validate_data, validate_datasets -logging.basicConfig(level=LOG_LEVEL, filename=LOG_FILE) - def create_app(): # create and configure the app app = Flask(__name__) + app.config.from_object('isimip_files_api.config') + app.config.from_prefixed_env() + app.config.from_file(app.config['CONFIG'], load=tomli.load, text=False) + + # configure logging + configure_logging(app) - if CORS: + # log config + app.logger.debug('app.config = %s', app.config) + + # enable CORS + if app.config['CORS']: FlaskCORS(app) @app.route('/', methods=['GET']) @@ -27,10 +36,13 @@ def index(): @app.route('/', methods=['POST']) def create(): + app.logger.debug('request.json = 
%s', request.json) + errors = defaultdict(list) cleaned_data = validate_data(request.json, errors) if errors: + app.logger.debug('errors = %s', errors) return get_errors_response(errors) validate_datasets(*cleaned_data, errors) diff --git a/isimip_files_api/cdo.py b/isimip_files_api/cdo.py index 35f57ba..ecb3f5d 100644 --- a/isimip_files_api/cdo.py +++ b/isimip_files_api/cdo.py @@ -1,9 +1,11 @@ import csv import logging import subprocess +from pathlib import Path + +from flask import current_app as app from .netcdf import get_index -from .settings import CDO_BIN, COUNTRYMASKS_FILE_PATH, LANDSEAMASK_FILE_PATH from .utils import mask_cmd @@ -19,21 +21,23 @@ def mask_bbox(dataset_path, output_path, bbox): def mask_country(dataset_path, output_path, country): # cdo -f nc4c -z zip_5 -ifthen -selname,m_COUNTRY COUNTRYMASK IFILE OFILE + mask_path = Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser() return cdo('-f', 'nc4c', '-z', 'zip_5', '-ifthen', f'-selname,m_{country.upper():3.3}', - str(COUNTRYMASKS_FILE_PATH), + str(mask_path), str(dataset_path), str(output_path)) def mask_landonly(dataset_path, output_path): # cdo -f nc4c -z zip_5 -ifthen LANDSEAMASK IFILE OFILE + mask_path = Path(app.config['LANDSEAMASK_FILE_PATH']).expanduser() return cdo('-f', 'nc4c', '-z', 'zip_5', '-ifthen', - str(LANDSEAMASK_FILE_PATH), + str(mask_path), str(dataset_path), str(output_path)) @@ -65,18 +69,19 @@ def select_bbox(dataset_path, output_path, bbox): def select_country(dataset_path, output_path, country): # cdo -s outputtab,date,value,nohead -fldmean -ifthen -selname,m_COUNTRY COUNTRYMASK IFILE + mask_path = Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser() return cdo('-s', 'outputtab,date,value,nohead', '-fldmean', '-ifthen', f'-selname,m_{country.upper():3.3}', - str(COUNTRYMASKS_FILE_PATH), + str(mask_path), str(dataset_path), output_path=output_path) def cdo(*args, output_path=None): - cmd_args = [CDO_BIN, *list(args)] + cmd_args = [app.config['CDO_BIN'], *list(args)] 
cmd = ' '.join(cmd_args) env = { 'CDI_VERSION_INFO': '0', diff --git a/isimip_files_api/settings.py b/isimip_files_api/config.py similarity index 53% rename from isimip_files_api/settings.py rename to isimip_files_api/config.py index f7b82a4..79f910a 100644 --- a/isimip_files_api/settings.py +++ b/isimip_files_api/config.py @@ -1,34 +1,68 @@ -import os -from pathlib import Path +''' +this file contains the default configuration, which can be overridden by -from dotenv import load_dotenv +* environment variables prefixed with 'FLASK_', either in the environment or a .env file +* a config.toml file at the root of the repository or at a location given by FLASK_CONFIG +''' -load_dotenv(Path().cwd() / '.env') +# flask environment +ENV = 'production' # choose from 'production', 'development', 'testing' -LOG_FILE = os.getenv('LOG_FILE') -LOG_LEVEL = os.getenv('LOG_LEVEL', 'ERROR') +# enable Cross-Origin Resource Sharing (CORS) +CORS = True -BASE_URL = os.getenv('BASE_URL', 'http://127.0.0.1:5000').rstrip('/') -OUTPUT_URL = os.getenv('OUTPUT_URL', 'http://127.0.0.1/api/output/').rstrip('/') +# toml config file +CONFIG = '../config.toml' -INPUT_PATH = Path(os.getenv('INPUT_PATH', 'input')) -OUTPUT_PATH = Path(os.getenv('OUTPUT_PATH', 'output')) -OUTPUT_PREFIX = os.getenv('OUTPUT_PREFIX', 'isimip-files-api-') +# log level and (optional) path to flask.log +LOG_LEVEL = 'ERROR' +LOG_PATH = None -CDO_BIN = os.getenv('CDO_BIN', 'cdo') -NCKS_BIN = os.getenv('NCKS_BIN', 'ncks') +# the base url the api is running on, in production this will be something like https://api.example.com/api/v1 +BASE_URL = 'http://127.0.0.1:5000' -CORS = os.getenv('CORS', '').upper() in ['TRUE', 1] -GLOBAL = os.getenv('GLOBAL', '_global_') -MAX_FILES = int(os.getenv('MAX_FILES', '32')) +# the output url the download packages will be available on +OUTPUT_URL = 'http://127.0.0.1/api/output/' -WORKER_TIMEOUT = int(os.getenv('WORKER_TIMEOUT', '180')) -WORKER_LOG_FILE = os.getenv('WORKER_LOG_FILE') 
-WORKER_LOG_LEVEL = os.getenv('WORKER_LOG_LEVEL', 'ERROR') -WORKER_TTL = int(os.getenv('RESULT_TTL', '86400')) # one day -WORKER_FAILURE_TTL = int(os.getenv('WORKER_FAILURE_TTL', '86400')) # one day -WORKER_RESULT_TTL = int(os.getenv('WORKER_RESULT_TTL', '604800')) # one week +# input path to the NetCDF files to process +INPUT_PATH = '..' +# output path to store the created download packages, this directory should be exposed on OUTPUT_URL +OUTPUT_PATH = '..' + +# output prefix to be prepended to the job ID to create the filename for the download package +OUTPUT_PREFIX = 'download-' + +# maximal number of files to process in one job +MAX_FILES = 32 + +# list of tasks which can be performed +TASKS = [ + 'cutout_bbox', + 'mask_bbox', + 'mask_country', + 'mask_landonly', + 'select_bbox', + 'select_country', + 'select_point' +] + +# the tag which designates global files +GLOBAL_TAG = '_global_' + +# list of the allowed resolution tags per task +RESOLUTION_TAGS = { + 'cutout_bbox': ['30arcsec', '90arcsec', '300arcsec', '1800arcsec', + '15arcmin', '30arcmin', '60arcmin', '120arcmin'], + 'mask_bbox': ['15arcmin', '30arcmin', '60arcmin', '120arcmin'], + 'mask_country': ['30arcmin'], + 'mask_landonly': ['30arcmin'], + 'select_bbox': ['15arcmin', '30arcmin', '60arcmin', '120arcmin'], + 'select_country': ['30arcmin'], + 'select_point': ['15arcmin', '30arcmin', '60arcmin', '120arcmin'] +} + +# list of the concrete number of gridpoints for each resolution tag RESOLUTIONS = { '30arcsec': (20880, 43200), '90arcsec': (6960, 14400), @@ -40,18 +74,14 @@ '120arcmin': (90, 180) } -TASKS = { - 'cutout_bbox': ['30arcsec', '90arcsec', '300arcsec', '1800arcsec', - '15arcmin', '30arcmin', '60arcmin', '120arcmin'], - 'mask_bbox': ['15arcmin', '30arcmin', '60arcmin', '120arcmin'], - 'mask_country': ['30arcmin'], - 'mask_landonly': ['30arcmin'], - 'select_bbox': ['15arcmin', '30arcmin', '60arcmin', '120arcmin'], - 'select_country': ['30arcmin'], - 'select_point': ['15arcmin', '30arcmin', 
'60arcmin', '120arcmin'] -} +# the cdo binary on the system, e.g. /usr/bin/cdo +CDO_BIN = 'cdo' + +# the ncks binary on the system, e.g. /usr/bin/ncks +NCKS_BIN = 'ncks' -COUNTRYMASKS_FILE_PATH = Path(os.getenv('COUNTRYMASKS_FILE_PATH', 'countrymasks.nc')) +# special settings for the countries +COUNTRYMASKS_FILE_PATH = 'countrymasks.nc' COUNTRYMASKS_COUNTRIES = [ 'AFG', 'ALB', 'DZA', 'AND', 'AGO', 'ATG', 'ARG', 'ARM', 'AUS', 'AUT', 'AZE', 'BHS', 'BHR', 'BGD', 'BRB', 'BLR', 'BEL', 'BLZ', 'BEN', 'BTN', @@ -77,4 +107,13 @@ 'ESH', 'YEM', 'ZMB', 'ZWE' ] -LANDSEAMASK_FILE_PATH = Path(os.getenv('LANDSEAMASK_FILE_PATH', 'landseamask.nc')) +# special settings for the land sea mask +LANDSEAMASK_FILE_PATH = 'landseamask.nc' + +# configuration for the worker +WORKER_TIMEOUT = 180 +WORKER_LOG_FILE = None +WORKER_LOG_LEVEL = 'ERROR' +WORKER_TTL = 86400 # one day +WORKER_FAILURE_TTL = 86400 # one day +WORKER_RESULT_TTL = 604800 # one week diff --git a/isimip_files_api/jobs.py b/isimip_files_api/jobs.py index 2490b76..5922677 100644 --- a/isimip_files_api/jobs.py +++ b/isimip_files_api/jobs.py @@ -1,11 +1,14 @@ -from redis import Redis +from flask import current_app as app + from rq import Queue from rq.exceptions import NoSuchJobError from rq.job import Job -from .settings import WORKER_FAILURE_TTL, WORKER_RESULT_TTL, WORKER_TIMEOUT, WORKER_TTL +from redis import Redis + +from .responses import get_response from .tasks import run_task -from .utils import get_hash, get_response +from .utils import get_hash redis = Redis() @@ -17,8 +20,10 @@ def create_job(paths, args): return get_response(job, 200) except NoSuchJobError: job = Job.create(run_task, id=job_id, args=[paths, args], - timeout=WORKER_TIMEOUT, ttl=WORKER_TTL, - result_ttl=WORKER_RESULT_TTL, failure_ttl=WORKER_FAILURE_TTL, + timeout=app.config['WORKER_TIMEOUT'], + ttl=app.config['WORKER_TTL'], + result_ttl=app.config['WORKER_RESULT_TTL'], + failure_ttl=app.config['WORKER_FAILURE_TTL'], connection=redis) queue = 
Queue(connection=redis) queue.enqueue_job(job) diff --git a/isimip_files_api/logging.py b/isimip_files_api/logging.py new file mode 100644 index 0000000..e20a1ec --- /dev/null +++ b/isimip_files_api/logging.py @@ -0,0 +1,40 @@ +import logging +from pathlib import Path + +from flask.logging import default_handler + +import colorlog + + +def configure_logging(app): + app.logger.removeHandler(default_handler) + app.logger.setLevel(app.config['LOG_LEVEL'].upper()) + + # log to the console in development + if app.config['ENV'] == 'development': + formatter = colorlog.ColoredFormatter('%(log_color)s[%(asctime)s] %(levelname)s' + ' %(filename)s:%(funcName)s %(message)s') + + handler = colorlog.StreamHandler() + handler.setLevel(logging.DEBUG) + handler.setFormatter(formatter) + + app.logger.addHandler(handler) + + # log to a file + if app.config['LOG_PATH']: + log_path = Path(app.config['LOG_PATH']) + if log_path.exists: + formatter = logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s') + + handler = logging.FileHandler(log_path / 'app.log', 'a') + handler.setLevel(logging.DEBUG) + handler.setFormatter(formatter) + + app.logger.addHandler(handler) + else: + raise RuntimeError('LOG_PATH does not exist') + + # disable logger if not handlers are set + if not app.logger.handlers: + app.logger.disabled = True diff --git a/isimip_files_api/nco.py b/isimip_files_api/nco.py index ca97e12..a2d16a7 100644 --- a/isimip_files_api/nco.py +++ b/isimip_files_api/nco.py @@ -1,7 +1,8 @@ import logging import subprocess -from .settings import NCKS_BIN +from flask import current_app as app + from .utils import mask_cmd @@ -19,7 +20,7 @@ def cutout_bbox(dataset_path, output_path, bbox): def ncks(*args): - cmd_args = [NCKS_BIN, *list(args)] + cmd_args = [app.config['NCKS_BIN'], *list(args)] cmd = ' '.join(cmd_args) logging.debug(cmd) diff --git a/isimip_files_api/netcdf.py b/isimip_files_api/netcdf.py index d873031..775c0c5 100644 --- a/isimip_files_api/netcdf.py +++ 
b/isimip_files_api/netcdf.py @@ -1,6 +1,6 @@ -from netCDF4 import Dataset +from flask import current_app as app -from .settings import RESOLUTIONS +from netCDF4 import Dataset def open_dataset(path): @@ -9,7 +9,7 @@ def open_dataset(path): def check_resolution(ds, resolution): try: - lat_size, lon_size = RESOLUTIONS[resolution] + lat_size, lon_size = app.config['RESOLUTIONS'][resolution] return ds.dimensions['lat'].size == lat_size or ds.dimensions['lon'].size == lon_size except KeyError: return False diff --git a/isimip_files_api/responses.py b/isimip_files_api/responses.py new file mode 100644 index 0000000..2b767ce --- /dev/null +++ b/isimip_files_api/responses.py @@ -0,0 +1,24 @@ +from flask import current_app as app + +from .utils import get_zip_file_name + + +def get_response(job, http_status): + file_name = get_zip_file_name(job.id) + + return { + 'id': job.id, + 'job_url': app.config['BASE_URL'] + '/' + job.id, + 'file_name': file_name, + 'file_url': app.config['OUTPUT_URL'] + '/' + file_name, + 'meta': job.meta, + 'ttl': app.config['WORKER_RESULT_TTL'], + 'status': job.get_status(), + }, http_status + + +def get_errors_response(errors): + return { + 'status': 'error', + 'errors': errors + }, 400 diff --git a/isimip_files_api/scripts.py b/isimip_files_api/scripts.py deleted file mode 100644 index 2429ae6..0000000 --- a/isimip_files_api/scripts.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -import argparse -import logging -import os -from pathlib import Path - -from dotenv import load_dotenv -from redis import Redis -from rq.exceptions import NoSuchJobError -from rq.job import Job - -from .cdo import mask_bbox, mask_country, mask_landonly, select_bbox, select_country, select_point -from .nco import cutout_bbox -from .settings import LOG_FILE, LOG_LEVEL, OUTPUT_PATH -from .utils import get_output_name - -logging.basicConfig(level=LOG_LEVEL, filename=LOG_FILE) - -redis = Redis() - - -class FloatListAction(argparse.Action): - - def __call__(self, 
parser, namespace, values, option_string=None): - setattr(namespace, self.dest, [float(c) for c in values.split(',')]) - - -def parse_floats(string): - return [float(c) for c in string.bbox.split(',')] if string else None - - -def select(): - parser = argparse.ArgumentParser() - parser.add_argument('paths', nargs='+', help='List of files to mask') - parser.add_argument('--point', help='Select by point, e.g. "52.39,13.06"', action=FloatListAction) - parser.add_argument('--country', help='Select by country, e.g. "deu"') - parser.add_argument('--bbox', help='Select by bounding box, e.g. "-23.43651,23.43651,-180,180"', - action=FloatListAction) - parser.add_argument('--output', help='Output directory, default: .', default='.') - args = parser.parse_args() - - if not any([args.country, args.bbox, args.point]): - parser.error('Please provide at least --country, --bbox, or --point.') - - for path in args.paths: - input_path = Path(path) - output_path = Path(args.output).expanduser() / get_output_name(path, vars(args), suffix='.csv') - - if args.bbox: - select_bbox(input_path, output_path, args.bbox) - elif args.country: - select_country(input_path, output_path, args.country) - elif args.point: - select_point(input_path, output_path, args.point) - - -def mask(): - parser = argparse.ArgumentParser() - parser.add_argument('paths', nargs='+', help='List of files to mask') - parser.add_argument('--country', help='Mask by country, e.g. "deu"') - parser.add_argument('--bbox', help='Mask by bounding box, e.g. 
"-23.43651,23.43651,-180,180"', - action=FloatListAction) - parser.add_argument('--landonly', action='store_true', help='Mask only land data') - parser.add_argument('--output', help='Output directory, default: .', default='.') - args = parser.parse_args() - - if not any([args.country, args.bbox, args.landonly]): - parser.error('Please provide at least --country, --bbox, or --landonly.') - - for path in args.paths: - input_path = Path(path) - output_path = Path(args.output).expanduser() / get_output_name(path, vars(args)) - - if args.bbox: - mask_bbox(input_path, output_path, args.bbox) - elif args.country: - mask_country(input_path, output_path, args.country) - elif args.landonly: - mask_landonly(input_path, output_path) - - -def cutout(): - parser = argparse.ArgumentParser() - parser.add_argument('paths', nargs='+', help='List of files to mask') - parser.add_argument('--bbox', help='Mask by bounding box (south, north, west, east),' - ' e.g. "-23.43,23.43,-180,180"', - action=FloatListAction) - parser.add_argument('--output', help='Output directory, default: .', default='.') - args = parser.parse_args() - - if not any([args.bbox]): - parser.error('Please provide at least --bbox.') - - for path in args.paths: - input_path = Path(path) - output_path = Path(args.output).expanduser() / get_output_name(path, vars(args)) - - cutout_bbox(input_path, output_path, args.bbox) - - -def clean(): - load_dotenv(Path().cwd() / '.env') - - for root, dirs, files in os.walk(OUTPUT_PATH, topdown=False): - root_path = Path(root) - - for file_name in files: - file_path = root_path / file_name - - # construct relative path and job_id - path = root_path.relative_to(OUTPUT_PATH) / file_name - job_id = path.stem.split('-')[-1] - - # check if there is a job for this - try: - Job.fetch(job_id, connection=redis) - except NoSuchJobError: - os.remove(file_path) - - # remove empty directories - for dir_name in dirs: - dir_path = root_path / dir_name - if not os.listdir(dir_path): - 
os.rmdir(dir_path) diff --git a/isimip_files_api/tasks.py b/isimip_files_api/tasks.py index 5e41f90..a4a7651 100644 --- a/isimip_files_api/tasks.py +++ b/isimip_files_api/tasks.py @@ -3,11 +3,12 @@ from tempfile import mkdtemp from zipfile import ZipFile +from flask import current_app as app + from rq import get_current_job from .cdo import mask_bbox, mask_country, mask_landonly, select_bbox, select_country, select_point from .nco import cutout_bbox -from .settings import INPUT_PATH, OUTPUT_PATH, OUTPUT_PREFIX from .utils import get_output_name, get_zip_file_name @@ -19,11 +20,11 @@ def run_task(paths, args): job.save_meta() # create output paths - output_path = OUTPUT_PATH / get_zip_file_name(job.id) + output_path = Path(app.config['OUTPUT_PATH']).expanduser() / get_zip_file_name(job.id) output_path.parent.mkdir(parents=True, exist_ok=True) # create a temporary directory - tmp = Path(mkdtemp(prefix=OUTPUT_PREFIX)) + tmp = Path(mkdtemp(prefix=app.config['OUTPUT_PREFIX'])) # open zipfile z = ZipFile(output_path, 'w') @@ -34,7 +35,7 @@ def run_task(paths, args): readme.write('The following commands were used to create the files in this container:\n\n') for path in paths: - input_path = INPUT_PATH / path + input_path = Path(app.config['INPUT_PATH']).expanduser() / path if args['task'] in ['select_country', 'select_bbox', 'select_point']: tmp_name = get_output_name(path, args, suffix='.csv') else: diff --git a/isimip_files_api/utils.py b/isimip_files_api/utils.py index 1d9f9b8..dc912b6 100644 --- a/isimip_files_api/utils.py +++ b/isimip_files_api/utils.py @@ -2,28 +2,7 @@ import re from pathlib import Path -from .settings import BASE_URL, GLOBAL, OUTPUT_PREFIX, OUTPUT_URL, WORKER_RESULT_TTL - - -def get_response(job, http_status): - file_name = get_zip_file_name(job.id) - - return { - 'id': job.id, - 'job_url': BASE_URL + '/' + job.id, - 'file_name': file_name, - 'file_url': OUTPUT_URL + '/' + file_name, - 'meta': job.meta, - 'ttl': WORKER_RESULT_TTL, - 'status': 
job.get_status(), - }, http_status - - -def get_errors_response(errors): - return { - 'status': 'error', - 'errors': errors - }, 400 +from flask import current_app as app def get_output_name(path, args, suffix=None): @@ -43,16 +22,16 @@ def get_output_name(path, args, suffix=None): path = Path(path) suffix = suffix if suffix else path.suffix - if GLOBAL in path.name: + if app.config['GLOBAL_TAG'] in path.name: # replace the _global_ specifier - return path.with_suffix(suffix).name.replace(GLOBAL, f'_{region}_') + return path.with_suffix(suffix).name.replace(app.config['GLOBAL_TAG'], f'_{region}_') else: # append region specifier return path.stem + f'_{region}' + suffix def get_zip_file_name(job_id): - return Path(OUTPUT_PREFIX + job_id).with_suffix('.zip').as_posix() + return Path(app.config['OUTPUT_PREFIX'] + job_id).with_suffix('.zip').as_posix() def get_hash(paths, args): diff --git a/isimip_files_api/validators.py b/isimip_files_api/validators.py index d6ca97f..4f8c13a 100644 --- a/isimip_files_api/validators.py +++ b/isimip_files_api/validators.py @@ -1,5 +1,8 @@ +from pathlib import Path + +from flask import current_app as app + from .netcdf import check_resolution, open_dataset -from .settings import COUNTRYMASKS_COUNTRIES, INPUT_PATH, MAX_FILES, TASKS def validate_data(data, errors): @@ -20,15 +23,16 @@ def validate_paths(data, errors): errors['paths'].append('This field is required.') return None - if len(data['paths']) > MAX_FILES: - errors['paths'].append(f'To many files match that dataset (max: {MAX_FILES}).') + if len(data['paths']) > app.config['MAX_FILES']: + errors['paths'].append('To many files match that dataset (max: {MAX_FILES}).'.format(**app.config)) return None for path in data['paths']: # prevent tree traversal try: - absolute_path = INPUT_PATH / path - absolute_path.parent.resolve().relative_to(INPUT_PATH.resolve()) + input_path = Path(app.config['INPUT_PATH']).expanduser() + absolute_path = input_path / path + 
absolute_path.parent.resolve().relative_to(input_path.resolve()) except ValueError: errors['paths'].append(f'{path} is below the root path.') @@ -55,14 +59,17 @@ def validate_args(data, errors): elif args['task'] in ['cutout_bbox', 'mask_bbox', 'select_bbox'] and not args['bbox']: errors['args'] = 'bbox needs to be provided' elif args['task'] in ['select_point'] and not args['point']: - errors['args'] = 'point needs to be provided' + errors['args'] = 'point needs to be provided' else: return args def validate_task(data, errors): - if 'task' not in data or data['task'] not in TASKS.keys(): + app.logger.info(data) + if 'task' not in data: errors['task'] = 'task needs to be provided' + elif data['task'] not in app.config['TASKS']: + errors['task'] = "task '{task}' is not supported".format(**data) else: return data['task'] @@ -91,7 +98,7 @@ def validate_country(data, errors): if 'country' in data: country = data['country'].lower() - if country.upper() not in COUNTRYMASKS_COUNTRIES: + if country.upper() not in app.config['COUNTRYMASKS_COUNTRIES']: errors['country'] = 'country not in the list of supported countries (e.g. 
DEU)' return country @@ -101,8 +108,9 @@ def validate_country(data, errors): def validate_datasets(paths, args, errors): for path in paths: - absolute_path = INPUT_PATH / path + input_path = Path(app.config['INPUT_PATH']).expanduser() + absolute_path = input_path / path with open_dataset(absolute_path) as ds: - resolutions = TASKS[args.get('task')] + resolutions = app.config['RESOLUTION_TAGS'].get(args.get('task')) if not any(check_resolution(ds, resolution) for resolution in resolutions): errors['paths'].append(f'{path} is not using the correct grid: {resolutions}.') diff --git a/isimip_files_api/worker.py b/isimip_files_api/worker.py index 7001b0d..74d313c 100644 --- a/isimip_files_api/worker.py +++ b/isimip_files_api/worker.py @@ -1,12 +1,12 @@ -import logging +from rq import Worker as RQWorker -from rq import Worker as Worker +from .app import create_app -from .settings import WORKER_LOG_FILE, WORKER_LOG_LEVEL -logging.basicConfig(level=WORKER_LOG_LEVEL, filename=WORKER_LOG_FILE, - format='[%(asctime)s] %(levelname)s %(name)s: %(message)s') +class Worker(RQWorker): + def work(self, *args, **kwargs): + app = create_app() -class LogWorker(Worker): - pass + with app.app_context(): + super().work(*args, **kwargs) diff --git a/pyproject.toml b/pyproject.toml index c31cab2..916e0b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,25 +31,19 @@ classifiers = [ 'Programming Language :: Python :: 3.12' ] dependencies = [ - "Flask~=1.1.1", - "Flask-Cors~=3.0.8", - "gunicorn~=20.0.4", - "netCDF4~=1.5.3", - "numpy~=1.18.2", - "python-dotenv~=0.12.0", - "rq~=1.3.0" + "Flask~=3.0.0", + "Flask-Cors~=4.0.0", + "gunicorn~=21.2.0", + "netCDF4~=1.6.5", + "numpy~=1.26.3", + "tomli", + "rq~=1.15.1" ] dynamic = ["version"] [project.urls] Repository = "https://github.com/ISI-MIP/isimip-files-api" -[project.scripts] -isimip-files-api-select = "isimip_files_api.scripts:select" -isimip-files-api-mask = "isimip_files_api.scripts:mask" -isimip-files-api-cutout = 
"isimip_files_api.scripts:cutout" -isimip-files-api-clean = "isimip_files_api.scripts:clean" - [project.optional-dependencies] dev = [ "build", @@ -95,11 +89,19 @@ known-first-party = [ section-order = [ "future", "standard-library", + "pytest", + "flask", + "rq", "third-party", "first-party", "local-folder" ] +[tool.ruff.isort.sections] +pytest = ["pytest"] +flask = ["flask"] +rq = ["rq"] + [tool.pytest.ini_options] testpaths = ["isimip_files_api"] diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..5e8fe1c --- /dev/null +++ b/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# set environment variables for `flask run` and `rq worker` for development +# in productione, the variables should be set in systemd or docker files +export FLASK_APP=isimip_files_api.app +export FLASK_ENV=development +export RQ_WORKER_CLASS=isimip_files_api.worker.Worker + +case $1 in + + server) + flask run + ;; + + worker) + rq worker + ;; + + *) + echo "usage: ../run.sh server|worker" + ;; + +esac From f753b3a293930a68ad71e5f7108f30547fad30b3 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Thu, 18 Jan 2024 16:27:11 +0100 Subject: [PATCH 02/22] Add count_jobs function --- isimip_files_api/app.py | 5 +++-- isimip_files_api/jobs.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/isimip_files_api/app.py b/isimip_files_api/app.py index 445473c..2c19179 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -5,7 +5,7 @@ import tomli from flask_cors import CORS as FlaskCORS -from .jobs import create_job, delete_job, fetch_job +from .jobs import count_jobs, create_job, delete_job, fetch_job from .logging import configure_logging from .responses import get_errors_response from .validators import validate_data, validate_datasets @@ -31,7 +31,8 @@ def create_app(): @app.route('/', methods=['GET']) def index(): return { - 'status': 'ok' + 'status': 'ok', + 'jobs': count_jobs() }, 200 @app.route('/', methods=['POST']) diff --git 
a/isimip_files_api/jobs.py b/isimip_files_api/jobs.py index 5922677..e44bd15 100644 --- a/isimip_files_api/jobs.py +++ b/isimip_files_api/jobs.py @@ -13,6 +13,17 @@ redis = Redis() +def count_jobs(): + queue = Queue(connection=redis) + + return { + 'started': queue.started_job_registry.count, + 'deferred': queue.deferred_job_registry.count, + 'finished': queue.finished_job_registry.count, + 'failed': queue.failed_job_registry.count, + 'scheduled': queue.scheduled_job_registry.count + } + def create_job(paths, args): job_id = get_hash(paths, args) try: From 2fc7f6cb0a2a89d615d63f1b03833aac40db19f9 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 5 Feb 2024 15:47:21 +0100 Subject: [PATCH 03/22] Make config file optional --- isimip_files_api/app.py | 3 ++- isimip_files_api/config.py | 3 --- pyproject.toml | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/isimip_files_api/app.py b/isimip_files_api/app.py index 2c19179..327dbc9 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -16,7 +16,8 @@ def create_app(): app = Flask(__name__) app.config.from_object('isimip_files_api.config') app.config.from_prefixed_env() - app.config.from_file(app.config['CONFIG'], load=tomli.load, text=False) + if 'CONFIG' in app.config: + app.config.from_file(app.config['CONFIG'], load=tomli.load, text=False) # configure logging configure_logging(app) diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index 79f910a..a88eef6 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -11,9 +11,6 @@ # enable Cross-Origin Resource Sharing (CORS) CORS = True -# toml config file -CONFIG = '../config.toml' - # log level and (optional) path to flask.log LOG_LEVEL = 'ERROR' LOG_PATH = None diff --git a/pyproject.toml b/pyproject.toml index 916e0b2..bdd4a0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ classifiers = [ 'Programming Language :: Python :: 3.12' ] dependencies = [ + "colorlog~=6.8.2", 
"Flask~=3.0.0", "Flask-Cors~=4.0.0", "gunicorn~=21.2.0", From 10d5f86bfbb5231b0ca24c176e6eb22325aaf0e1 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 5 Feb 2024 15:48:45 +0100 Subject: [PATCH 04/22] Add docker files and add REDIS_URL to config --- .gitignore | 3 +++ Dockerfile | 13 +++++++++++++ docker-compose.yaml | 40 ++++++++++++++++++++++++++++++++++++++ isimip_files_api/config.py | 3 +++ isimip_files_api/jobs.py | 9 +++++++-- 5 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 Dockerfile create mode 100644 docker-compose.yaml diff --git a/.gitignore b/.gitignore index 6e073c8..3a10966 100644 --- a/.gitignore +++ b/.gitignore @@ -9,9 +9,12 @@ __pycache__/ /build /dist +/volumes /*.egg-info /env /log /*.log /*.toml !/pyproject.toml + +/.env diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..32a6ced --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.12-slim-bookworm + +RUN apt-get update -y && \ + apt-get upgrade -y && \ + apt-get install -y build-essential cdo nco + +WORKDIR /api + +COPY . . + +RUN pip3 install . + +CMD ["gunicorn", "-b", "0.0.0.0:5000", "isimip_files_api.app:create_app()"] diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..e619b55 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,40 @@ +version: "3.7" + +services: + api: + build: . + depends_on: + - redis + volumes: + - ${DOCKER_INPUT_PATH}:/input + - ${DOCKER_OUTPUT_PATH}:/output + ports: + - "${DOCKER_API_PORT}:5000" + environment: + FLASK_REDIS_URL: redis://redis:6379 + FLASK_INPUT_PATH: /input + FLASK_OUTPUT_PATH: /output + env_file: .env + + worker: + build: . 
+ command: rq worker + depends_on: + - redis + volumes: + - ${DOCKER_INPUT_PATH}:/input + - ${DOCKER_OUTPUT_PATH}:/output + environment: + FLASK_INPUT_PATH: /input + FLASK_OUTPUT_PATH: /output + RQ_REDIS_URL: redis://redis:6379 + RQ_WORKER_CLASS: isimip_files_api.worker.Worker + env_file: .env + + redis: + image: redis + command: redis-server --appendonly yes + restart: always + volumes: + - ${DOCKER_REDIS_PATH}:/data + env_file: .env diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index a88eef6..a9135c0 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -107,6 +107,9 @@ # special settings for the land sea mask LANDSEAMASK_FILE_PATH = 'landseamask.nc' +# redis configuration +REDIS_URL = 'redis://localhost:6379' + # configuration for the worker WORKER_TIMEOUT = 180 WORKER_LOG_FILE = None diff --git a/isimip_files_api/jobs.py b/isimip_files_api/jobs.py index e44bd15..e631b4b 100644 --- a/isimip_files_api/jobs.py +++ b/isimip_files_api/jobs.py @@ -10,10 +10,9 @@ from .tasks import run_task from .utils import get_hash -redis = Redis() - def count_jobs(): + redis = Redis.from_url(app.config['REDIS_URL']) queue = Queue(connection=redis) return { @@ -25,6 +24,8 @@ def count_jobs(): } def create_job(paths, args): + redis = Redis.from_url(app.config['REDIS_URL']) + job_id = get_hash(paths, args) try: job = Job.fetch(job_id, connection=redis) @@ -42,6 +43,8 @@ def create_job(paths, args): def fetch_job(job_id): + redis = Redis.from_url(app.config['REDIS_URL']) + try: job = Job.fetch(job_id, connection=redis) return get_response(job, 200) @@ -53,6 +56,8 @@ def fetch_job(job_id): def delete_job(job_id): + redis = Redis.from_url(app.config['REDIS_URL']) + try: job = Job.fetch(job_id, connection=redis) job.delete() From 3e8cff8b318ec32b6f6d8ff744b6857ec30d5b68 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 12 Feb 2024 14:49:38 +0100 Subject: [PATCH 05/22] Refactor tasks and add operations --- isimip_files_api/app.py | 38 
++++-- isimip_files_api/config.py | 7 + isimip_files_api/jobs.py | 6 +- isimip_files_api/operations/__init__.py | 78 +++++++++++ isimip_files_api/operations/cdo.py | 131 ++++++++++++++++++ isimip_files_api/operations/nco.py | 23 ++++ isimip_files_api/tasks.py | 74 ++++++----- isimip_files_api/tests/__init__.py | 0 isimip_files_api/tests/conftest.py | 20 +++ isimip_files_api/tests/test_create.py | 127 ++++++++++++++++++ isimip_files_api/utils.py | 6 + isimip_files_api/validators.py | 170 ++++++++++-------------- run.sh | 4 +- testing/input/constant.nc | Bin 0 -> 243090 bytes 14 files changed, 532 insertions(+), 152 deletions(-) create mode 100644 isimip_files_api/operations/__init__.py create mode 100644 isimip_files_api/operations/cdo.py create mode 100644 isimip_files_api/operations/nco.py create mode 100644 isimip_files_api/tests/__init__.py create mode 100644 isimip_files_api/tests/conftest.py create mode 100644 isimip_files_api/tests/test_create.py create mode 100644 testing/input/constant.nc diff --git a/isimip_files_api/app.py b/isimip_files_api/app.py index 327dbc9..9ddaf2b 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -1,14 +1,13 @@ -from collections import defaultdict from flask import Flask, request import tomli from flask_cors import CORS as FlaskCORS -from .jobs import count_jobs, create_job, delete_job, fetch_job +from .jobs import count_jobs, delete_job, fetch_job from .logging import configure_logging from .responses import get_errors_response -from .validators import validate_data, validate_datasets +from .validators import validate_data, validate_operations, validate_paths def create_app(): @@ -22,9 +21,6 @@ def create_app(): # configure logging configure_logging(app) - # log config - app.logger.debug('app.config = %s', app.config) - # enable CORS if app.config['CORS']: FlaskCORS(app) @@ -40,18 +36,40 @@ def index(): def create(): app.logger.debug('request.json = %s', request.json) - errors = defaultdict(list) + data = 
request.json - cleaned_data = validate_data(request.json, errors) + errors = validate_data(data) if errors: app.logger.debug('errors = %s', errors) return get_errors_response(errors) - validate_datasets(*cleaned_data, errors) + errors = dict(**validate_paths(data), + **validate_operations(data)) if errors: + app.logger.debug('errors = %s', errors) return get_errors_response(errors) - return create_job(*cleaned_data) + from .operations import OperationRegistry + + commands = [] + operation_registry = OperationRegistry() + for index, operation_config in enumerate(data['operations']): + operation = operation_registry.get(operation_config) + + if not commands or commands[-1]['agent'] != operation.agent: + commands.append({ + 'agent': operation.agent, + 'command': operation.get_command(), + 'command_args': operation.get_command_args(), + 'operation_args': operation.get_operation_args() + }) + else: + commands[-1]['operation_args'] += operation.get_args() + + print(commands) + + # return create_job(data['paths'], data['operations']) + return {'status': 'ok'}, 200 @app.route('/', methods=['GET']) def detail(job_id): diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index a9135c0..ef1b73b 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -33,6 +33,13 @@ # maximal number of files to process in one job MAX_FILES = 32 +# list of operations which can be performed +OPERATIONS = [ + 'isimip_files_api.operations.cdo.OutputtabOperation', + 'isimip_files_api.operations.cdo.FldmeanOperation', + 'isimip_files_api.operations.cdo.SelectBBoxOperation' +] + # list of tasks which can be performed TASKS = [ 'cutout_bbox', diff --git a/isimip_files_api/jobs.py b/isimip_files_api/jobs.py index e631b4b..127362f 100644 --- a/isimip_files_api/jobs.py +++ b/isimip_files_api/jobs.py @@ -23,15 +23,15 @@ def count_jobs(): 'scheduled': queue.scheduled_job_registry.count } -def create_job(paths, args): +def create_job(paths, operations): redis = 
Redis.from_url(app.config['REDIS_URL']) - job_id = get_hash(paths, args) + job_id = get_hash(paths, operations) try: job = Job.fetch(job_id, connection=redis) return get_response(job, 200) except NoSuchJobError: - job = Job.create(run_task, id=job_id, args=[paths, args], + job = Job.create(run_task, id=job_id, args=[paths, operations], timeout=app.config['WORKER_TIMEOUT'], ttl=app.config['WORKER_TTL'], result_ttl=app.config['WORKER_RESULT_TTL'], diff --git a/isimip_files_api/operations/__init__.py b/isimip_files_api/operations/__init__.py new file mode 100644 index 0000000..723413f --- /dev/null +++ b/isimip_files_api/operations/__init__.py @@ -0,0 +1,78 @@ +from flask import current_app as app + +from ..utils import import_class + + +class OperationRegistry: + + def __init__(self): + from flask import current_app as app + + self.operations = {} + for python_path in app.config['OPERATIONS']: + operation_class = import_class(python_path) + self.operations[operation_class.specifier] = operation_class + + def get(self, config): + if 'specifier' in config and config['specifier'] in self.operations: + return self.operations[config['specifier']](config) + + +class BaseOperation: + + def __init__(self, config): + self.config = config + + def validate(self): + pass + + +class BBoxOperationMixin: + + def get_bbox(self): + return ( + float(self.config['bbox'][0]), + float(self.config['bbox'][1]), + float(self.config['bbox'][2]), + float(self.config['bbox'][3]) + ) + + def validate_bbox(self): + if 'bbox' in self.config: + try: + self.get_bbox() + except (ValueError, IndexError): + return ['bbox is not of the form [%f, %f, %f, %f]'] + else: + return [f'bbox is missing for operation "{self.specifier}"'] + + +class PointOperationMixin: + + def get_point(self): + return ( + float(self.config['point'][0]), + float(self.config['point'][1]) + ) + + def validate_point(self): + if 'point' in self.config: + try: + self.get_point() + except (ValueError, IndexError): + return ['bbox is 
not of the form [%f, %f]'] + else: + return [f'point is missing for operation "{self.specifier}"'] + + +class CountryOperationMixin: + + def get_country(self): + return self.config['country'].upper() + + def validate_country(self): + if 'country' in self.config: + if self.get_country() not in app.config['COUNTRYMASKS_COUNTRIES']: + return ['country not in the list of supported countries (e.g. DEU)'] + else: + return [f'country is missing for operation "{self.specifier}"'] diff --git a/isimip_files_api/operations/cdo.py b/isimip_files_api/operations/cdo.py new file mode 100644 index 0000000..2f9cc3c --- /dev/null +++ b/isimip_files_api/operations/cdo.py @@ -0,0 +1,131 @@ +from pathlib import Path + +from flask import current_app as app + +from . import BaseOperation, BBoxOperationMixin, CountryOperationMixin, PointOperationMixin + + +class CdoOperation(BaseOperation): + + agent = 'cdo' + + def get_command(self): + return [app.config['CDO_BIN']] + + def get_cmd_args(self): + return ['x', 'y'] + + def get_env(self): + return { + 'CDI_VERSION_INFO': '0', + 'CDO_VERSION_INFO': '0', + 'CDO_HISTORY_INFO': '0' + } + + # def execute(*args, output_path=None): + # cmd_args = [app.config['CDO_BIN'], *list(args)] + # cmd = ' '.join(cmd_args) + # env = + + # app.logger.debug(cmd) + # output = subprocess.check_output(cmd_args, env=env) + + # if output_path: + # with open(output_path, 'w', newline='') as fp: + # writer = csv.writer(fp, delimiter=',') + # for line in output.splitlines(): + # writer.writerow(line.decode().strip().split()) + + # return mask_cmd(cmd) + + +class SelectBBoxOperation(BBoxOperationMixin, CdoOperation): + + specifier = 'select_bbox' + + def validate(self): + return self.validate_bbox() + + def get_args(self): + south, north, west, east = self.get_bbox() + return [f'-sellonlatbox,{west:f},{east:f},{south:f},{north:f}'] + + +class SelectCountryOperation(CountryOperationMixin, CdoOperation): + + specifier = 'select_country' + + def validate(self): + return 
self.validate_country() + + def get_args(self): + country = self.get_country() + mask_path = str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) + return ['-ifthen', f'-selname,m_{country:3.3}', mask_path] + + +class SelectPointOperation(PointOperationMixin, CdoOperation): + + specifier = 'select_point' + + def validate(self): + return self.validate_point() + + # def cmd_args(self): + # # cdo -s outputtab,date,value,nohead -selindexbox,IX,IX,IY,IY IFILE + # ix, iy = get_index(dataset_path, point) + + # # add one since cdo is counting from 1! + # ix, iy = ix + 1, iy + 1 + + # return [f'-selindexbox,{ix:d},{ix:d},{iy:d},{iy:d}'] + + +class MaskBBoxOperation(BBoxOperationMixin, CdoOperation): + + specifier = 'mask_bbox' + + def validate(self): + return self.validate_bbox() + + def get_args(self): + south, north, west, east = self.get_bbox() + return [f'-masklonlatbox,{west:f},{east:f},{south:f},{north:f}'] + + +class MaskCountryOperation(CountryOperationMixin, CdoOperation): + + specifier = 'mask_country' + + def validate(self): + return self.validate_country() + + def get_args(self): + country = self.get_country() + mask_path = str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) + return [f'-selname,m_{country:3.3}', mask_path] + + +class MaskLandonlyOperation(CdoOperation): + + specifier = 'mask_landonly' + + def get_args(self): + mask_path = str(Path(app.config['LANDSEAMASK_FILE_PATH']).expanduser()) + return ['-ifthen', mask_path] + + +class FldmeanOperation(CdoOperation): + + specifier = 'fldmean' + + def get_args(self): + return ['-fldmean'] + + +class OutputtabOperation(CdoOperation): + + specifier = 'outputtab' + + def get_args(self): + return ['-s outputtab,date,value,nohead'] diff --git a/isimip_files_api/operations/nco.py b/isimip_files_api/operations/nco.py new file mode 100644 index 0000000..1095ccd --- /dev/null +++ b/isimip_files_api/operations/nco.py @@ -0,0 +1,23 @@ + +from . 
import BaseOperation, BBoxOperationMixin + + +class NcoOperation(BaseOperation): + pass + + # def execute(*args, output_path=None): + # cmd_args = [app.config['NCKS_BIN'], *list(args)] + # cmd = ' '.join(cmd_args) + + # logging.debug(cmd) + # subprocess.check_output(cmd_args) + + # return mask_cmd(cmd) + + +class CutoutBBoxOperation(BBoxOperationMixin, NcoOperation): + + specifier = 'cutout_bbox' + + def validate(self): + return self.validate_bbox() diff --git a/isimip_files_api/tasks.py b/isimip_files_api/tasks.py index a4a7651..481bd1b 100644 --- a/isimip_files_api/tasks.py +++ b/isimip_files_api/tasks.py @@ -7,12 +7,10 @@ from rq import get_current_job -from .cdo import mask_bbox, mask_country, mask_landonly, select_bbox, select_country, select_point -from .nco import cutout_bbox -from .utils import get_output_name, get_zip_file_name +from .utils import get_zip_file_name -def run_task(paths, args): +def run_task(paths, operations): # get current job and init metadata job = get_current_job() job.meta['created_files'] = 0 @@ -36,48 +34,54 @@ def run_task(paths, args): for path in paths: input_path = Path(app.config['INPUT_PATH']).expanduser() / path - if args['task'] in ['select_country', 'select_bbox', 'select_point']: - tmp_name = get_output_name(path, args, suffix='.csv') - else: - tmp_name = get_output_name(path, args) + app.logger.warn(input_path) - tmp_path = tmp / tmp_name + for operation in operations: + print(operation) + app.logger.error(operation) - if args['task'] == 'cutout_bbox': - cmd = cutout_bbox(input_path, tmp_path, args['bbox']) + # if args['task'] in ['select_country', 'select_bbox', 'select_point']: + # tmp_name = get_output_name(path, args, suffix='.csv') + # else: + # tmp_name = get_output_name(path, args) - elif args['task'] == 'mask_country': - cmd = mask_country(input_path, tmp_path, args['country']) + # tmp_path = tmp / tmp_name - elif args['task'] == 'mask_bbox': - cmd = mask_bbox(input_path, tmp_path, args['bbox']) + # if args['task'] 
== 'cutout_bbox': + # cmd = cutout_bbox(input_path, tmp_path, args['bbox']) - elif args['task'] == 'mask_landonly': - cmd = mask_landonly(input_path, tmp_path) + # elif args['task'] == 'mask_country': + # cmd = mask_country(input_path, tmp_path, args['country']) - elif args['task'] == 'select_country': - cmd = select_country(input_path, tmp_path, args['country']) + # elif args['task'] == 'mask_bbox': + # cmd = mask_bbox(input_path, tmp_path, args['bbox']) - elif args['task'] == 'select_bbox': - cmd = select_bbox(input_path, tmp_path, args['bbox']) + # elif args['task'] == 'mask_landonly': + # cmd = mask_landonly(input_path, tmp_path) - elif args['task'] == 'select_point': - cmd = select_point(input_path, tmp_path, args['point']) + # elif args['task'] == 'select_country': + # cmd = select_country(input_path, tmp_path, args['country']) - # write cmd into readme file - readme.write(cmd + '\n') + # elif args['task'] == 'select_bbox': + # cmd = select_bbox(input_path, tmp_path, args['bbox']) - if tmp_path.is_file(): - z.write(tmp_path, tmp_name) - else: - error_path = Path(tmp_path).with_suffix('.txt') - error_path.write_text('Something went wrong with processing the input file.' - ' Probably it is not using a global grid.') - z.write(error_path, error_path.name) + # elif args['task'] == 'select_point': + # cmd = select_point(input_path, tmp_path, args['point']) - # update the current job and store progress - job.meta['created_files'] += 1 - job.save_meta() + # # write cmd into readme file + # readme.write(cmd + '\n') + + # if tmp_path.is_file(): + # z.write(tmp_path, tmp_name) + # else: + # error_path = Path(tmp_path).with_suffix('.txt') + # error_path.write_text('Something went wrong with processing the input file.' 
+ # ' Probably it is not using a global grid.') + # z.write(error_path, error_path.name) + + # # update the current job and store progress + # job.meta['created_files'] += 1 + # job.save_meta() # close and write readme file readme.close() diff --git a/isimip_files_api/tests/__init__.py b/isimip_files_api/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/isimip_files_api/tests/conftest.py b/isimip_files_api/tests/conftest.py new file mode 100644 index 0000000..9d60574 --- /dev/null +++ b/isimip_files_api/tests/conftest.py @@ -0,0 +1,20 @@ +import pytest + +from ..app import create_app + + +@pytest.fixture() +def app(): + app = create_app() + app.config.update({ + 'TESTING': True, + 'INPUT_PATH': 'testing/input', + 'OUTPUT_PATH': 'testing/output', + 'MAX_FILES': 8 + }) + + yield app + +@pytest.fixture() +def client(app): + return app.test_client() diff --git a/isimip_files_api/tests/test_create.py b/isimip_files_api/tests/test_create.py new file mode 100644 index 0000000..31c0459 --- /dev/null +++ b/isimip_files_api/tests/test_create.py @@ -0,0 +1,127 @@ +def test_empty(client): + response = client.post('/', json={}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == {'data': ['No json data provided with POST']} + + +def test_list(client): + response = client.post('/', json=[1, 2, 3]) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == {'data': ['Provided json data is malformatted']} + + +def test_missing(client): + response = client.post('/', json={'foo': 'bar'}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'paths': ['This field is required.'], + 'operations': ['This field is required.'] + } + + +def test_malformatted(client): + response = client.post('/', json={'paths': {'foo': 'bar'}, 'operations': {'foo': 
'bar'}}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'paths': ['Provided json data is malformatted.'], + 'operations': ['Provided json data is malformatted.'] + } + + +def test_paths_to_many(client): + response = client.post('/', json={'paths': [ + 'test1.nc', + 'test2.nc', + 'test3.nc', + 'test4.nc', + 'test5.nc', + 'test6.nc', + 'test7.nc', + 'test8.nc', + 'test9.nc' + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'paths': ['To many files match that dataset (max: 8).'], + 'operations': ['This field is required.'] + } + + +def test_paths_below_root(client): + response = client.post('/', json={'paths': [ + '../test.nc' + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'paths': ['../test.nc is below the root path.'], + 'operations': ['This field is required.'] + } + + +def test_paths_not_netcdf(client): + response = client.post('/', json={'paths': [ + 'test.txt' + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'paths': ['test.txt is not a NetCDF file.'], + 'operations': ['This field is required.'] + } + + +def test_paths_not_found(client): + response = client.post('/', json={ + 'paths': [ + 'test.nc' + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'paths': ['test.nc was not found on the server.'], + 'operations': ['This field is required.'] + } + + +def test_operations_not_found(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'specifier': 'invalid' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': 
['operation "invalid" was not found'] + } + + +def test_select_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'specifier': 'select_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ]}) + assert response.status_code == 200 + assert response.json.get('status') == 'ok' + assert response.json.get('errors') is None + + +def test_select_bbox_missing_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'specifier': 'select_bbox' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['bbox is missing for operation "select_bbox"'] + } diff --git a/isimip_files_api/utils.py b/isimip_files_api/utils.py index dc912b6..ef72745 100644 --- a/isimip_files_api/utils.py +++ b/isimip_files_api/utils.py @@ -1,4 +1,5 @@ import hashlib +import importlib import re from pathlib import Path @@ -43,3 +44,8 @@ def get_hash(paths, args): def mask_cmd(cmd): return re.sub(r'\/\S+\/', '', cmd) + + +def import_class(string): + module_name, class_name = string.rsplit('.', 1) + return getattr(importlib.import_module(module_name), class_name) diff --git a/isimip_files_api/validators.py b/isimip_files_api/validators.py index 4f8c13a..f58f40b 100644 --- a/isimip_files_api/validators.py +++ b/isimip_files_api/validators.py @@ -1,116 +1,80 @@ +from collections import defaultdict from pathlib import Path from flask import current_app as app -from .netcdf import check_resolution, open_dataset +from .operations import OperationRegistry -def validate_data(data, errors): - # check if any data is provided +def validate_data(data): + errors = defaultdict(list) + if (not data) or (data is None): - errors['data'] = 'No json data provided with POST' + errors['data'].append('No json data provided with POST') elif not isinstance(data, dict): - errors['data'] = 'Provided json data is malformatted' - else: - paths = 
validate_paths(data, errors) - args = validate_args(data, errors) - return paths, args - - -def validate_paths(data, errors): - # check if path is given - if 'paths' not in data: - errors['paths'].append('This field is required.') - return None - - if len(data['paths']) > app.config['MAX_FILES']: - errors['paths'].append('To many files match that dataset (max: {MAX_FILES}).'.format(**app.config)) - return None - - for path in data['paths']: - # prevent tree traversal - try: - input_path = Path(app.config['INPUT_PATH']).expanduser() - absolute_path = input_path / path - absolute_path.parent.resolve().relative_to(input_path.resolve()) - except ValueError: - errors['paths'].append(f'{path} is below the root path.') + errors['data'].append('Provided json data is malformatted') - # check if the file exists - if not absolute_path.is_file(): - errors['paths'].append(f'{path} was not found on the server.') + return errors - # check if the file exists - if absolute_path.suffix not in ['.nc', '.nc4']: - errors['paths'].append(f'{path} is not a NetCDF file..') +def validate_paths(data): + errors = defaultdict(list) - return data['paths'] - - -def validate_args(data, errors): - args = { - 'task': validate_task(data, errors), - 'bbox': validate_bbox(data, errors), - 'point': validate_point(data, errors), - 'country': validate_country(data, errors) - } - if args['task'] in ['select_country', 'mask_country'] and not args['country']: - errors['args'] = 'country needs to be provided' - elif args['task'] in ['cutout_bbox', 'mask_bbox', 'select_bbox'] and not args['bbox']: - errors['args'] = 'bbox needs to be provided' - elif args['task'] in ['select_point'] and not args['point']: - errors['args'] = 'point needs to be provided' - else: - return args - - -def validate_task(data, errors): - app.logger.info(data) - if 'task' not in data: - errors['task'] = 'task needs to be provided' - elif data['task'] not in app.config['TASKS']: - errors['task'] = "task '{task}' is not 
supported".format(**data) - else: - return data['task'] - - -def validate_bbox(data, errors): - if 'bbox' in data: - try: - return [float(data['bbox'][0]), float(data['bbox'][1]), float(data['bbox'][2]), float(data['bbox'][3])] - except (ValueError, IndexError): - errors['bbox'] = 'bbox is not of the form [%f, %f, %f, %f]' - else: - return None - - -def validate_point(data, errors): - if 'point' in data: - try: - return [float(data['point'][0]), float(data['point'][1])] - except (ValueError, IndexError): - errors['bbox'] = 'bbox is not of the form [%f, %f]' + if not data.get('paths'): + errors['paths'].append('This field is required.') + elif not isinstance(data['paths'], list): + errors['paths'].append('Provided json data is malformatted.') else: - return None - - -def validate_country(data, errors): - if 'country' in data: - country = data['country'].lower() - - if country.upper() not in app.config['COUNTRYMASKS_COUNTRIES']: - errors['country'] = 'country not in the list of supported countries (e.g. 
DEU)' - - return country + if len(data['paths']) > app.config['MAX_FILES']: + errors['paths'].append('To many files match that dataset (max: {MAX_FILES}).'.format(**app.config)) + else: + for path in data['paths']: + # prevent tree traversal + try: + input_path = Path(app.config['INPUT_PATH']).expanduser() + absolute_path = input_path / path + absolute_path.parent.resolve().relative_to(input_path.resolve()) + except ValueError: + errors['paths'].append(f'{path} is below the root path.') + else: + # check if the file exists + if absolute_path.suffix not in ['.nc', '.nc4']: + errors['paths'].append(f'{path} is not a NetCDF file.') + # check if the file exists + elif not absolute_path.is_file(): + errors['paths'].append(f'{path} was not found on the server.') + + return errors + + +def validate_operations(data): + errors = defaultdict(list) + + if not data.get('operations'): + errors['operations'].append('This field is required.') + elif not isinstance(data['operations'], list): + errors['operations'].append('Provided json data is malformatted.') else: - return None - - -def validate_datasets(paths, args, errors): - for path in paths: - input_path = Path(app.config['INPUT_PATH']).expanduser() - absolute_path = input_path / path - with open_dataset(absolute_path) as ds: - resolutions = app.config['RESOLUTION_TAGS'].get(args.get('task')) - if not any(check_resolution(ds, resolution) for resolution in resolutions): - errors['paths'].append(f'{path} is not using the correct grid: {resolutions}.') + operation_registry = OperationRegistry() + for index, operation_config in enumerate(data['operations']): + if 'specifier' in operation_config: + operation = operation_registry.get(operation_config) + if operation is None: + errors['operations'].append('operation "{specifier}" was not found'.format(**operation_config)) + else: + operation_errors = operation.validate() + if operation_errors: + errors['operations'] += operation_errors + else: + 
errors['operations'].append(f'operation [{index}] does not have a specifier') + + return errors + + +# # def validate_datasets(paths, args, errors): +# # for path in paths: +# # input_path = Path(app.config['INPUT_PATH']).expanduser() +# # absolute_path = input_path / path +# # with open_dataset(absolute_path) as ds: +# # resolutions = app.config['RESOLUTION_TAGS'].get(args.get('task')) +# # if not any(check_resolution(ds, resolution) for resolution in resolutions): +# # errors['paths'].append(f'{path} is not using the correct grid: {resolutions}.') diff --git a/run.sh b/run.sh index 5e8fe1c..a971ffd 100755 --- a/run.sh +++ b/run.sh @@ -1,9 +1,11 @@ #!/bin/bash # set environment variables for `flask run` and `rq worker` for development -# in productione, the variables should be set in systemd or docker files +# in production, the variables should be set in systemd or docker files export FLASK_APP=isimip_files_api.app export FLASK_ENV=development +export FLASK_DEBUG=true +export FLASK_CONFIG=../config.toml export RQ_WORKER_CLASS=isimip_files_api.worker.Worker case $1 in diff --git a/testing/input/constant.nc b/testing/input/constant.nc new file mode 100644 index 0000000000000000000000000000000000000000..5e4a2310444434180037eec8f304d634b9f29bd9 GIT binary patch literal 243090 zcmeI53zQVsdFN|p01aq_nxF@=EKx!djKo9DLuil$Y92ET5|0)j7|F8e5i`=TLC^FmJDrV^WwW+C_U2^C9wqMMoK?`S6t{h} z3MaPXt*ZOGLl-nOubJtY`)>rlx^=%>RrmK*_cvWtXY`Am>pHHQ_MT~t&CQ;D{e;)Y zzuXk9O48A?bltYY0QGeXlnuKvSc4dW3Da+!4k# zc`d=8AavKh-Ai{J>_4z)cfXei4lNw*dA`>e{2OjDkqFzLHV5df4jaMCtX&oc1+QHO zW`O5?;v0wOMTdWt*U)fH*w@(5(9lDWJ&vYp!tKxSAMlKjl(+0kZ`n_R9ZjAd_B728 z*QY-E<$;^NaP2xfuPeO_Dt-WG;j?*WxPzv~#$0s7!E$hPQQyym9d@_FYr~c2{c4$S zCwIO>Q1jaFzqEYI+RlfjTptc>n1n}fQvaT3dcViUQa}6k_Db*J>N%TsCBlI%4~Khh z@}>uK3DdQCZTi6eZ0~{oJ-r9LaLS8pgIA&;ETYmn_)V+rPW#sg=tXtyr{l zVNdUKy?gg(pXuG#zo4(@sk>J$UbJ}0q9xz|*4($w%+U}>ZCNu%cUI3)u{AiL;I%b6 z*F0|kHvd$(wyB}KIHczdUT5eHgtRHLYt2bUL~UGjj=uWqvtJHXoBeH{Y9>vNsSORw!oiJAr``*F 
zY}NX<22NjJGY_%%%9YoJ@1MrT@FSridJBYXh~C=GH-^X1)Hvyt{&2yZ)#2{KLkN%F zzR|82PKP}xOC#hJ{Ct5aw9E}ZJBH8GgZ}fdC;B`e--#7W)zNruXr|69p zok9(h%=;&p+TQ*9el-g6yeagN(V(xp|N1Fr>$Z=k*YsTXfgkejT(EfQ6Tz0E*_aQ> z&Cy$G*}6T??CCk?@vGc&7I!c9a}ro(Fxq>+0XxAYj!%p z`!RaiH)!-;2-n!t#&i##do<6AVA$|Tv+3604VG)M175zh`S|H)c#aQ;Q)l>(ANsBT z5=Bi7-v?I(gL_(Y)66zbMQvKJd1c48;N9ZcSumMkkMY+eFCBc6wgxLrPl@w6dd~-I zEw5eE_|BT`8#@AD9|$9FH6LdikN64ic(@|?c%B;ImCydG?|nU3@PeI(^L^(dvv2cu zhAXItum00Z_j^0TA@hQ_TYzH&cWw1v2qR{jwC;NG=RV?nCR`B=36H0@W!0~GiEzmJ zpwtIgmznx$?=#_wdnnj@KJ~}uEwb11LLJw=^Y)*5(R+Dy&_8U9&1 z8oqwV!T-?YT@!pQ2!3YYMa5gYfB%7=J^Q-*gP%5IUk`$x=)<>c__7R_n}Sb)=m*uc z8&_}J78U;R<2d}NUbkUW`{r#Mwrt+DZSCrf?Lp{d6S{fzruHZ@{4@w7!=H_V?&*9( zIcM<=!h_H&gJTYlx?|7Yy*s-1KHFRMi$(Y~#2&1r{elthF51R3dk!Alv+wC$&t3Fp zqWPcs15o$@YG>CJe6B@d!A~nspZXcehHMW644Zs*-=2Q+qyBr@LQnUR6rXZNnB5A;60KlpKF zUz2yWmki}?ZSQE`+P-;h`>u@}wr%&82k~L*db<0&5B3H>Er#D`lp@@Y~R}69)2mXKOtF0Ij?e{#aXAV3{R)v#Sb;6p?_sS z24p}6WIzTcoPohDjjO{+mAVX5Pa5Wb+Az508TDsAU|9UHq5ms}ga6sE^xKB1KQzq0 zZ0HpYGyl!7_-#Y~b;H5`ZCLte!_-N`d_z;TU++r8%r%C^n+*MVhJ#7N(h9@WTEqNC zLvM#+W|v{H&(J?$IQX-MrJpxUeatZbNki{5hMCVB7Qbldf7NjCuMA7yG)(=SVg9>@ z-dl#5|7TeIH$%U9QZ&E88HT0z7^ZGB%r7+b?lH`y42v5K{cVPWj~kYH4O9CK^8<$7 z3x=5&4U3;J^gm@d_`8Osf??_v!~9N>9IuCRn=seK-K%WEp9MI>0 zJ_qzUFg`yA&i#Bv@MKQ(vXANvKOJH24w=s2U;C`d@QSl61jB|$noWn#SCkjp@V?hu zR-XQRMVZHhPCYsCzjiKr^7Q8`%E4m@;p@og2{rQUJJX&L) ztFV0Y?mzxM&sV$zW5WOzW z!1+C2VV`}_XI#S;D1Igj6RansSL3p5ZtUtynv zjmG}Tv!5+r((@JJJ9hY~fQxy)BK$}VZ`%eB9h~pu6=t8~;qi*_^OMOvy{G@#p5OtB zs?S*1xQ)S+7sT<%%Bcq}TG&=kZ}6Bz@4;QY-3R-JA6Mxf*kc~Y2;GEx>>7`t$7V_@K^Vsxy1unTs=HrNVh!De`}x9Ekh6Yw}Z29Lrc@Gv|C2VfuUf}OApw!&Gk z8J_H6|L{0G29Lrc@Gv|C2VfuUf}OApw!&Gk8J^tD{^4uocdN&G2M5`-jKjF?bXnfrsHCH~{-# z7wm*>uocdN&G2M~{lnw%7(5D(z{Btm9DseW3wFXb*a~OCW?0^3_CEv*FbA_R4U;ed z%TF*K7GMr$VHzf30+!#;cvyfrn1yMWgb7%Fobj*#b1)0jFbNZ|{65CR0?ffIOv5Bh z!17~^hXt5}S(t`Nn1JO+84n9E2eU8@lQ03xU5tkXn1flEhDn%!<(-U&1(<_bn1)H1 zfaM*GhXt5}S(t`Nn1JO+7!M0D2eU8@lQ03x+ZhiFFbA_R4U;ed%i9Fb$J1 
z0n1w%4+}5{voH;lFagUCGaeRT4rXB*CSd}W(~O4&n1flEhDn%!r7dQB9%i5qOPlG3 z8R)~(Ci-Cp`mnT-ewcwiEImX&%s?NO9;6>;pbtwM=!Y5T!%`>xFav#9T2DXBKp&Pm z=!Y5T!%{o_Fav#9T1P+3Kp&RY(hoDxhov?2!wmFcX*K;Y1ASOZ(GN4whs6g>e+m{? z(T2s9v|;gn+OT*ZZCJdQHY~274U28GVeuZ?u(+HyEZ$8U7MIb6#ig`iaS3f$Tud7l zleFRBU8X&_2rh&R;Cy%|yaTqvd2lYg9o`0Sg>&F7&^PpE!<*oZ@CJB2ycW)cGvIVM z4NifL@JD~f=fayO0-+@1ZjVaSV1x|z0;S4wvUJI{>H^3X=O>nm1pbu|> zbKtG;Hh4Rn3+KUBcn7=_&W8)&LbwRtWtd7D7GY{JZJ1g@8>W`hhN)$=Vd`$$FtwaE zOx;5prrK!3)C$@#buVq0x{o$Y-A@~)R?>#4RkUI10ou?{nRW)|VF~)H>4$k(g8mx% zVIG#Czm|TOhb8E*qaWsB3Ht5y!#pfOzk`05hb8E*ryu5F3HqJ%!#pfOe*^t64@=O0 zkbanlCFnmyKg`1t^f%HE^RNW{P4vS&EJ1%W{V)$p(BDEo%)=5)q)mSkrePN5U;z%n zGE6+oc$kJ+n1cm41j{h7mGLkQvoHq>a0r%RVjJUO8fIY*7T^#p!^C#R!!*po94x>g zScZv57!T7h3v;jlhhP~db}$~MVHW0K0S>`3OzdPlOv5b9!2%qDWtiwa0r%R;xWdPQMF&?I27Up094#6^P&Y1DDU@L5c zov;h`!2x&(9)?HYQFsg zhpn&;cET>$2M6FGco-gmN8vGe9G-wDVe{^k7rth}R@eqRVHfO!1Mm<$43EI0@EAM} zPr#F~xrhD3R@eqRVHfO!1Mm<$43EI0@EAM}Pr#F~xtIOJR@eqRVHfO!1Mm<$43EI0 z@EAM}Pr#F~`6>1fTVWgQgk7)?4m_1Q_x<|d*C#|T`}-7z@3v1e>+Qc++Jzbnh7FH2 zn-1^SKflQid+Gm7dhPT(@y|bGdQaVn|M5@He%9QHx5vj}*1QwH`UKvW3ZXa zb0@yL|9tPnn|+RlJMqndo$zRb-?ntF8}T-5<8as+_u;Q%OZGnezWoRK`_6tN{?XC6 z5pPRUaAXGWH6Ry`ps56{w880%fmAopuE)W3OP@ zD_W;r0n6AcV9;JMvschE_6i!bSKRCsxXgxB^xp8Hy<%st;AQL;JbOj&v@3uadj-&5 z5$rJAvsVx^w0&r=INB?anRfOM?G;OV1v9gberT_F+AE-$we-W%8q?0h4D_MB;%cwJ zW>%YcAKELn_6lw$ML#S)VA{i1fLGCm#g(*S@qXH{cpq(8yq7jCuAmKzZM0$W9@?W$zd&kG#0kU_5>>Z-U zqKTUVr@`rP2Am16h1bIy;EnJmINNa0hqu5v@K$&mydBPk^I$8y1KtVe!v%05TmX_$pM zSb#&Y3=`WJ57RIUbFcu1U>PR1Gaja47Up094#6@^Ji>UGhFO?{1vmuDFtLO2Fb%Ui ziqUVO-rxnV3;NF76pjd9mf><^@V6oO+p;d?<@mLIEqlYQhTr=V;qS0J^*aWm-(hz{ zu&+e$_xj*(bMQC(1%UQ=EW_h@r6>3&cuflaR<LHc=~!eA)H(xBQ#6 zo|n1WE{QTA12P~3G9UvoAOkWW12P~3G9UvNY9RVQg*2S`KNNlUiu2o2;p5@8Y0tCq zD{2hOe#pezu-z>5apA%K&l2r*02?RDfDFih49I{C$bbyUfDDYifq^I9{_}>bg8Oz& z`SoA?(2rj{{Ess`$Ns2v1Tr84G9UvZV_@*E>qch7<6*1$SI5Iqj?giFqOI?aweP6v zkC0w%Rt9812I|s)s(xLjWfTrdReuyDYZo#g12Rx&22}OyEFq(III8-irdm6a0U3~i 
zx-p=tUpJ{3->s_tn5RGoAOkWW1EX$0Re#i#*U1i5_3I=NIt&?* z0T~!Y1FHI?D7&t9rK(?7>CnN*fDFjMs2EVy9~I4YxD!?VI!uZVNd{y<21eL`s{RNI zuG`J4>ep>*bYLtKLS$6#U@qt$3>cS zkTM_xGH_W9sOn!r^A*38IXZXFrcb`3G&AEP*weLojx7B49I{CT$BM-{fp8z0S2k+ zPkLZ(%dk^vczfpZ#A)jy}63EZiwKY^30smXv0$iP_)sOq0Z%!FU9 zsz2dVtO?4149LJ422}OWP~yl6ReeVit!c`D49Gx*0ag798BQ&#>N}NgO;!eEKn5ZM zs`{>;2&($7W?u7_0U2=FfU3T$*;mzfHS?Oc49I}X22}N3&AzI>tC`olWk3d8HlV8S zYW7w2UCq4aEdw&(vH?|nSF^9G?`q~XZyAsQmkp@uyPADfeOEKDdCPzdxNJaG-_`7^ z>bshG&07X!z-0rf`mSbQRo~UjYu+*-11=j-)ps@fs`{>GUh|d#8F1Nvs=lk)SJihl z^P0B|$bicRRP|lWzN)^fnb*8!Kn7ekpsMd`_Eq&=&AjF<12W*U0ablhv#+Y}YUVX> z8IS>&4XEn7ntfG$S2M49%YY2HY(Q1t)$FV4yPA2;TLxsnWdo}Eu4Z3V-_^`(-ZCHq zE*ntQcQyN}`mSbP^OgY_aM^&WzN^_+)ps@XnzszdfXfC{^ z?5pa#nt9Dz24uix1FHJ2W?xm`)y!+&G9Uvk8&K7EHT$aiu4Z2ImH`=X*?_9PtJzo8 zcQx~xw+zUD%LY{SUCq9#zN?wnyk$TJTsEMp?`rl{^bshKRee`8uX)RW47hASRo~U@tLnR&dCgk}WWZ$us`{>GUsd1L%xm5< zAOkKNP}O%e`>Oh`W?u7_0U2=FfU3T$*;mzfHS?Oc49I}X22}N3&AzI>tC`olWk3d8 zHlV8SYW7w2UCq4aEdw&(vH?|nSF^9G?`q~XZyAsQmkp@uyPADfeOEKDdCPzdxNJaG z-_`7^>bshG&07X!z-0rf`mSc*RQ>H++uIww`Cj4Ujgg3-@Ot+O<7Gt+J;e?)&c@$c zBmU*h@%Z~{#Q(vLc>Mh};(zOjc>Ky5@rR#^$FHgp|FM1X_y=ml|NL|D_*9Mf=RXjS zUtJ@<{Hb{Sni}zMd@dfpwnqHxe-w{jS0n!0FURBCYs4S>T0Fj^M*P?QDjvVSM*Npv zi^q4?i2wZG#^X2Ch|m8+JpREN@h`m*kAJ8}{73&e9>1|h{0r~K<2TiaANaR;{N@_* z`=&HS5%&GQrAB29le8=2){I(kL_b-aaZ?6%*WO+RP zks9%>tK#uHYQ*2%5s%+lBmSDr@%XM9@zZw1;~%XN?>!NZf2>CQyS?%F_tl7h>zR1` z<2B;H`)oY^{Wao`|HpXz6E)&r`-OP?t{U;L{!%J+4#N&vu4PAe0a?^K$HO)aK?c7{;lMvonXiemEXK>CBMWns4Xh@sne{XEu%gA99zV5mV^x;($KvtRs>D>TdTA&ge`RH7mF15fkH@!E ziK$%mLMa}9Rb^+D<$)9N_~}(*Dp&0*$K$WA?5wifb21*Es1j4T>M_rp@%H?mQQ28# zIo%wO|EVf5m8&`u@%WjQomH0apB0b4rb`?T^RLuIQ__`0CHZq3x(N}HpH$EAUzpYwO#k!AwIv#&}MPIeWL%$o3pIa@cV%-P65Rac% z(N}G;|10tM)@nf&>;B!J#pCa&=&QDv`LFT#JF5j%tlRO;c>Mf|zG{mb|7SdYLA9WY zb!)y8k6&2PS8cKFpW^Y0ss&Z7TlBqn{9P4&)fR92mw0@#T2RHh_xvOtzqq2W+G660 z_|I2Mss&Z7n{stLerZKtwZ$J@7mr_7EvRDM4{naf-(AsHZSln1c>MBeK^5!%VNpE( zo{GL|i+{5`9^Y0isAAn;uZqXly53zeREw9vz`zr4|9M03LtArGe*G6e^y3!~|KrTg 
z%dlB(LIz|&24tWv4d|(Zx=hO`9F(3l90keRg$&4m4AhwcRsA|k$fzBTs{W{{){bOA z24tXa45;eYO)5tDKveZdS-5s512P~3bzneMzYdZxCWoM^KPK9>QyGu}85mOos`_K9 ze++l7sy~MEwQCuW0T~!01FHICB!0|ytExZdDbNAPfDFjMs2fn#A9dw*vO`tep2|bTBd?12Qlw22}M&MROhQL{+~IlcGbC0U3~i5jLQz zKf;3RcJr$Gb(F{Jg24r9a45;dlfYfoZNmc!E zktQ9a49I{CTvh|B`j=JbINF%1{y0jO4pjzZKn5<80ag9WBy!wsMpb{@rA!Ab12P~3 zmux^)|B@As(+#NVkJH5Iuw_67WZ)7EsOn#Wym38LRexNkPX{jpG9UvNWk6N`qO?tb zL8|%_Ac>lS49I{CT%Z9}{RQAtQYC19? z12S+P22}OWL(+r{QPrQ2Y1O1;Kn7&soCZ|&kucdF`7;N)s*G9Uvoa25lq`ezX{ z;a98bPxusTf-)cjGH`|gRsA!RII==j-;qRXnlc~*GEiYaRlh=pQ;VwlPNiFul>r%$ zfyjWWzN;sKs=lk4*Suvw23$6vs_$y{RrOuXyyh(fGT^cSRee{pud45A<~45_kO7ws zsOr0#eN}x|Gp~8efDE{7Kvmz>?5pa#nt9Dz24uix1FHJ2W?xm`)y!+&G9Uvk8&K7E zHT$aiu4Z2ImH`=X*?_9PtJzo8cQx~xw+zUD%LY{SUCq9#zN?wnyk$TJTsEMp?`rl{ X^ Date: Tue, 13 Feb 2024 18:04:10 +0100 Subject: [PATCH 06/22] Add commands and finish operations --- isimip_files_api/app.py | 25 +---- isimip_files_api/cdo.py | 101 -------------------- isimip_files_api/commands/__init__.py | 42 +++++++++ isimip_files_api/commands/cdo.py | 50 ++++++++++ isimip_files_api/commands/ncks.py | 32 +++++++ isimip_files_api/config.py | 16 +++- isimip_files_api/jobs.py | 6 +- isimip_files_api/nco.py | 29 ------ isimip_files_api/operations/__init__.py | 25 +++-- isimip_files_api/operations/cdo.py | 94 ++++++++++--------- isimip_files_api/operations/ncks.py | 26 ++++++ isimip_files_api/operations/nco.py | 23 ----- isimip_files_api/tasks.py | 118 +++++++++++++----------- isimip_files_api/utils.py | 34 +------ isimip_files_api/validators.py | 7 +- 15 files changed, 305 insertions(+), 323 deletions(-) delete mode 100644 isimip_files_api/cdo.py create mode 100644 isimip_files_api/commands/__init__.py create mode 100644 isimip_files_api/commands/cdo.py create mode 100644 isimip_files_api/commands/ncks.py delete mode 100644 isimip_files_api/nco.py create mode 100644 isimip_files_api/operations/ncks.py delete mode 100644 isimip_files_api/operations/nco.py diff --git 
a/isimip_files_api/app.py b/isimip_files_api/app.py index 9ddaf2b..6510417 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -1,10 +1,9 @@ - from flask import Flask, request import tomli from flask_cors import CORS as FlaskCORS -from .jobs import count_jobs, delete_job, fetch_job +from .jobs import count_jobs, create_job, delete_job, fetch_job from .logging import configure_logging from .responses import get_errors_response from .validators import validate_data, validate_operations, validate_paths @@ -49,27 +48,7 @@ def create(): app.logger.debug('errors = %s', errors) return get_errors_response(errors) - from .operations import OperationRegistry - - commands = [] - operation_registry = OperationRegistry() - for index, operation_config in enumerate(data['operations']): - operation = operation_registry.get(operation_config) - - if not commands or commands[-1]['agent'] != operation.agent: - commands.append({ - 'agent': operation.agent, - 'command': operation.get_command(), - 'command_args': operation.get_command_args(), - 'operation_args': operation.get_operation_args() - }) - else: - commands[-1]['operation_args'] += operation.get_args() - - print(commands) - - # return create_job(data['paths'], data['operations']) - return {'status': 'ok'}, 200 + return create_job(data) @app.route('/', methods=['GET']) def detail(job_id): diff --git a/isimip_files_api/cdo.py b/isimip_files_api/cdo.py deleted file mode 100644 index ecb3f5d..0000000 --- a/isimip_files_api/cdo.py +++ /dev/null @@ -1,101 +0,0 @@ -import csv -import logging -import subprocess -from pathlib import Path - -from flask import current_app as app - -from .netcdf import get_index -from .utils import mask_cmd - - -def mask_bbox(dataset_path, output_path, bbox): - # cdo -f nc4c -z zip_5 -masklonlatbox,WEST,EAST,SOUTH,NORTH IFILE OFILE - south, north, west, east = bbox - return cdo('-f', 'nc4c', - '-z', 'zip_5', - f'-masklonlatbox,{west:f},{east:f},{south:f},{north:f}', - str(dataset_path), - 
str(output_path)) - - -def mask_country(dataset_path, output_path, country): - # cdo -f nc4c -z zip_5 -ifthen -selname,m_COUNTRY COUNTRYMASK IFILE OFILE - mask_path = Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser() - return cdo('-f', 'nc4c', - '-z', 'zip_5', - '-ifthen', - f'-selname,m_{country.upper():3.3}', - str(mask_path), - str(dataset_path), - str(output_path)) - - -def mask_landonly(dataset_path, output_path): - # cdo -f nc4c -z zip_5 -ifthen LANDSEAMASK IFILE OFILE - mask_path = Path(app.config['LANDSEAMASK_FILE_PATH']).expanduser() - return cdo('-f', 'nc4c', - '-z', 'zip_5', - '-ifthen', - str(mask_path), - str(dataset_path), - str(output_path)) - - -def select_point(dataset_path, output_path, point): - # cdo -s outputtab,date,value,nohead -selindexbox,IX,IX,IY,IY IFILE - ix, iy = get_index(dataset_path, point) - - # add one since cdo is counting from 1! - ix, iy = ix + 1, iy + 1 - - return cdo('-s', - 'outputtab,date,value,nohead', - f'-selindexbox,{ix:d},{ix:d},{iy:d},{iy:d}', - str(dataset_path), - output_path=output_path) - - -def select_bbox(dataset_path, output_path, bbox): - # cdo -s outputtab,date,value,nohead -fldmean -sellonlatbox,WEST,EAST,SOUTH,NORTH IFILE - south, north, west, east = bbox - return cdo('-s', - 'outputtab,date,value,nohead', - '-fldmean', - f'-sellonlatbox,{west:f},{east:f},{south:f},{north:f}', - str(dataset_path), - output_path=output_path) - - -def select_country(dataset_path, output_path, country): - # cdo -s outputtab,date,value,nohead -fldmean -ifthen -selname,m_COUNTRY COUNTRYMASK IFILE - mask_path = Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser() - return cdo('-s', - 'outputtab,date,value,nohead', - '-fldmean', - '-ifthen', - f'-selname,m_{country.upper():3.3}', - str(mask_path), - str(dataset_path), - output_path=output_path) - - -def cdo(*args, output_path=None): - cmd_args = [app.config['CDO_BIN'], *list(args)] - cmd = ' '.join(cmd_args) - env = { - 'CDI_VERSION_INFO': '0', - 'CDO_VERSION_INFO': '0', - 
'CDO_HISTORY_INFO': '0' - } - - logging.debug(cmd) - output = subprocess.check_output(cmd_args, env=env) - - if output_path: - with open(output_path, 'w', newline='') as fp: - writer = csv.writer(fp, delimiter=',') - for line in output.splitlines(): - writer.writerow(line.decode().strip().split()) - - return mask_cmd(cmd) diff --git a/isimip_files_api/commands/__init__.py b/isimip_files_api/commands/__init__.py new file mode 100644 index 0000000..61388ba --- /dev/null +++ b/isimip_files_api/commands/__init__.py @@ -0,0 +1,42 @@ +from flask import current_app as app + +from ..utils import import_class + + +class CommandsRegistry: + + def __init__(self): + self.commands = {} + for python_path in app.config['COMMANDS']: + command_class = import_class(python_path) + self.commands[command_class.command] = command_class + + def get(self, command): + if command in self.commands: + return self.commands[command]() + + +class BaseCommand: + + def __init__(self): + self.operations = [] + + def execute(self, input_path, output_path): + raise NotImplementedError + + + def get_suffix(self): + # loop over operations and take the first one + for operation in self.operations: + suffix = operation.get_suffix() + if suffix is not None: + return suffix + + def get_region(self): + # loop over operations concat the regions with a hyphen + regions = [] + for operation in self.operations: + region = operation.get_region() + if region is not None: + regions.append(region) + return '-'.join(regions) diff --git a/isimip_files_api/commands/cdo.py b/isimip_files_api/commands/cdo.py new file mode 100644 index 0000000..7cb775f --- /dev/null +++ b/isimip_files_api/commands/cdo.py @@ -0,0 +1,50 @@ +import csv +import subprocess + +from flask import current_app as app + +from ..utils import mask_paths +from . 
import BaseCommand + + +class CdoCommand(BaseCommand): + + command = 'cdo' + + def execute(self, input_path, output_path): + write_csv = (self.get_suffix() == '.csv') + + # use the cdo bin from the config, NETCDF4_CLASSIC and compression + cmd_args = [app.config['CDO_BIN'], '-f', 'nc4c', '-z', 'zip_5'] + + # collect args from operations + for operation in self.operations: + operation.input_path = input_path + operation.output_path = output_path + cmd_args += operation.get_args() + + # add the input file + cmd_args += [str(input_path)] + + # add the output file + if not write_csv: + cmd_args += [str(output_path)] + + # join the cmd_args and execute the the command + cmd = ' '.join(cmd_args) + app.logger.debug(cmd) + output = subprocess.check_output(cmd_args, env={ + 'CDI_VERSION_INFO': '0', + 'CDO_VERSION_INFO': '0', + 'CDO_HISTORY_INFO': '0' + }) + + # write the subprocess output into a csv file + if write_csv: + with open(output_path, 'w', newline='') as fp: + writer = csv.writer(fp, delimiter=',') + for line in output.splitlines(): + writer.writerow(line.decode().strip().split()) + + # return the command without the paths + return mask_paths(cmd) diff --git a/isimip_files_api/commands/ncks.py b/isimip_files_api/commands/ncks.py new file mode 100644 index 0000000..9c19d07 --- /dev/null +++ b/isimip_files_api/commands/ncks.py @@ -0,0 +1,32 @@ +import subprocess + +from flask import current_app as app + +from ..utils import mask_paths +from . 
import BaseCommand + + +class NcksCommand(BaseCommand): + + command = 'ncks' + + def execute(self, input_path, output_path): + # use the ncks bin from the config + cmd_args = [app.config['NCKS_BIN']] + + # add the arguments from the operations + for operation in self.operations: + operation.input_path = input_path + operation.output_path = output_path + cmd_args += operation.get_args() + + # add the input file and output file + cmd_args += [str(input_path), str(output_path)] + + # join the cmd_args and execute the the command + cmd = ' '.join(cmd_args) + app.logger.debug(cmd) + subprocess.check_output(cmd_args) + + # return the command without the paths + return mask_paths(cmd) diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index ef1b73b..795a6b6 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -33,11 +33,23 @@ # maximal number of files to process in one job MAX_FILES = 32 +# list of commands which can be executed +COMMANDS = [ + 'isimip_files_api.commands.cdo.CdoCommand', + 'isimip_files_api.commands.ncks.NcksCommand' +] + # list of operations which can be performed OPERATIONS = [ - 'isimip_files_api.operations.cdo.OutputtabOperation', + 'isimip_files_api.operations.cdo.SelectBBoxOperation', + 'isimip_files_api.operations.cdo.SelectCountryOperation', + 'isimip_files_api.operations.cdo.SelectPointOperation', + 'isimip_files_api.operations.cdo.MaskBBoxOperation', + 'isimip_files_api.operations.cdo.MaskCountryOperation', + 'isimip_files_api.operations.cdo.MaskLandonlyOperation', 'isimip_files_api.operations.cdo.FldmeanOperation', - 'isimip_files_api.operations.cdo.SelectBBoxOperation' + 'isimip_files_api.operations.cdo.OutputtabOperation', + 'isimip_files_api.operations.ncks.CutOutBBoxOperation' ] # list of tasks which can be performed diff --git a/isimip_files_api/jobs.py b/isimip_files_api/jobs.py index 127362f..e5ac69b 100644 --- a/isimip_files_api/jobs.py +++ b/isimip_files_api/jobs.py @@ -23,15 +23,15 @@ def 
count_jobs(): 'scheduled': queue.scheduled_job_registry.count } -def create_job(paths, operations): +def create_job(data): redis = Redis.from_url(app.config['REDIS_URL']) - job_id = get_hash(paths, operations) + job_id = get_hash(data) try: job = Job.fetch(job_id, connection=redis) return get_response(job, 200) except NoSuchJobError: - job = Job.create(run_task, id=job_id, args=[paths, operations], + job = Job.create(run_task, id=job_id, args=[data['paths'], data['operations']], timeout=app.config['WORKER_TIMEOUT'], ttl=app.config['WORKER_TTL'], result_ttl=app.config['WORKER_RESULT_TTL'], diff --git a/isimip_files_api/nco.py b/isimip_files_api/nco.py deleted file mode 100644 index a2d16a7..0000000 --- a/isimip_files_api/nco.py +++ /dev/null @@ -1,29 +0,0 @@ -import logging -import subprocess - -from flask import current_app as app - -from .utils import mask_cmd - - -def cutout_bbox(dataset_path, output_path, bbox): - # ncks -O -h -d lat,SOUTH,NORTH -d lon,WEST,EAST IFILE OFILE - south, north, west, east = bbox - return ncks( - '-O', # overwrite - '-h', # omit history - '-d', f'lat,{south:f},{north:f}', # longitude - '-d', f'lon,{west:f},{east:f}', # latitude - str(dataset_path), # input - str(output_path) # output - ) - - -def ncks(*args): - cmd_args = [app.config['NCKS_BIN'], *list(args)] - cmd = ' '.join(cmd_args) - - logging.debug(cmd) - subprocess.check_output(cmd_args) - - return mask_cmd(cmd) diff --git a/isimip_files_api/operations/__init__.py b/isimip_files_api/operations/__init__.py index 723413f..cfe9efb 100644 --- a/isimip_files_api/operations/__init__.py +++ b/isimip_files_api/operations/__init__.py @@ -6,16 +6,14 @@ class OperationRegistry: def __init__(self): - from flask import current_app as app - self.operations = {} for python_path in app.config['OPERATIONS']: operation_class = import_class(python_path) - self.operations[operation_class.specifier] = operation_class + self.operations[operation_class.operation] = operation_class def get(self, 
config): - if 'specifier' in config and config['specifier'] in self.operations: - return self.operations[config['specifier']](config) + if 'operation' in config and config['operation'] in self.operations: + return self.operations[config['operation']](config) class BaseOperation: @@ -24,7 +22,16 @@ def __init__(self, config): self.config = config def validate(self): - pass + raise NotImplementedError + + def get_args(self): + raise NotImplementedError + + def get_suffix(self): + return None + + def get_region(self): + return None class BBoxOperationMixin: @@ -44,7 +51,7 @@ def validate_bbox(self): except (ValueError, IndexError): return ['bbox is not of the form [%f, %f, %f, %f]'] else: - return [f'bbox is missing for operation "{self.specifier}"'] + return [f'bbox is missing for operation "{self.operation}"'] class PointOperationMixin: @@ -62,7 +69,7 @@ def validate_point(self): except (ValueError, IndexError): return ['bbox is not of the form [%f, %f]'] else: - return [f'point is missing for operation "{self.specifier}"'] + return [f'point is missing for operation "{self.operation}"'] class CountryOperationMixin: @@ -75,4 +82,4 @@ def validate_country(self): if self.get_country() not in app.config['COUNTRYMASKS_COUNTRIES']: return ['country not in the list of supported countries (e.g. DEU)'] else: - return [f'country is missing for operation "{self.specifier}"'] + return [f'country is missing for operation "{self.operation}"'] diff --git a/isimip_files_api/operations/cdo.py b/isimip_files_api/operations/cdo.py index 2f9cc3c..13d7bc3 100644 --- a/isimip_files_api/operations/cdo.py +++ b/isimip_files_api/operations/cdo.py @@ -2,46 +2,18 @@ from flask import current_app as app +from ..netcdf import get_index from . 
import BaseOperation, BBoxOperationMixin, CountryOperationMixin, PointOperationMixin class CdoOperation(BaseOperation): - agent = 'cdo' - - def get_command(self): - return [app.config['CDO_BIN']] - - def get_cmd_args(self): - return ['x', 'y'] - - def get_env(self): - return { - 'CDI_VERSION_INFO': '0', - 'CDO_VERSION_INFO': '0', - 'CDO_HISTORY_INFO': '0' - } - - # def execute(*args, output_path=None): - # cmd_args = [app.config['CDO_BIN'], *list(args)] - # cmd = ' '.join(cmd_args) - # env = - - # app.logger.debug(cmd) - # output = subprocess.check_output(cmd_args, env=env) - - # if output_path: - # with open(output_path, 'w', newline='') as fp: - # writer = csv.writer(fp, delimiter=',') - # for line in output.splitlines(): - # writer.writerow(line.decode().strip().split()) - - # return mask_cmd(cmd) + command = 'cdo' class SelectBBoxOperation(BBoxOperationMixin, CdoOperation): - specifier = 'select_bbox' + operation = 'select_bbox' def validate(self): return self.validate_bbox() @@ -50,10 +22,14 @@ def get_args(self): south, north, west, east = self.get_bbox() return [f'-sellonlatbox,{west:f},{east:f},{south:f},{north:f}'] + def get_region(self): + south, north, west, east = self.get_bbox() + return f'lat{south}to{north}lon{west}to{east}' + class SelectCountryOperation(CountryOperationMixin, CdoOperation): - specifier = 'select_country' + operation = 'select_country' def validate(self): return self.validate_country() @@ -63,27 +39,33 @@ def get_args(self): mask_path = str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) return ['-ifthen', f'-selname,m_{country:3.3}', mask_path] + def get_region(self): + return self.get_country().lower() + class SelectPointOperation(PointOperationMixin, CdoOperation): - specifier = 'select_point' + operation = 'select_point' def validate(self): return self.validate_point() - # def cmd_args(self): - # # cdo -s outputtab,date,value,nohead -selindexbox,IX,IX,IY,IY IFILE - # ix, iy = get_index(dataset_path, point) + def 
get_args(self): + point = self.get_point() + ix, iy = get_index(self.input_path, point) - # # add one since cdo is counting from 1! - # ix, iy = ix + 1, iy + 1 + # add one since cdo is counting from 1! + ix, iy = ix + 1, iy + 1 - # return [f'-selindexbox,{ix:d},{ix:d},{iy:d},{iy:d}'] + return [f'-selindexbox,{ix:d},{ix:d},{iy:d},{iy:d}'] + def get_region(self): + lat, lon = self.get_point() + return f'lat{lat}lon{lon}' class MaskBBoxOperation(BBoxOperationMixin, CdoOperation): - specifier = 'mask_bbox' + operation = 'mask_bbox' def validate(self): return self.validate_bbox() @@ -92,10 +74,14 @@ def get_args(self): south, north, west, east = self.get_bbox() return [f'-masklonlatbox,{west:f},{east:f},{south:f},{north:f}'] + def get_region(self): + south, north, west, east = self.get_bbox() + return f'lat{south}to{north}lon{west}to{east}' + class MaskCountryOperation(CountryOperationMixin, CdoOperation): - specifier = 'mask_country' + operation = 'mask_country' def validate(self): return self.validate_country() @@ -105,19 +91,31 @@ def get_args(self): mask_path = str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) return [f'-selname,m_{country:3.3}', mask_path] + def get_region(self): + return self.get_country().lower() + class MaskLandonlyOperation(CdoOperation): - specifier = 'mask_landonly' + operation = 'mask_landonly' + + def validate(self): + pass def get_args(self): mask_path = str(Path(app.config['LANDSEAMASK_FILE_PATH']).expanduser()) return ['-ifthen', mask_path] + def get_region(self): + return 'landonly' + class FldmeanOperation(CdoOperation): - specifier = 'fldmean' + operation = 'fldmean' + + def validate(self): + pass def get_args(self): return ['-fldmean'] @@ -125,7 +123,13 @@ def get_args(self): class OutputtabOperation(CdoOperation): - specifier = 'outputtab' + operation = 'outputtab' + + def validate(self): + pass def get_args(self): - return ['-s outputtab,date,value,nohead'] + return ['-s', 'outputtab,date,value,nohead'] + + def 
get_suffix(self): + return '.csv' diff --git a/isimip_files_api/operations/ncks.py b/isimip_files_api/operations/ncks.py new file mode 100644 index 0000000..40bb1f4 --- /dev/null +++ b/isimip_files_api/operations/ncks.py @@ -0,0 +1,26 @@ +from . import BaseOperation, BBoxOperationMixin + + +class NcksOperation(BaseOperation): + + command = 'ncks' + + +class CutOutBBoxOperation(BBoxOperationMixin, NcksOperation): + + operation = 'cutout_bbox' + + def validate(self): + return self.validate_bbox() + + def get_args(self): + south, north, west, east = self.get_bbox() + return [ + '-h', # omit history + '-d', f'lat,{south:f},{north:f}', # longitude + '-d', f'lon,{west:f},{east:f}', # latitude + ] + + def get_region(self): + south, north, west, east = self.get_bbox() + return f'lat{south}to{north}lon{west}to{east}' diff --git a/isimip_files_api/operations/nco.py b/isimip_files_api/operations/nco.py deleted file mode 100644 index 1095ccd..0000000 --- a/isimip_files_api/operations/nco.py +++ /dev/null @@ -1,23 +0,0 @@ - -from . 
import BaseOperation, BBoxOperationMixin - - -class NcoOperation(BaseOperation): - pass - - # def execute(*args, output_path=None): - # cmd_args = [app.config['NCKS_BIN'], *list(args)] - # cmd = ' '.join(cmd_args) - - # logging.debug(cmd) - # subprocess.check_output(cmd_args) - - # return mask_cmd(cmd) - - -class CutoutBBoxOperation(BBoxOperationMixin, NcoOperation): - - specifier = 'cutout_bbox' - - def validate(self): - return self.validate_bbox() diff --git a/isimip_files_api/tasks.py b/isimip_files_api/tasks.py index 481bd1b..fb762f0 100644 --- a/isimip_files_api/tasks.py +++ b/isimip_files_api/tasks.py @@ -7,6 +7,8 @@ from rq import get_current_job +from .commands import CommandsRegistry +from .operations import OperationRegistry from .utils import get_zip_file_name @@ -18,70 +20,76 @@ def run_task(paths, operations): job.save_meta() # create output paths - output_path = Path(app.config['OUTPUT_PATH']).expanduser() / get_zip_file_name(job.id) - output_path.parent.mkdir(parents=True, exist_ok=True) + zip_path = Path(app.config['OUTPUT_PATH']).expanduser() / get_zip_file_name(job.id) + zip_path.parent.mkdir(parents=True, exist_ok=True) # create a temporary directory - tmp = Path(mkdtemp(prefix=app.config['OUTPUT_PREFIX'])) + tmp_path = Path(mkdtemp(prefix=app.config['OUTPUT_PREFIX'])) # open zipfile - z = ZipFile(output_path, 'w') + z = ZipFile(zip_path, 'w') # open readme - readme_path = tmp / 'README.txt' + readme_path = tmp_path / 'README.txt' readme = readme_path.open('w') readme.write('The following commands were used to create the files in this container:\n\n') - for path in paths: - input_path = Path(app.config['INPUT_PATH']).expanduser() / path - app.logger.warn(input_path) - - for operation in operations: - print(operation) - app.logger.error(operation) - - # if args['task'] in ['select_country', 'select_bbox', 'select_point']: - # tmp_name = get_output_name(path, args, suffix='.csv') - # else: - # tmp_name = get_output_name(path, args) - - # tmp_path = 
tmp / tmp_name - - # if args['task'] == 'cutout_bbox': - # cmd = cutout_bbox(input_path, tmp_path, args['bbox']) - - # elif args['task'] == 'mask_country': - # cmd = mask_country(input_path, tmp_path, args['country']) - - # elif args['task'] == 'mask_bbox': - # cmd = mask_bbox(input_path, tmp_path, args['bbox']) + commands = [] + commands_registry = CommandsRegistry() + operation_registry = OperationRegistry() + for index, operation_config in enumerate(operations): + operation = operation_registry.get(operation_config) + if not commands or commands[-1].command != operation.command: + commands.append(commands_registry.get(operation.command)) + commands[-1].operations.append(operation) - # elif args['task'] == 'mask_landonly': - # cmd = mask_landonly(input_path, tmp_path) - - # elif args['task'] == 'select_country': - # cmd = select_country(input_path, tmp_path, args['country']) - - # elif args['task'] == 'select_bbox': - # cmd = select_bbox(input_path, tmp_path, args['bbox']) - - # elif args['task'] == 'select_point': - # cmd = select_point(input_path, tmp_path, args['point']) - - # # write cmd into readme file - # readme.write(cmd + '\n') - - # if tmp_path.is_file(): - # z.write(tmp_path, tmp_name) - # else: - # error_path = Path(tmp_path).with_suffix('.txt') - # error_path.write_text('Something went wrong with processing the input file.' 
- # ' Probably it is not using a global grid.') - # z.write(error_path, error_path.name) - - # # update the current job and store progress - # job.meta['created_files'] += 1 - # job.save_meta() + for path in paths: + input_path = output_path = output_region = None + + for command in commands: + if output_path is None: + input_path = Path(app.config['INPUT_PATH']).expanduser() / path + output_path = tmp_path / input_path.name + else: + input_path = output_path + + region = command.get_region() + if region is not None: + if output_region is None: + if app.config['GLOBAL_TAG'] in output_path.name: + # replace the _global_ specifier + output_name = output_path.name.replace(app.config['GLOBAL_TAG'], f'_{region}_') + else: + output_name = output_path.stem + f'_{region}' + output_path.suffix + else: + region = f'{output_region}+{region}' + output_name = output_path.name.replace(output_region, region) + + output_region = region + output_path = output_path.with_name(output_name) + + suffix = command.get_suffix() + if suffix is not None: + output_path = output_path.with_suffix(suffix) + + # execute the command and obtain the command_string + command_string = command.execute(input_path, output_path) + + # write the command_string into readme file + readme.write(command_string + '\n') + + if output_path.is_file(): + z.write(output_path, output_path.name) + print(output_path, output_path.name) + else: + error_path = Path(tmp_path).with_suffix('.txt') + error_path.write_text('Something went wrong with processing the input file.' 
+ ' Probably it is not using a global grid.') + z.write(error_path, error_path.name) + + # update the current job and store progress + job.meta['created_files'] += 1 + job.save_meta() # close and write readme file readme.close() @@ -91,7 +99,7 @@ def run_task(paths, operations): z.close() # delete temporary directory - shutil.rmtree(tmp) + shutil.rmtree(tmp_path) # return True to indicate success return True diff --git a/isimip_files_api/utils.py b/isimip_files_api/utils.py index ef72745..7c3e681 100644 --- a/isimip_files_api/utils.py +++ b/isimip_files_api/utils.py @@ -6,44 +6,18 @@ from flask import current_app as app -def get_output_name(path, args, suffix=None): - if args.get('bbox'): - south, north, west, east = args['bbox'] - region = f'lat{south}to{north}lon{west}to{east}' - - elif args.get('country'): - region = args['country'].lower() - - elif args.get('point'): - lat, lon = args['point'] - region = f'lat{lat}lon{lon}' - - else: - region = 'landonly' - - path = Path(path) - suffix = suffix if suffix else path.suffix - if app.config['GLOBAL_TAG'] in path.name: - # replace the _global_ specifier - return path.with_suffix(suffix).name.replace(app.config['GLOBAL_TAG'], f'_{region}_') - else: - # append region specifier - return path.stem + f'_{region}' + suffix - - def get_zip_file_name(job_id): return Path(app.config['OUTPUT_PREFIX'] + job_id).with_suffix('.zip').as_posix() -def get_hash(paths, args): +def get_hash(data): m = hashlib.sha1() - m.update(str(paths).encode()) - m.update(str(args).encode()) + m.update(str(data).encode()) return m.hexdigest() -def mask_cmd(cmd): - return re.sub(r'\/\S+\/', '', cmd) +def mask_paths(string): + return re.sub(r'\/\S+\/', '', string) def import_class(string): diff --git a/isimip_files_api/validators.py b/isimip_files_api/validators.py index f58f40b..3605bb4 100644 --- a/isimip_files_api/validators.py +++ b/isimip_files_api/validators.py @@ -16,6 +16,7 @@ def validate_data(data): return errors + def validate_paths(data): 
errors = defaultdict(list) @@ -56,16 +57,16 @@ def validate_operations(data): else: operation_registry = OperationRegistry() for index, operation_config in enumerate(data['operations']): - if 'specifier' in operation_config: + if 'operation' in operation_config: operation = operation_registry.get(operation_config) if operation is None: - errors['operations'].append('operation "{specifier}" was not found'.format(**operation_config)) + errors['operations'].append('operation "{operation}" was not found'.format(**operation_config)) else: operation_errors = operation.validate() if operation_errors: errors['operations'] += operation_errors else: - errors['operations'].append(f'operation [{index}] does not have a specifier') + errors['operations'].append(f'operation [{index}] does not have a "operation" key') return errors From 5d77965133039a58eb59c69aee0009095648832c Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 14 Feb 2024 10:28:26 +0100 Subject: [PATCH 07/22] Update tests --- .gitignore | 2 + isimip_files_api/operations/__init__.py | 6 +-- isimip_files_api/tests/conftest.py | 7 ++++ isimip_files_api/tests/test_create.py | 27 +------------ .../tests/test_create_cutout_bbox.py | 39 +++++++++++++++++++ isimip_files_api/tests/test_create_fldmean.py | 11 ++++++ .../tests/test_create_mask_bbox.py | 39 +++++++++++++++++++ .../tests/test_create_mask_country.py | 39 +++++++++++++++++++ .../tests/test_create_mask_landonly.py | 11 ++++++ .../tests/test_create_outputtab.py | 11 ++++++ .../tests/test_create_select_bbox.py | 39 +++++++++++++++++++ .../tests/test_create_select_country.py | 39 +++++++++++++++++++ .../tests/test_create_select_point.py | 39 +++++++++++++++++++ 13 files changed, 280 insertions(+), 29 deletions(-) create mode 100644 isimip_files_api/tests/test_create_cutout_bbox.py create mode 100644 isimip_files_api/tests/test_create_fldmean.py create mode 100644 isimip_files_api/tests/test_create_mask_bbox.py create mode 100644 
isimip_files_api/tests/test_create_mask_country.py create mode 100644 isimip_files_api/tests/test_create_mask_landonly.py create mode 100644 isimip_files_api/tests/test_create_outputtab.py create mode 100644 isimip_files_api/tests/test_create_select_bbox.py create mode 100644 isimip_files_api/tests/test_create_select_country.py create mode 100644 isimip_files_api/tests/test_create_select_point.py diff --git a/.gitignore b/.gitignore index 3a10966..4c667ef 100644 --- a/.gitignore +++ b/.gitignore @@ -10,11 +10,13 @@ __pycache__/ /build /dist /volumes +/testing/output/ /*.egg-info /env /log /*.log /*.toml !/pyproject.toml +/testing/output/ /.env diff --git a/isimip_files_api/operations/__init__.py b/isimip_files_api/operations/__init__.py index cfe9efb..2c6f622 100644 --- a/isimip_files_api/operations/__init__.py +++ b/isimip_files_api/operations/__init__.py @@ -49,7 +49,7 @@ def validate_bbox(self): try: self.get_bbox() except (ValueError, IndexError): - return ['bbox is not of the form [%f, %f, %f, %f]'] + return [f'bbox is not of the form [%f, %f, %f, %f] for operation "{self.operation}"'] else: return [f'bbox is missing for operation "{self.operation}"'] @@ -67,7 +67,7 @@ def validate_point(self): try: self.get_point() except (ValueError, IndexError): - return ['bbox is not of the form [%f, %f]'] + return [f'point is not of the form [%f, %f] for operation "{self.operation}"'] else: return [f'point is missing for operation "{self.operation}"'] @@ -80,6 +80,6 @@ def get_country(self): def validate_country(self): if 'country' in self.config: if self.get_country() not in app.config['COUNTRYMASKS_COUNTRIES']: - return ['country not in the list of supported countries (e.g. DEU)'] + return [f'country not in the list of supported countries (e.g. 
deu) for operation "{self.operation}"'] else: return [f'country is missing for operation "{self.operation}"'] diff --git a/isimip_files_api/tests/conftest.py b/isimip_files_api/tests/conftest.py index 9d60574..12d97b9 100644 --- a/isimip_files_api/tests/conftest.py +++ b/isimip_files_api/tests/conftest.py @@ -1,5 +1,7 @@ import pytest +from redis import Redis + from ..app import create_app @@ -18,3 +20,8 @@ def app(): @pytest.fixture() def client(app): return app.test_client() + + +@pytest.fixture() +def redis(app): + return Redis.from_url(app.config['REDIS_URL']) diff --git a/isimip_files_api/tests/test_create.py b/isimip_files_api/tests/test_create.py index 31c0459..0d7823d 100644 --- a/isimip_files_api/tests/test_create.py +++ b/isimip_files_api/tests/test_create.py @@ -92,7 +92,7 @@ def test_paths_not_found(client): def test_operations_not_found(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'specifier': 'invalid' + 'operation': 'invalid' } ]}) assert response.status_code == 400 @@ -100,28 +100,3 @@ def test_operations_not_found(client): assert response.json.get('errors') == { 'operations': ['operation "invalid" was not found'] } - - -def test_select_bbox(client): - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'specifier': 'select_bbox', - 'bbox': [-23.43651, 23.43651, -180, 180] - } - ]}) - assert response.status_code == 200 - assert response.json.get('status') == 'ok' - assert response.json.get('errors') is None - - -def test_select_bbox_missing_bbox(client): - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'specifier': 'select_bbox' - } - ]}) - assert response.status_code == 400 - assert response.json.get('status') == 'error' - assert response.json.get('errors') == { - 'operations': ['bbox is missing for operation "select_bbox"'] - } diff --git a/isimip_files_api/tests/test_create_cutout_bbox.py b/isimip_files_api/tests/test_create_cutout_bbox.py 
new file mode 100644 index 0000000..196e963 --- /dev/null +++ b/isimip_files_api/tests/test_create_cutout_bbox.py @@ -0,0 +1,39 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'cutout_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None + + +def test_missing_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'cutout_bbox' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['bbox is missing for operation "cutout_bbox"'] + } + + +def test_wrong_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'cutout_bbox', + 'bbox': [-23.43651, 23.43651, -180, 'wrong'] + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['bbox is not of the form [%f, %f, %f, %f] for operation "cutout_bbox"'] + } diff --git a/isimip_files_api/tests/test_create_fldmean.py b/isimip_files_api/tests/test_create_fldmean.py new file mode 100644 index 0000000..23dd1f3 --- /dev/null +++ b/isimip_files_api/tests/test_create_fldmean.py @@ -0,0 +1,11 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'fldmean' + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None diff --git a/isimip_files_api/tests/test_create_mask_bbox.py b/isimip_files_api/tests/test_create_mask_bbox.py new file mode 100644 index 0000000..bf5fcf0 --- 
/dev/null +++ b/isimip_files_api/tests/test_create_mask_bbox.py @@ -0,0 +1,39 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None + + +def test_missing_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_bbox' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['bbox is missing for operation "mask_bbox"'] + } + + +def test_wrong_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_bbox', + 'bbox': [-23.43651, 23.43651, -180, 'wrong'] + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['bbox is not of the form [%f, %f, %f, %f] for operation "mask_bbox"'] + } diff --git a/isimip_files_api/tests/test_create_mask_country.py b/isimip_files_api/tests/test_create_mask_country.py new file mode 100644 index 0000000..3914275 --- /dev/null +++ b/isimip_files_api/tests/test_create_mask_country.py @@ -0,0 +1,39 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_country', + 'country': 'deu' + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None + + +def test_missing_country(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_country' + } + ]}) + assert 
response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['country is missing for operation "mask_country"'] + } + + +def test_wrong_country(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_country', + 'country': 'wrong' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['country not in the list of supported countries (e.g. deu) for operation "mask_country"'] + } diff --git a/isimip_files_api/tests/test_create_mask_landonly.py b/isimip_files_api/tests/test_create_mask_landonly.py new file mode 100644 index 0000000..e3e78b6 --- /dev/null +++ b/isimip_files_api/tests/test_create_mask_landonly.py @@ -0,0 +1,11 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_landonly' + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None diff --git a/isimip_files_api/tests/test_create_outputtab.py b/isimip_files_api/tests/test_create_outputtab.py new file mode 100644 index 0000000..e5253ec --- /dev/null +++ b/isimip_files_api/tests/test_create_outputtab.py @@ -0,0 +1,11 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'outputtab' + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None diff --git a/isimip_files_api/tests/test_create_select_bbox.py b/isimip_files_api/tests/test_create_select_bbox.py new file mode 100644 index 0000000..541f092 --- /dev/null +++ b/isimip_files_api/tests/test_create_select_bbox.py @@ -0,0 
+1,39 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None + + +def test_missing_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_bbox' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['bbox is missing for operation "select_bbox"'] + } + + +def test_wrong_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_bbox', + 'bbox': [-23.43651, 23.43651, -180, 'wrong'] + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['bbox is not of the form [%f, %f, %f, %f] for operation "select_bbox"'] + } diff --git a/isimip_files_api/tests/test_create_select_country.py b/isimip_files_api/tests/test_create_select_country.py new file mode 100644 index 0000000..0a78db2 --- /dev/null +++ b/isimip_files_api/tests/test_create_select_country.py @@ -0,0 +1,39 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_country', + 'country': 'deu' + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None + + +def test_missing_country(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_country' + } + ]}) + assert response.status_code == 400 + assert 
response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['country is missing for operation "select_country"'] + } + + +def test_wrong_country(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_country', + 'country': 'wrong' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['country not in the list of supported countries (e.g. deu) for operation "select_country"'] + } diff --git a/isimip_files_api/tests/test_create_select_point.py b/isimip_files_api/tests/test_create_select_point.py new file mode 100644 index 0000000..15fbeb6 --- /dev/null +++ b/isimip_files_api/tests/test_create_select_point.py @@ -0,0 +1,39 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_point', + 'point': [52.380551, 13.064332] + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None + + +def test_missing_bbox(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_point' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['point is missing for operation "select_point"'] + } + + +def test_wrong_point(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'select_point', + 'point': [52.380551, 'wrong'] + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['point is not of the form [%f, %f] for operation "select_point"'] + } From 834b81294cd14e6d03e7d8a1efad0f947196f6a8 
Mon Sep 17 00:00:00 2001
From: Jochen Klar
Date: Wed, 14 Feb 2024 12:38:42 +0100
Subject: [PATCH 08/22] Add tests for detail and delete routes

---
 isimip_files_api/tests/test_delete.py | 21 +++++++++++++++++++++
 isimip_files_api/tests/test_detail.py | 21 +++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 isimip_files_api/tests/test_delete.py
 create mode 100644 isimip_files_api/tests/test_detail.py

diff --git a/isimip_files_api/tests/test_delete.py b/isimip_files_api/tests/test_delete.py
new file mode 100644
index 0000000..f598ece
--- /dev/null
+++ b/isimip_files_api/tests/test_delete.py
@@ -0,0 +1,21 @@
+def mocked_delete_job(job_id):
+    if job_id == 'test':
+        return {}, 204
+    else:
+        return {}, 404
+
+
+def test_success(client, mocker):
+    mocker.patch('isimip_files_api.app.delete_job', mocked_delete_job)
+
+    response = client.delete('/test')
+
+    assert response.status_code == 204
+
+
+def test_wrong_id(client, mocker):
+    mocker.patch('isimip_files_api.app.delete_job', mocked_delete_job)
+
+    response = client.delete('/wrong')
+
+    assert response.status_code == 404
diff --git a/isimip_files_api/tests/test_detail.py b/isimip_files_api/tests/test_detail.py
new file mode 100644
index 0000000..d8b53de
--- /dev/null
+++ b/isimip_files_api/tests/test_detail.py
@@ -0,0 +1,21 @@
+def mocked_fetch_job(job_id):
+    if job_id == 'test':
+        return {}, 200
+    else:
+        return {}, 404
+
+
+def test_success(client, mocker):
+    mocker.patch('isimip_files_api.app.fetch_job', mocked_fetch_job)
+
+    response = client.get('/test')
+
+    assert response.status_code == 200
+
+
+def test_wrong_id(client, mocker):
+    mocker.patch('isimip_files_api.app.fetch_job', mocked_fetch_job)
+
+    response = client.get('/wrong')
+
+    assert response.status_code == 404

From a0fb74cd63e4556b32fa0d8ecd81cebb2b5c17db Mon Sep 17 00:00:00 2001
From: Jochen Klar
Date: Wed, 14 Feb 2024 12:51:11 +0100
Subject: [PATCH 09/22] Add commands and operations to root route

---
 isimip_files_api/app.py               | 6 +++++-
isimip_files_api/commands/__init__.py | 2 +- isimip_files_api/config.py | 2 +- isimip_files_api/tasks.py | 6 +++--- isimip_files_api/tests/test_root.py | 22 ++++++++++++++++++++++ 5 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 isimip_files_api/tests/test_root.py diff --git a/isimip_files_api/app.py b/isimip_files_api/app.py index 6510417..a1843be 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -3,8 +3,10 @@ import tomli from flask_cors import CORS as FlaskCORS +from .commands import CommandRegistry from .jobs import count_jobs, create_job, delete_job, fetch_job from .logging import configure_logging +from .operations import OperationRegistry from .responses import get_errors_response from .validators import validate_data, validate_operations, validate_paths @@ -28,7 +30,9 @@ def create_app(): def index(): return { 'status': 'ok', - 'jobs': count_jobs() + 'jobs': count_jobs(), + 'commands': list(CommandRegistry().commands.keys()), + 'operations': list(OperationRegistry().operations.keys()), }, 200 @app.route('/', methods=['POST']) diff --git a/isimip_files_api/commands/__init__.py b/isimip_files_api/commands/__init__.py index 61388ba..8dd74da 100644 --- a/isimip_files_api/commands/__init__.py +++ b/isimip_files_api/commands/__init__.py @@ -3,7 +3,7 @@ from ..utils import import_class -class CommandsRegistry: +class CommandRegistry: def __init__(self): self.commands = {} diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index 795a6b6..99992f8 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -15,7 +15,7 @@ LOG_LEVEL = 'ERROR' LOG_PATH = None -# the base url the api is running on, in production this will be something like https://api.example.com/api/v1 +# the base url the api is running on, in production this will be something like https://api.example.com/api/v2 BASE_URL = 'http://127.0.0.1:5000' # the output url the download packages will be available on diff --git 
a/isimip_files_api/tasks.py b/isimip_files_api/tasks.py index fb762f0..08db2db 100644 --- a/isimip_files_api/tasks.py +++ b/isimip_files_api/tasks.py @@ -7,7 +7,7 @@ from rq import get_current_job -from .commands import CommandsRegistry +from .commands import CommandRegistry from .operations import OperationRegistry from .utils import get_zip_file_name @@ -35,12 +35,12 @@ def run_task(paths, operations): readme.write('The following commands were used to create the files in this container:\n\n') commands = [] - commands_registry = CommandsRegistry() + command_registry = CommandRegistry() operation_registry = OperationRegistry() for index, operation_config in enumerate(operations): operation = operation_registry.get(operation_config) if not commands or commands[-1].command != operation.command: - commands.append(commands_registry.get(operation.command)) + commands.append(command_registry.get(operation.command)) commands[-1].operations.append(operation) for path in paths: diff --git a/isimip_files_api/tests/test_root.py b/isimip_files_api/tests/test_root.py new file mode 100644 index 0000000..5ac6d8b --- /dev/null +++ b/isimip_files_api/tests/test_root.py @@ -0,0 +1,22 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.count_jobs', mocker.Mock(return_value={})) + + response = client.get('/') + + assert response.status_code == 200 + assert response.json.get('status') == 'ok' + assert response.json.get('commands') == [ + 'cdo', + 'ncks' + ] + assert response.json.get('operations') == [ + 'select_bbox', + 'select_country', + 'select_point', + 'mask_bbox', + 'mask_country', + 'mask_landonly', + 'fldmean', + 'outputtab', + 'cutout_bbox' + ] From 8e6141b8ff9982f37bd9ff51d2d04762a9b9c806 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 14 Feb 2024 14:32:53 +0100 Subject: [PATCH 10/22] Add MAX_COMMANDS and MAX_OPERATIONS to config --- isimip_files_api/config.py | 18 +++++-------- isimip_files_api/operations/__init__.py | 13 +++++++++ 
isimip_files_api/tasks.py | 13 +++------ isimip_files_api/tests/conftest.py | 5 +++- isimip_files_api/tests/test_create.py | 36 ++++++++++++++++++++++++- isimip_files_api/validators.py | 7 +++++ 6 files changed, 69 insertions(+), 23 deletions(-) diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index 99992f8..f6162f8 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -39,6 +39,9 @@ 'isimip_files_api.commands.ncks.NcksCommand' ] +# maximum number of commands which can be performed +MAX_COMMANDS = 2 + # list of operations which can be performed OPERATIONS = [ 'isimip_files_api.operations.cdo.SelectBBoxOperation', @@ -52,18 +55,11 @@ 'isimip_files_api.operations.ncks.CutOutBBoxOperation' ] -# list of tasks which can be performed -TASKS = [ - 'cutout_bbox', - 'mask_bbox', - 'mask_country', - 'mask_landonly', - 'select_bbox', - 'select_country', - 'select_point' -] +# maximum number of operations which can be performed +MAX_OPERATIONS = 16 -# the tag which designates global files +# the tag which designates global files, this tag will be replaced by the region +# specifier of the operations, if set to None, the region will be appended GLOBAL_TAG = '_global_' # list of the allowed resolution tags per task diff --git a/isimip_files_api/operations/__init__.py b/isimip_files_api/operations/__init__.py index 2c6f622..f9035de 100644 --- a/isimip_files_api/operations/__init__.py +++ b/isimip_files_api/operations/__init__.py @@ -1,5 +1,6 @@ from flask import current_app as app +from ..commands import CommandRegistry from ..utils import import_class @@ -15,6 +16,18 @@ def get(self, config): if 'operation' in config and config['operation'] in self.operations: return self.operations[config['operation']](config) + def get_command_list(self, operations): + commands = [] + + command_registry = CommandRegistry() + for index, operation_config in enumerate(operations): + operation = self.get(operation_config) + if not commands or 
commands[-1].command != operation.command: + commands.append(command_registry.get(operation.command)) + commands[-1].operations.append(operation) + + return commands + class BaseOperation: diff --git a/isimip_files_api/tasks.py b/isimip_files_api/tasks.py index 08db2db..b9d2376 100644 --- a/isimip_files_api/tasks.py +++ b/isimip_files_api/tasks.py @@ -7,7 +7,6 @@ from rq import get_current_job -from .commands import CommandRegistry from .operations import OperationRegistry from .utils import get_zip_file_name @@ -34,19 +33,13 @@ def run_task(paths, operations): readme = readme_path.open('w') readme.write('The following commands were used to create the files in this container:\n\n') - commands = [] - command_registry = CommandRegistry() - operation_registry = OperationRegistry() - for index, operation_config in enumerate(operations): - operation = operation_registry.get(operation_config) - if not commands or commands[-1].command != operation.command: - commands.append(command_registry.get(operation.command)) - commands[-1].operations.append(operation) + # construct command list from the operations + command_list = OperationRegistry().get_command_list(operations) for path in paths: input_path = output_path = output_region = None - for command in commands: + for command in command_list: if output_path is None: input_path = Path(app.config['INPUT_PATH']).expanduser() / path output_path = tmp_path / input_path.name diff --git a/isimip_files_api/tests/conftest.py b/isimip_files_api/tests/conftest.py index 12d97b9..9af8c47 100644 --- a/isimip_files_api/tests/conftest.py +++ b/isimip_files_api/tests/conftest.py @@ -12,11 +12,14 @@ def app(): 'TESTING': True, 'INPUT_PATH': 'testing/input', 'OUTPUT_PATH': 'testing/output', - 'MAX_FILES': 8 + 'MAX_FILES': 8, + 'MAX_COMMANDS': 2, + 'MAX_OPERATIONS': 8 }) yield app + @pytest.fixture() def client(app): return app.test_client() diff --git a/isimip_files_api/tests/test_create.py b/isimip_files_api/tests/test_create.py index 
0d7823d..3566e41 100644
--- a/isimip_files_api/tests/test_create.py
+++ b/isimip_files_api/tests/test_create.py
@@ -32,7 +32,7 @@ def test_malformatted(client):
     }


-def test_paths_to_many(client):
+def test_paths_to_many_files(client):
     response = client.post('/', json={'paths': [
         'test1.nc',
         'test2.nc',
@@ -100,3 +100,37 @@ def test_operations_not_found(client):
     assert response.json.get('errors') == {
         'operations': ['operation "invalid" was not found']
     }
+
+
+def test_operations_to_many_commands(client):
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'cutout_bbox',
+            'bbox': [-10, 10, -10, 10]
+        },
+        {
+            'operation': 'mask_landonly'
+        },
+        {
+            'operation': 'cutout_bbox',
+            'bbox': [-23.43651, 23.43651, -180, 180]
+        }
+    ]})
+    assert response.status_code == 400
+    assert response.json.get('status') == 'error'
+    assert response.json.get('errors') == {
+        'operations': ['Operations result in too many commands (max: 2).']
+    }
+
+
+def test_operations_to_many_operations(client):
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_landonly'
+        } for i in range(10)
+    ]})
+    assert response.status_code == 400
+    assert response.json.get('status') == 'error'
+    assert response.json.get('errors') == {
+        'operations': ['Too many operations provided (max: 8).']
+    }
diff --git a/isimip_files_api/validators.py b/isimip_files_api/validators.py
index 3605bb4..6a5c6c9 100644
--- a/isimip_files_api/validators.py
+++ b/isimip_files_api/validators.py
@@ -54,6 +54,8 @@ def validate_operations(data):
         errors['operations'].append('This field is required.')
     elif not isinstance(data['operations'], list):
         errors['operations'].append('Provided json data is malformatted.')
+    elif len(data['operations']) > app.config['MAX_OPERATIONS']:
+        errors['operations'].append('Too many operations provided (max: {MAX_OPERATIONS}).'.format(**app.config))
     else:
         operation_registry = OperationRegistry()
         for index, operation_config in enumerate(data['operations']):
@@ -68,6 +70,11 @@ def validate_operations(data):
         else:
             errors['operations'].append(f'operation [{index}] does not have a "operation" key')

+    if not errors and len(operation_registry.get_command_list(data['operations'])) > app.config['MAX_COMMANDS']:
+        errors['operations'].append('Operations result in too many commands (max: {MAX_COMMANDS}).'.format(
+            **app.config
+        ))
+
     return errors

From fa0d946d0dddc318fcb63b1175f3e29718dee751 Mon Sep 17 00:00:00 2001
From: Jochen Klar
Date: Wed, 14 Feb 2024 14:43:44 +0100
Subject: [PATCH 11/22] Remove resolution check code

---
 isimip_files_api/config.py     | 24 ------------------------
 isimip_files_api/validators.py | 10 ----------
 2 files changed, 34 deletions(-)

diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py
index f6162f8..5713689 100644
--- a/isimip_files_api/config.py
+++ b/isimip_files_api/config.py
@@ -62,30 +62,6 @@
 # specifier of the operations, if set to None, the region will be appended
 GLOBAL_TAG = '_global_'

-# list of the allowed resolution tags per task
-RESOLUTION_TAGS = {
-    'cutout_bbox': ['30arcsec', '90arcsec', '300arcsec', '1800arcsec',
-                    '15arcmin', '30arcmin', '60arcmin', '120arcmin'],
-    'mask_bbox': ['15arcmin', '30arcmin', '60arcmin', '120arcmin'],
-    'mask_country': ['30arcmin'],
-    'mask_landonly': ['30arcmin'],
-    'select_bbox': ['15arcmin', '30arcmin', '60arcmin', '120arcmin'],
-    'select_country': ['30arcmin'],
-    'select_point': ['15arcmin', '30arcmin', '60arcmin', '120arcmin']
-}
-
-# list of the concrete number of gridpoints for each resolution tag
-RESOLUTIONS = {
-    '30arcsec': (20880, 43200),
-    '90arcsec': (6960, 14400),
-    '300arcsec': (2088, 4320),
-    '1800arcsec': (348, 720),
-    '15arcmin': (720, 1440),
-    '30arcmin': (360, 720),
-    '60arcmin': (180, 360),
-    '120arcmin': (90, 180)
-}
-
 # the cdo binary on the system, e.g.
/usr/bin/cdo CDO_BIN = 'cdo' diff --git a/isimip_files_api/validators.py b/isimip_files_api/validators.py index 6a5c6c9..acef4bd 100644 --- a/isimip_files_api/validators.py +++ b/isimip_files_api/validators.py @@ -76,13 +76,3 @@ def validate_operations(data): )) return errors - - -# # def validate_datasets(paths, args, errors): -# # for path in paths: -# # input_path = Path(app.config['INPUT_PATH']).expanduser() -# # absolute_path = input_path / path -# # with open_dataset(absolute_path) as ds: -# # resolutions = app.config['RESOLUTION_TAGS'].get(args.get('task')) -# # if not any(check_resolution(ds, resolution) for resolution in resolutions): -# # errors['paths'].append(f'{path} is not using the correct grid: {resolutions}.') From bbe68a65c2054861cb8075dfc99b94ea4fe4526d Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Wed, 14 Feb 2024 15:52:55 +0100 Subject: [PATCH 12/22] Fix pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index bdd4a0f..e0335ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dev = [ "pre-commit", "pytest", "pytest-cov", + "pytest-mock", "ruff", "twine" ] From dc4d9663067bbed007f0c14c73cb23e931f2e01d Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Tue, 20 Feb 2024 14:03:34 +0100 Subject: [PATCH 13/22] Add Makefile, remove run.sh, and refactor setup --- run.sh => Makefile | 22 +++++----------------- isimip_files_api/commands/cdo.py | 2 +- isimip_files_api/config.py | 4 ++-- isimip_files_api/worker.py | 4 ++++ pyproject.toml | 3 ++- 5 files changed, 14 insertions(+), 21 deletions(-) rename run.sh => Makefile (56%) mode change 100755 => 100644 diff --git a/run.sh b/Makefile old mode 100755 new mode 100644 similarity index 56% rename from run.sh rename to Makefile index a971ffd..d067064 --- a/run.sh +++ b/Makefile @@ -1,25 +1,13 @@ -#!/bin/bash - # set environment variables for `flask run` and `rq worker` for development # in production, the variables should be 
set in systemd or docker files export FLASK_APP=isimip_files_api.app export FLASK_ENV=development -export FLASK_DEBUG=true -export FLASK_CONFIG=../config.toml export RQ_WORKER_CLASS=isimip_files_api.worker.Worker -case $1 in - - server) - flask run - ;; - - worker) - rq worker - ;; +server: + flask run - *) - echo "usage: ../run.sh server|worker" - ;; +worker: + rq worker -esac +.PHONY: server worker diff --git a/isimip_files_api/commands/cdo.py b/isimip_files_api/commands/cdo.py index 7cb775f..7d1895a 100644 --- a/isimip_files_api/commands/cdo.py +++ b/isimip_files_api/commands/cdo.py @@ -15,7 +15,7 @@ def execute(self, input_path, output_path): write_csv = (self.get_suffix() == '.csv') # use the cdo bin from the config, NETCDF4_CLASSIC and compression - cmd_args = [app.config['CDO_BIN'], '-f', 'nc4c', '-z', 'zip_5'] + cmd_args = [app.config['CDO_BIN'], '-f', 'nc4c', '-z', 'zip_5', '-L'] # collect args from operations for operation in self.operations: diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index 5713689..26a43ba 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -22,10 +22,10 @@ OUTPUT_URL = 'http://127.0.0.1/api/output/' # input path to the NetCDF files to process -INPUT_PATH = '..' +INPUT_PATH = '.' # output path to store the created download packages, this directory should be exposed on OUTPUT_URL -OUTPUT_PATH = '..' +OUTPUT_PATH = '.' 
# output prefix to be prepended to the job ID to create the filename for the download package OUTPUT_PREFIX = 'download-' diff --git a/isimip_files_api/worker.py b/isimip_files_api/worker.py index 74d313c..7d7d3d3 100644 --- a/isimip_files_api/worker.py +++ b/isimip_files_api/worker.py @@ -1,11 +1,15 @@ from rq import Worker as RQWorker +from dotenv import load_dotenv + from .app import create_app class Worker(RQWorker): def work(self, *args, **kwargs): + load_dotenv() + app = create_app() with app.app_context(): diff --git a/pyproject.toml b/pyproject.toml index e0335ca..06447e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,9 @@ dependencies = [ "gunicorn~=21.2.0", "netCDF4~=1.6.5", "numpy~=1.26.3", + "python-dotenv~=1.0.0", "tomli", - "rq~=1.15.1" + "rq~=1.15.1", ] dynamic = ["version"] From 150e42c9a6c252e22eda29be106b74355702fdb6 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Tue, 20 Feb 2024 16:26:43 +0100 Subject: [PATCH 14/22] Add MaskShapeOperation and ShapeOperationMixin --- .pre-commit-config.yaml | 3 +- isimip_files_api/config.py | 1 + isimip_files_api/operations/__init__.py | 38 +++++++ isimip_files_api/operations/cdo.py | 13 ++- .../tests/test_create_mask_shape.py | 102 +++++++++++++++++ testing/shapes/pm.dbf | Bin 0 -> 871 bytes testing/shapes/pm.json | 105 ++++++++++++++++++ testing/shapes/pm.prj | 1 + testing/shapes/pm.shp | Bin 0 -> 444 bytes testing/shapes/pm.shx | Bin 0 -> 108 bytes testing/shapes/pm.zip | Bin 0 -> 1351 bytes testing/shapes/wrong.txt | 0 testing/shapes/wrong.zip | Bin 0 -> 196 bytes 13 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 isimip_files_api/tests/test_create_mask_shape.py create mode 100644 testing/shapes/pm.dbf create mode 100644 testing/shapes/pm.json create mode 100644 testing/shapes/pm.prj create mode 100644 testing/shapes/pm.shp create mode 100644 testing/shapes/pm.shx create mode 100644 testing/shapes/pm.zip create mode 100644 testing/shapes/wrong.txt create mode 100644 
testing/shapes/wrong.zip diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ed7ddf..4c8271f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,9 +7,8 @@ repos: hooks: - id: check-ast - id: end-of-file-fixer - exclude: \.json$ + exclude: \.json$|.prj$ - id: trailing-whitespace - exclude: fldmean\.csv$ - id: debug-statements - repo: https://github.com/charliermarsh/ruff-pre-commit rev: v0.0.284 diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index 26a43ba..e380e44 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -49,6 +49,7 @@ 'isimip_files_api.operations.cdo.SelectPointOperation', 'isimip_files_api.operations.cdo.MaskBBoxOperation', 'isimip_files_api.operations.cdo.MaskCountryOperation', + 'isimip_files_api.operations.cdo.MaskShapeOperation', 'isimip_files_api.operations.cdo.MaskLandonlyOperation', 'isimip_files_api.operations.cdo.FldmeanOperation', 'isimip_files_api.operations.cdo.OutputtabOperation', diff --git a/isimip_files_api/operations/__init__.py b/isimip_files_api/operations/__init__.py index f9035de..1d1715c 100644 --- a/isimip_files_api/operations/__init__.py +++ b/isimip_files_api/operations/__init__.py @@ -1,3 +1,10 @@ +import base64 +import binascii +import io +import json +import zipfile +from pathlib import Path + from flask import current_app as app from ..commands import CommandRegistry @@ -96,3 +103,34 @@ def validate_country(self): return [f'country not in the list of supported countries (e.g. 
deu) for operation "{self.operation}"']
         else:
             return [f'country is missing for operation "{self.operation}"']
+
+
+class ShapeOperationMixin:
+
+    def validate_shape(self):
+        if 'shapefile' in self.config and 'geojson' in self.config:
+            return [f'shapefile and geojson are mutually exclusive for operation "{self.operation}"']
+
+        elif 'shapefile' in self.config:
+            try:
+                shapefile_stream = io.BytesIO(base64.b64decode(self.config['shapefile']))
+
+                try:
+                    with zipfile.ZipFile(shapefile_stream) as z:
+                        for file_name in z.namelist():
+                            if Path(file_name).suffix not in ['.dbf', '.prj', '.shp', '.shx']:
+                                return [f'shapefile is not a valid shape file for operation "{self.operation}"']
+                except zipfile.BadZipFile:
+                    return [f'shapefile is not a valid zip file for operation "{self.operation}"']
+
+            except binascii.Error:
+                return [f'shapefile is not a valid base64 stream for operation "{self.operation}"']
+
+        elif 'geojson' in self.config:
+            try:
+                json.loads(self.config['geojson'])
+            except json.decoder.JSONDecodeError:
+                return [f'geojson is not a valid json for operation "{self.operation}"']
+
+        else:
+            return [f'shapefile or geojson is missing for operation "{self.operation}"']
diff --git a/isimip_files_api/operations/cdo.py b/isimip_files_api/operations/cdo.py
index 13d7bc3..d8c5d74 100644
--- a/isimip_files_api/operations/cdo.py
+++ b/isimip_files_api/operations/cdo.py
@@ -3,7 +3,7 @@
 from flask import current_app as app

 from ..netcdf import get_index
-from . import BaseOperation, BBoxOperationMixin, CountryOperationMixin, PointOperationMixin
+from . import BaseOperation, BBoxOperationMixin, CountryOperationMixin, PointOperationMixin, ShapeOperationMixin


 class CdoOperation(BaseOperation):
@@ -95,6 +95,17 @@ def get_region(self):
         return self.get_country().lower()


+class MaskShapeOperation(ShapeOperationMixin, CdoOperation):
+
+    operation = 'mask_shape'
+
+    def validate(self):
+        return self.validate_shape()
+
+    def get_args(self):
+        return []
+
+
 class MaskLandonlyOperation(CdoOperation):

     operation = 'mask_landonly'
diff --git a/isimip_files_api/tests/test_create_mask_shape.py b/isimip_files_api/tests/test_create_mask_shape.py
new file mode 100644
index 0000000..b3ce4cd
--- /dev/null
+++ b/isimip_files_api/tests/test_create_mask_shape.py
@@ -0,0 +1,102 @@
+import base64
+from pathlib import Path
+
+shapefile_path = Path('testing/shapes/pm.zip')
+wrong_path = Path('testing/shapes/wrong.zip')
+geojson_path = Path('testing/shapes/pm.json')
+
+def test_shapefile(client, mocker):
+    mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201)))
+
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_shape',
+            'shapefile': base64.b64encode(shapefile_path.read_bytes()).decode()
+        }
+    ]})
+
+    assert response.status_code == 201
+    assert response.json.get('errors') is None
+
+
+def test_geojson(client, mocker):
+    mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201)))
+
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_shape',
+            'geojson': geojson_path.read_text()
+        }
+    ]})
+
+    assert response.status_code == 201
+    assert response.json.get('errors') is None
+
+
+def test_missing_file(client):
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_shape'
+        }
+    ]})
+    assert response.status_code == 400
+    assert response.json.get('status') == 'error'
+    assert response.json.get('errors') == {
+        'operations': ['shapefile or geojson is missing for operation "mask_shape"']
+    }
+
+
+def test_invalid_shapefile(client):
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_shape',
+            'shapefile': 'wrong'
+        }
+    ]})
+    assert response.status_code == 400
+    assert response.json.get('status') == 'error'
+    assert response.json.get('errors') == {
+        'operations': ['shapefile is not a valid base64 stream for operation "mask_shape"']
+    }
+
+
+def test_invalid_shapefile2(client):
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_shape',
+            'shapefile': base64.b64encode(b'this is not a valid shapefile').decode()
+        }
+    ]})
+    assert response.status_code == 400
+    assert response.json.get('status') == 'error'
+    assert response.json.get('errors') == {
+        'operations': ['shapefile is not a valid zip file for operation "mask_shape"']
+    }
+
+
+def test_invalid_shapefile3(client):
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_shape',
+            'shapefile': base64.b64encode(wrong_path.read_bytes()).decode()
+        }
+    ]})
+    assert response.status_code == 400
+    assert response.json.get('status') == 'error'
+    assert response.json.get('errors') == {
+        'operations': ['shapefile is not a valid shape file for operation "mask_shape"']
+    }
+
+
+def test_invalid_geojson(client):
+    response = client.post('/', json={'paths': ['constant.nc'], 'operations': [
+        {
+            'operation': 'mask_shape',
+            'geojson': 'wrong'
+        }
+    ]})
+    assert response.status_code == 400
+    assert response.json.get('status') == 'error'
+    assert response.json.get('errors') == {
+        'operations': ['geojson is not a valid json for operation "mask_shape"']
+    }
diff --git a/testing/shapes/pm.dbf b/testing/shapes/pm.dbf
new file mode 100644
index 0000000000000000000000000000000000000000..ff5da76c9f187c3b685b13ed8aa34f1e712c9182
GIT binary patch
literal 871
zcmZRsVG?0vU|>jOWMu+Us34qyAu|Oojxiv%h07KK*VGzk_^))qT7QF0Lj9t_sux)HV<1GGiBNx)Nw=Kz>PaN@A|AZ)QnJ fYEEuqQ8vsf@@Q(C2Xlo9j9~<00E2?MaV7--G?r?~ literal 0 HcmV?d00001 diff --git a/testing/shapes/pm.json b/testing/shapes/pm.json new file mode 100644 index 0000000..ce9943b --- /dev/null +++ b/testing/shapes/pm.json @@ -0,0 +1,105 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 12.89353, + 52.46076 + ], + [ + 13.01813, + 52.35544 + ], + [ + 13.1659, + 52.39428 + ], + [ + 13.31193, + 52.39919 + ], + [ + 12.98098, + 52.1479 + ], + [ + 12.96593, + 52.04049 + ], + [ + 12.76978, + 51.97927 + ], + [ + 12.37612, + 52.04512 + ], + [ + 12.27672, + 52.10402 + ], + [ + 12.23625, + 52.17559 + ], + [ + 12.31718, + 52.4541 + ], + [ + 12.36118, + 52.44277 + ], + [ + 12.45535, + 52.34313 + ], + [ + 12.55783, + 52.39808 + ], + [ + 12.54333, + 52.42865 + ], + [ + 12.42503, + 52.45544 + ], + [ + 12.46984, + 52.54388 + ], + [ + 12.89353, + 52.46076 + ] + ] + ] + }, + "properties": { + "NUTS_ID": "DE40E", + "LEVL_CODE": 3, + "CNTR_CODE": "DE", + "NAME_LATN": "Potsdam-Mittelmark", + "NUTS_NAME": "Potsdam-Mittelmark", + "MOUNT_TYPE": 4, + "URBN_TYPE": 2, + "COAST_TYPE": 3, + "FID": "DE40E" + }, + "id": "DE40E" + } + ], + "crs": { + "type": "name", + "properties": { + "name": "urn:ogc:def:crs:EPSG::4326" + } + } +} \ No newline at end of file diff --git a/testing/shapes/pm.prj b/testing/shapes/pm.prj new file mode 100644 index 0000000..f45cbad --- /dev/null +++ b/testing/shapes/pm.prj @@ -0,0 +1 @@ +GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file diff --git a/testing/shapes/pm.shp b/testing/shapes/pm.shp new file mode 100644 index 0000000000000000000000000000000000000000..2dac9cdf610431a8c325afe0a04ef872abef5d6e GIT binary patch literal 444 
zcmZQzQ0HR64(`2RW?*0i%4x>*ANpFM;UM)(eMiJ!Plqs%pPpOiYdK_8%H5gk>g9l4 zJBlbHkhg*uvp@=kpjs3E&OcmpQqzHRIjjCJD=!BP^;@AHoLUZ1H!kzl>v}neTyVMZ zAw9Mfa< za=2u*YL-HmCfr{Z`@Wr-bU?#lfziL(V?g;xpB%e&y&4V+1sK}{c)c88;V|RC+TE36 zUJidA@6AeGq~Xxj_vDj2(0#5S_D_AYL&L%F^0)PCEW8|MmCsqE{8GaqMOk3|Yi%!w zNhN2W%$C)3=$s+s`32}+!TX(&`eK?6;@dAW+%xlXa9M2KG54m1gZk11x4nVp^==8d N*Y`mK9?t0D0RSlKoWB47 literal 0 HcmV?d00001 diff --git a/testing/shapes/pm.shx b/testing/shapes/pm.shx new file mode 100644 index 0000000000000000000000000000000000000000..e094fdcee1cdd4d0a27a2c8b556bb4c76ab3fdb2 GIT binary patch literal 108 zcmZQzQ0HR64$NLKGcd3MPtUFMwHz`kVA|GVKzDh!uaa5FHnyZ|Z! z>SbU^UY978n39m-k&=>{d6!KiN_w= z)gMo&`E^&J?)c04I9Bm+VdikwSBzP+*_n^&g_xNt>|bJ`Y8ZaqeDe|&Q+4x=8r20e z*_qW0+5~$Necl>OV`2#KX6N8~eSOaYpj$yc3&853 zm-F?7F$sNq`0n7xwrOH&URHX2iVOE$^wRTiI>;!u&hg(oy^CHPhQ`Uo+|J3xw#>51 z-rTmz!NtnT$-%~tZ$!*(Q1FirU%>g!b>+&+r!!AVdRehmv2C5Lr_CmpgUrYBXKU%-sdB$=7^J_nEl`+zK`tD5W*P;Upr!V`tr)z%gU$aNXPRY8`e%iSGk!q>lks!(+B>9KYJ z-S)$~ll!JfR~|h3D`v)Sr$aYRE6%Owp8VN-!9Ts9Ps*L*_ymvga@M{#k!^~9W4Y}3 ztEUAgv##p2>iO=7*7;UzZTzyK?J(2g`mM(sK25ZW-W)%5vird=(0F-P&qPu3T42Y=6<_$z4^-=o|5Mt6)q7ZpV2+%^2ZtgZRi zCD!eG8=rm6?PnC91n871v@5W zBVUvy&^nNn0p5&EBFu;^j+_xd$sGokG=f;fWq5Qmk<%lpnO`7g5|N?M%|s4RR5K3( sgBa*eBGNUwnaJUSYGw$qSOA+zFk%9{S=m4##R7x@K$koO>SSO509TjXP5=M^ literal 0 HcmV?d00001 diff --git a/testing/shapes/wrong.txt b/testing/shapes/wrong.txt new file mode 100644 index 0000000..e69de29 diff --git a/testing/shapes/wrong.zip b/testing/shapes/wrong.zip new file mode 100644 index 0000000000000000000000000000000000000000..041a244bae41c70f08712c50df9ae9e06e8685d5 GIT binary patch literal 196 zcmWIWW@h1H;9y{2_|g~>0i@u7lR<%@yeK~}U9Y5~Bs7GVfnDa%l~fomt>9*0WO>2N uz`!B`)Dqy$4l)A{0=yZSxR`Mp3{$wI5yT?F8df%t21XzZ2ht$SzyJW( Date: Thu, 22 Feb 2024 13:27:00 +0100 Subject: [PATCH 15/22] Add create-mask script, CreateMaskCommand and remove ShapeOperationMixin --- isimip_files_api/commands/__init__.py | 2 + isimip_files_api/commands/create_mask.py | 32 ++++++++ 
isimip_files_api/config.py | 8 +- isimip_files_api/operations/__init__.py | 37 --------- isimip_files_api/operations/cdo.py | 6 +- isimip_files_api/operations/create_mask.py | 53 +++++++++++++ isimip_files_api/scripts/__init__.py | 0 isimip_files_api/scripts/create_mask.py | 75 ++++++++++++++++++ ...sk_shape.py => test_create_create_mask.py} | 25 +++--- isimip_files_api/tests/test_root.py | 3 + pyproject.toml | 5 ++ testing/shapes/pm.dbf | Bin 871 -> 0 bytes testing/shapes/pm.prj | 1 - testing/shapes/pm.shp | Bin 444 -> 0 bytes testing/shapes/pm.shx | Bin 108 -> 0 bytes 15 files changed, 193 insertions(+), 54 deletions(-) create mode 100644 isimip_files_api/commands/create_mask.py create mode 100644 isimip_files_api/operations/create_mask.py create mode 100644 isimip_files_api/scripts/__init__.py create mode 100644 isimip_files_api/scripts/create_mask.py rename isimip_files_api/tests/{test_create_mask_shape.py => test_create_create_mask.py} (88%) delete mode 100644 testing/shapes/pm.dbf delete mode 100644 testing/shapes/pm.prj delete mode 100644 testing/shapes/pm.shp delete mode 100644 testing/shapes/pm.shx diff --git a/isimip_files_api/commands/__init__.py b/isimip_files_api/commands/__init__.py index 8dd74da..dd954b6 100644 --- a/isimip_files_api/commands/__init__.py +++ b/isimip_files_api/commands/__init__.py @@ -14,6 +14,8 @@ def __init__(self): def get(self, command): if command in self.commands: return self.commands[command]() + else: + raise RuntimeError(f'Command "{command}" not found in CommandRegistry.') class BaseCommand: diff --git a/isimip_files_api/commands/create_mask.py b/isimip_files_api/commands/create_mask.py new file mode 100644 index 0000000..7217a03 --- /dev/null +++ b/isimip_files_api/commands/create_mask.py @@ -0,0 +1,32 @@ +import subprocess + +from flask import current_app as app + +from ..utils import mask_paths +from . 
import BaseCommand + + +class CreateMaskCommand(BaseCommand): + + command = 'create_mask' + + def execute(self, input_path, output_path): + # use the ncks bin from the config + cmd_args = [app.config['CREATE_MASK_BIN']] + + # add the arguments from the operations + for operation in self.operations: + operation.input_path = input_path + operation.output_path = output_path + cmd_args += operation.get_args() + + # add the input file and output file + cmd_args += [str(input_path), str(output_path)] + + # join the cmd_args and execute the the command + cmd = ' '.join(cmd_args) + app.logger.debug(cmd) + subprocess.check_output(cmd_args) + + # return the command without the paths + return mask_paths(cmd) diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index e380e44..c50cea9 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -36,6 +36,7 @@ # list of commands which can be executed COMMANDS = [ 'isimip_files_api.commands.cdo.CdoCommand', + 'isimip_files_api.commands.create_mask.CreateMaskCommand', 'isimip_files_api.commands.ncks.NcksCommand' ] @@ -49,10 +50,11 @@ 'isimip_files_api.operations.cdo.SelectPointOperation', 'isimip_files_api.operations.cdo.MaskBBoxOperation', 'isimip_files_api.operations.cdo.MaskCountryOperation', - 'isimip_files_api.operations.cdo.MaskShapeOperation', + 'isimip_files_api.operations.cdo.MaskMaskOperation', 'isimip_files_api.operations.cdo.MaskLandonlyOperation', 'isimip_files_api.operations.cdo.FldmeanOperation', 'isimip_files_api.operations.cdo.OutputtabOperation', + 'isimip_files_api.operations.create_mask.CreateMaskOperation', 'isimip_files_api.operations.ncks.CutOutBBoxOperation' ] @@ -69,6 +71,10 @@ # the ncks binary on the system, e.g. 
/usr/bin/ncks NCKS_BIN = 'ncks' +# the binary used to create masks from geojson and shapefiles, +# shipped with this software and located in scripts/create_mask.py +CREATE_MASK_BIN = 'create-mask' + # special settings for the countries COUNTRYMASKS_FILE_PATH = 'countrymasks.nc' COUNTRYMASKS_COUNTRIES = [ diff --git a/isimip_files_api/operations/__init__.py b/isimip_files_api/operations/__init__.py index 1d1715c..033d1b5 100644 --- a/isimip_files_api/operations/__init__.py +++ b/isimip_files_api/operations/__init__.py @@ -1,9 +1,3 @@ -import base64 -import binascii -import io -import json -import zipfile -from pathlib import Path from flask import current_app as app @@ -103,34 +97,3 @@ def validate_country(self): return [f'country not in the list of supported countries (e.g. deu) for operation "{self.operation}"'] else: return [f'country is missing for operation "{self.operation}"'] - - -class ShapeOperationMixin: - - def validate_shape(self): - if 'shapefile' in self.config and 'geojson' in self.config: - return [f'shapefile and geojson and mutually exclusive for operation "{self.operation}"'] - - elif 'shapefile' in self.config: - try: - shapefile_stream = io.BytesIO(base64.b64decode(self.config['shapefile'])) - - try: - with zipfile.ZipFile(shapefile_stream) as z: - for file_name in z.namelist(): - if Path(file_name).suffix not in ['.dbf', '.prj', '.shp', '.shx']: - return [f'shapefile is not a valid shape file for operation "{self.operation}"'] - except zipfile.BadZipFile: - return [f'shapefile is a valid zip file for operation "{self.operation}"'] - - except binascii.Error: - return [f'shapefile is not a valid base64 stream for operation "{self.operation}"'] - - elif 'geojson' in self.config: - try: - json.loads(self.config['geojson']) - except json.decoder.JSONDecodeError: - return [f'geojson is not a valid json for operation "{self.operation}"'] - - else: - return [f'shapefile or geojson is missing for operation "{self.operation}"'] diff --git 
a/isimip_files_api/operations/cdo.py b/isimip_files_api/operations/cdo.py index d8c5d74..ba9caf5 100644 --- a/isimip_files_api/operations/cdo.py +++ b/isimip_files_api/operations/cdo.py @@ -3,7 +3,7 @@ from flask import current_app as app from ..netcdf import get_index -from . import BaseOperation, BBoxOperationMixin, CountryOperationMixin, PointOperationMixin, ShapeOperationMixin +from . import BaseOperation, BBoxOperationMixin, CountryOperationMixin, PointOperationMixin class CdoOperation(BaseOperation): @@ -95,9 +95,9 @@ def get_region(self): return self.get_country().lower() -class MaskShapeOperation(ShapeOperationMixin, CdoOperation): +class MaskMaskOperation(CdoOperation): - operation = 'mask_shape' + operation = 'mask_mask' def validate(self): return self.validate_shape() diff --git a/isimip_files_api/operations/create_mask.py b/isimip_files_api/operations/create_mask.py new file mode 100644 index 0000000..5b1d90d --- /dev/null +++ b/isimip_files_api/operations/create_mask.py @@ -0,0 +1,53 @@ +import base64 +import binascii +import io +import json +import zipfile +from pathlib import Path + +from . 
import BaseOperation + + +class CreateMaskOperation(BaseOperation): + + command = 'create_mask' + operation = 'create_mask' + + def validate(self): + if 'shapefile' in self.config and 'geojson' in self.config: + return [f'shapefile and geojson and mutually exclusive for operation "{self.operation}"'] + + elif 'shapefile' in self.config: + try: + shapefile_stream = io.BytesIO(base64.b64decode(self.config['shapefile'])) + + try: + with zipfile.ZipFile(shapefile_stream) as z: + for file_name in z.namelist(): + if Path(file_name).suffix not in [ + '.shp', '.dbf', '.shx', '.prj', '.sbn', '.sbx', '.fbn', '.fbx', + '.ain', '.aih', '.ixs', '.mxs', '.atx', '.shp.xml', '.cpg', '.qix' + ]: + return [f'shapefile is not a valid shape file for operation "{self.operation}"'] + except zipfile.BadZipFile: + return [f'shapefile is a valid zip file for operation "{self.operation}"'] + + except binascii.Error: + return [f'shapefile is not a valid base64 stream for operation "{self.operation}"'] + + elif 'geojson' in self.config: + try: + json.loads(self.config['geojson']) + except json.decoder.JSONDecodeError: + return [f'geojson is not a valid json for operation "{self.operation}"'] + + else: + return [f'shapefile or geojson is missing for operation "{self.operation}"'] + + def get_args(self): + south, north, west, east = self.get_bbox() + return [f'-sellonlatbox,{west:f},{east:f},{south:f},{north:f}'] + + def get_region(self): + south, north, west, east = self.get_bbox() + return f'lat{south}to{north}lon{west}to{east}' diff --git a/isimip_files_api/scripts/__init__.py b/isimip_files_api/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/isimip_files_api/scripts/create_mask.py b/isimip_files_api/scripts/create_mask.py new file mode 100644 index 0000000..3eb36f7 --- /dev/null +++ b/isimip_files_api/scripts/create_mask.py @@ -0,0 +1,75 @@ +import argparse + +import geopandas +import netCDF4 as nc +import numpy as np +import rioxarray # noqa: F401 +import 
shapely +import xarray as xr + +FILL_VALUE_FLOAT = 1e+20 +FILL_VALUE_BOOL = -128 + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('input_path', help='path to the input file') + parser.add_argument('output_path', help='path to the output file') + parser.add_argument('-g|--grid', dest='grid', help='grid spacing in arcsec', + type=int, default=1800) + + args = parser.parse_args() + + n_lat = int((360 * 60 * 60) / args.grid) + n_lon = int((180 * 60 * 60) / args.grid) + delta = args.grid / 3600.0 + + df = geopandas.read_file(args.input_path) + + # create a diskless netcdf file using python-netCDF4 + ds = nc.Dataset(args.output_path, 'w', format='NETCDF4_CLASSIC', diskless=True) + ds.createDimension('lon', n_lat) + ds.createDimension('lat', n_lon) + + lon = ds.createVariable('lon', 'f8', ('lon',), fill_value=FILL_VALUE_FLOAT) + lon.standard_name = 'longitude' + lon.long_name = 'Longitude' + lon.units = 'degrees_east' + lon.axis = 'X' + lon[:] = np.arange(-180 + 0.5 * delta, 180, delta) + + lat = ds.createVariable('lat', 'f8', ('lat',), fill_value=FILL_VALUE_FLOAT) + lat.standard_name = 'latitude' + lat.long_name = 'Latitude' + lat.units = 'degrees_north' + lat.axis = 'Y' + lat[:] = np.arange(90 - 0.5 * delta, -90 - 0.5 * delta, -delta) + + for index, row in df.iterrows(): + variable_name = f'm_{index}' + variable = ds.createVariable(variable_name, 'b', ('lat', 'lon'), + fill_value=FILL_VALUE_BOOL, compression='zlib') + + for key, value in row.items(): + if isinstance(value, (str, int, float)): + setattr(variable, key.lower(), value) + + variable[:, :] = np.ones((n_lon, n_lat)) + + # convert to a crs-aware xarray dataset + ds = xr.open_dataset(xr.backends.NetCDF4DataStore(ds)) + ds.rio.write_crs(df.crs, inplace=True) + + for index, row in df.iterrows(): + variable_name = f'm_{index}' + variable = ds[variable_name] + + geometry = shapely.geometry.mapping(row['geometry']) + + mask = variable.rio.clip([geometry], drop=False) + variable[:, :] = 
mask[:, :] + + ds.to_netcdf(args.output_path) + + +if __name__ == '__main__': + main() diff --git a/isimip_files_api/tests/test_create_mask_shape.py b/isimip_files_api/tests/test_create_create_mask.py similarity index 88% rename from isimip_files_api/tests/test_create_mask_shape.py rename to isimip_files_api/tests/test_create_create_mask.py index b3ce4cd..e4693a7 100644 --- a/isimip_files_api/tests/test_create_mask_shape.py +++ b/isimip_files_api/tests/test_create_create_mask.py @@ -5,12 +5,13 @@ wrong_path = Path('testing/shapes/wrong.zip') geojson_path = Path('testing/shapes/pm.json') + def test_shapefile(client, mocker): mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'mask_shape', + 'operation': 'create_mask', 'shapefile': base64.b64encode(shapefile_path.read_bytes()).decode() } ]}) @@ -24,7 +25,7 @@ def test_geojson(client, mocker): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'mask_shape', + 'operation': 'create_mask', 'geojson': geojson_path.read_text() } ]}) @@ -36,67 +37,67 @@ def test_geojson(client, mocker): def test_missing_file(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'mask_shape' + 'operation': 'create_mask' } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['shapefile or geojson is missing for operation "mask_shape"'] + 'operations': ['shapefile or geojson is missing for operation "create_mask"'] } def test_invalid_shapefile(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'mask_shape', + 'operation': 'create_mask', 'shapefile': 'wrong' } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 
'operations': ['shapefile is not a valid base64 stream for operation "mask_shape"'] + 'operations': ['shapefile is not a valid base64 stream for operation "create_mask"'] } def test_invalid_shapefile2(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'mask_shape', + 'operation': 'create_mask', 'shapefile': base64.b64encode(b'this is not a valid shapefile').decode() } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['shapefile is a valid zip file for operation "mask_shape"'] + 'operations': ['shapefile is a valid zip file for operation "create_mask"'] } def test_invalid_shapefile3(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'mask_shape', + 'operation': 'create_mask', 'shapefile': base64.b64encode(wrong_path.read_bytes()).decode() } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['shapefile is not a valid shape file for operation "mask_shape"'] + 'operations': ['shapefile is not a valid shape file for operation "create_mask"'] } def test_invalid_geojson(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'mask_shape', + 'operation': 'create_mask', 'geojson': 'wrong' } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['geojson is not a valid json for operation "mask_shape"'] + 'operations': ['geojson is not a valid json for operation "create_mask"'] } diff --git a/isimip_files_api/tests/test_root.py b/isimip_files_api/tests/test_root.py index 5ac6d8b..739953f 100644 --- a/isimip_files_api/tests/test_root.py +++ b/isimip_files_api/tests/test_root.py @@ -7,6 +7,7 @@ def test_success(client, mocker): assert response.json.get('status') == 
'ok' assert response.json.get('commands') == [ 'cdo', + 'create_mask', 'ncks' ] assert response.json.get('operations') == [ @@ -15,8 +16,10 @@ def test_success(client, mocker): 'select_point', 'mask_bbox', 'mask_country', + 'mask_mask', 'mask_landonly', 'fldmean', 'outputtab', + 'create_mask', 'cutout_bbox' ] diff --git a/pyproject.toml b/pyproject.toml index 06447e2..fc04589 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,15 +34,20 @@ dependencies = [ "colorlog~=6.8.2", "Flask~=3.0.0", "Flask-Cors~=4.0.0", + "geopandas~=0.14.3", "gunicorn~=21.2.0", "netCDF4~=1.6.5", "numpy~=1.26.3", "python-dotenv~=1.0.0", "tomli", + "rioxarray~=0.15.1", "rq~=1.15.1", ] dynamic = ["version"] +[project.scripts] +create-mask = "isimip_files_api.scripts.create_mask:main" + [project.urls] Repository = "https://github.com/ISI-MIP/isimip-files-api" diff --git a/testing/shapes/pm.dbf b/testing/shapes/pm.dbf deleted file mode 100644 index ff5da76c9f187c3b685b13ed8aa34f1e712c9182..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 871 zcmZRsVG?0vU|>jOWMu+Us34qyAu|Oojxiv%h07KK*VGzk_^))qT7QF0Lj9t_sux)HV<1GGiBNx)Nw=Kz>PaN@A|AZ)QnJ fYEEuqQ8vsf@@Q(C2Xlo9j9~<00E2?MaV7--G?r?~ diff --git a/testing/shapes/pm.prj b/testing/shapes/pm.prj deleted file mode 100644 index f45cbad..0000000 --- a/testing/shapes/pm.prj +++ /dev/null @@ -1 +0,0 @@ -GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]] \ No newline at end of file diff --git a/testing/shapes/pm.shp b/testing/shapes/pm.shp deleted file mode 100644 index 2dac9cdf610431a8c325afe0a04ef872abef5d6e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 444 zcmZQzQ0HR64(`2RW?*0i%4x>*ANpFM;UM)(eMiJ!Plqs%pPpOiYdK_8%H5gk>g9l4 zJBlbHkhg*uvp@=kpjs3E&OcmpQqzHRIjjCJD=!BP^;@AHoLUZ1H!kzl>v}neTyVMZ zAw9Mfa< za=2u*YL-HmCfr{Z`@Wr-bU?#lfziL(V?g;xpB%e&y&4V+1sK}{c)c88;V|RC+TE36 
zUJidA@6AeGq~Xxj_vDj2(0#5S_D_AYL&L%F^0)PCEW8|MmCsqE{8GaqMOk3|Yi%!w zNhN2W%$C)3=$s+s`32}+!TX(&`eK?6;@dAW+%xlXa9M2KG54m1gZk11x4nVp^==8d N*Y`mK9?t0D0RSlKoWB47 diff --git a/testing/shapes/pm.shx b/testing/shapes/pm.shx deleted file mode 100644 index e094fdcee1cdd4d0a27a2c8b556bb4c76ab3fdb2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 108 zcmZQzQ0HR64$NLKGcd3MPtUFMwHz`kV Date: Sat, 2 Mar 2024 12:10:58 +0100 Subject: [PATCH 16/22] Refactor operations and add examples --- .gitignore | 15 ++- Makefile | 4 + examples/time_series_bbox.py | 54 ++++++++++ examples/time_series_country.py | 54 ++++++++++ examples/time_series_geojson.py | 65 +++++++++++ examples/time_series_shapefile.py | 65 +++++++++++ isimip_files_api/app.py | 21 ++-- isimip_files_api/commands/__init__.py | 8 +- isimip_files_api/commands/cdo.py | 17 +-- isimip_files_api/commands/create_mask.py | 32 ------ isimip_files_api/commands/ncks.py | 10 +- isimip_files_api/commands/python/__init__.py | 0 .../commands/python/create_mask.py | 35 ++++++ isimip_files_api/config.py | 19 ++-- isimip_files_api/jobs.py | 11 +- isimip_files_api/operations/__init__.py | 53 ++++++++- isimip_files_api/operations/cdo.py | 46 ++++---- isimip_files_api/operations/create_mask.py | 53 --------- .../operations/python/__init__.py | 0 .../operations/python/create_mask.py | 29 +++++ isimip_files_api/responses.py | 17 ++- isimip_files_api/tasks.py | 76 +++++++------ ...tputtab.py => test_create_compute_mean.py} | 2 +- .../tests/test_create_create_mask.py | 101 ++++++++++-------- .../tests/test_create_mask_mask.py | 73 +++++++++++++ ...e_fldmean.py => test_create_output_csv.py} | 2 +- isimip_files_api/tests/test_root.py | 6 +- isimip_files_api/utils.py | 77 ++++++++++++- isimip_files_api/validators.py | 22 +++- pyproject.toml | 18 ++-- testing/shapes/pm.nc | Bin 0 -> 29451 bytes 31 files changed, 738 insertions(+), 247 deletions(-) create mode 100644 examples/time_series_bbox.py create mode 100644 
examples/time_series_country.py create mode 100644 examples/time_series_geojson.py create mode 100644 examples/time_series_shapefile.py delete mode 100644 isimip_files_api/commands/create_mask.py create mode 100644 isimip_files_api/commands/python/__init__.py create mode 100644 isimip_files_api/commands/python/create_mask.py delete mode 100644 isimip_files_api/operations/create_mask.py create mode 100644 isimip_files_api/operations/python/__init__.py create mode 100644 isimip_files_api/operations/python/create_mask.py rename isimip_files_api/tests/{test_create_outputtab.py => test_create_compute_mean.py} (88%) create mode 100644 isimip_files_api/tests/test_create_mask_mask.py rename isimip_files_api/tests/{test_create_fldmean.py => test_create_output_csv.py} (89%) create mode 100644 testing/shapes/pm.nc diff --git a/.gitignore b/.gitignore index 4c667ef..cad719a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,13 +10,20 @@ __pycache__/ /build /dist /volumes -/testing/output/ -/*.egg-info -/env + /log +/input +/tmp +/output +/testing/output/ + /*.log /*.toml !/pyproject.toml -/testing/output/ + +/*.egg-info +/env + +/.coverage /.env diff --git a/Makefile b/Makefile index d067064..9ce4544 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ # in production, the variables should be set in systemd or docker files export FLASK_APP=isimip_files_api.app export FLASK_ENV=development +export FLASK_DEBUG=true export RQ_WORKER_CLASS=isimip_files_api.worker.Worker server: @@ -10,4 +11,7 @@ server: worker: rq worker +burst: + rq worker --burst + .PHONY: server worker diff --git a/examples/time_series_bbox.py b/examples/time_series_bbox.py new file mode 100644 index 0000000..69d96f6 --- /dev/null +++ b/examples/time_series_bbox.py @@ -0,0 +1,54 @@ +import json +import time + +import requests + +url = 'http://localhost:5000' + +paths = [ + 
'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', +] + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'select_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + }, + { + 'operation': 'compute_mean', + }, + { + 'operation': 'output_csv' + } + ] +} + +response = requests.post(url, json=data) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + with requests.get(job['file_url'], stream=True) as response: + with open(job['file_name'], 'wb') as fp: + for chunk in 
response.iter_content(chunk_size=8192): + fp.write(chunk) diff --git a/examples/time_series_country.py b/examples/time_series_country.py new file mode 100644 index 0000000..789a9a2 --- /dev/null +++ b/examples/time_series_country.py @@ -0,0 +1,54 @@ +import json +import time + +import requests + +url = 'http://localhost:5000' + +paths = [ + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', +] + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'select_country', + 'country': 'bra' + }, + { + 'operation': 'compute_mean', + }, + { + 'operation': 'output_csv' + } + ] +} + +response = requests.post(url, json=data) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + 
job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + with requests.get(job['file_url'], stream=True) as response: + with open(job['file_name'], 'wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) diff --git a/examples/time_series_geojson.py b/examples/time_series_geojson.py new file mode 100644 index 0000000..db51bb5 --- /dev/null +++ b/examples/time_series_geojson.py @@ -0,0 +1,65 @@ +import json +import time +from pathlib import Path + +import requests + +url = 'http://localhost:5000/' + +paths = [ + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', + 
'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', +] + +shape_path = Path('testing') / 'shapes' / 'pm.json' + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'create_mask', + 'shape': 'pm.json', + 'mask': 'pm.nc' + }, + { + 'operation': 'mask_mask', + 'mask': 'pm.nc', + }, + { + 'operation': 'compute_mean', + }, + { + 'operation': 'output_csv' + } + ] +} + +response = requests.post(url, files={ + 'data': json.dumps(data), + 'pm.json': shape_path.read_bytes(), +}) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + with requests.get(job['file_url'], stream=True) as response: + with open(job['file_name'], 'wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) diff --git a/examples/time_series_shapefile.py b/examples/time_series_shapefile.py new file mode 100644 index 0000000..ef09ca4 --- /dev/null +++ b/examples/time_series_shapefile.py @@ -0,0 +1,65 @@ +import json +import time +from pathlib import Path + +import requests + +url = 'http://localhost:5000/' + +paths = [ + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', + 
'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', +] + +shape_path = Path('testing') / 'shapes' / 'pm.zip' + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'create_mask', + 'shape': 'pm.zip', + 'mask': 'pm.nc' + }, + { + 'operation': 'mask_mask', + 'mask': 'pm.nc' + }, + { + 'operation': 'compute_mean', + }, + { + 'operation': 'output_csv' + } + ] +} + +response = requests.post(url, files={ + 'data': json.dumps(data), + 'pm.zip': shape_path.read_bytes(), +}) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + with requests.get(job['file_url'], stream=True) as response: + with open(job['file_name'], 'wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) diff --git a/isimip_files_api/app.py b/isimip_files_api/app.py index a1843be..99439f5 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -8,7 +8,8 @@ from .logging import configure_logging from .operations import OperationRegistry from .responses import get_errors_response -from .validators import validate_data, validate_operations, validate_paths +from 
.utils import get_config_path, handle_post_request +from .validators import validate_data, validate_operations, validate_paths, validate_uploads def create_app(): @@ -17,7 +18,7 @@ def create_app(): app.config.from_object('isimip_files_api.config') app.config.from_prefixed_env() if 'CONFIG' in app.config: - app.config.from_file(app.config['CONFIG'], load=tomli.load, text=False) + app.config.from_file(get_config_path(app.config['CONFIG']), load=tomli.load, text=False) # configure logging configure_logging(app) @@ -37,22 +38,30 @@ def index(): @app.route('/', methods=['POST']) def create(): - app.logger.debug('request.json = %s', request.json) - - data = request.json + data, uploads = handle_post_request(request) + app.logger.debug('data = %s', data) + app.logger.debug('files = %s', uploads.keys()) + # validation step 1: check data errors = validate_data(data) if errors: app.logger.debug('errors = %s', errors) return get_errors_response(errors) + # validation step 2: check paths and operations errors = dict(**validate_paths(data), **validate_operations(data)) if errors: app.logger.debug('errors = %s', errors) return get_errors_response(errors) - return create_job(data) + # validation step 3: check uploads + errors = validate_uploads(data, uploads) + if errors: + app.logger.debug('errors = %s', errors) + return get_errors_response(errors) + + return create_job(data, uploads) @app.route('/', methods=['GET']) def detail(job_id): diff --git a/isimip_files_api/commands/__init__.py b/isimip_files_api/commands/__init__.py index dd954b6..137a9c2 100644 --- a/isimip_files_api/commands/__init__.py +++ b/isimip_files_api/commands/__init__.py @@ -20,13 +20,17 @@ def get(self, command): class BaseCommand: + perform_once = False + max_operations = None + def __init__(self): self.operations = [] + self.outputs = [] + self.artefacts = [] - def execute(self, input_path, output_path): + def execute(self, input_path, output_path, tmp_path): raise NotImplementedError - def 
get_suffix(self): # loop over operations and take the first one for operation in self.operations: diff --git a/isimip_files_api/commands/cdo.py b/isimip_files_api/commands/cdo.py index 7d1895a..5ee289d 100644 --- a/isimip_files_api/commands/cdo.py +++ b/isimip_files_api/commands/cdo.py @@ -3,7 +3,6 @@ from flask import current_app as app -from ..utils import mask_paths from . import BaseCommand @@ -11,14 +10,14 @@ class CdoCommand(BaseCommand): command = 'cdo' - def execute(self, input_path, output_path): - write_csv = (self.get_suffix() == '.csv') + def execute(self, job_path, input_path, output_path): + write_csv = (output_path.suffix == '.csv') # use the cdo bin from the config, NETCDF4_CLASSIC and compression cmd_args = [app.config['CDO_BIN'], '-f', 'nc4c', '-z', 'zip_5', '-L'] # collect args from operations - for operation in self.operations: + for operation in reversed(self.operations): operation.input_path = input_path operation.output_path = output_path cmd_args += operation.get_args() @@ -33,18 +32,22 @@ def execute(self, input_path, output_path): # join the cmd_args and execute the the command cmd = ' '.join(cmd_args) app.logger.debug(cmd) + output = subprocess.check_output(cmd_args, env={ 'CDI_VERSION_INFO': '0', 'CDO_VERSION_INFO': '0', 'CDO_HISTORY_INFO': '0' - }) + }, cwd=job_path) # write the subprocess output into a csv file if write_csv: - with open(output_path, 'w', newline='') as fp: + with open(job_path / output_path, 'w', newline='') as fp: writer = csv.writer(fp, delimiter=',') for line in output.splitlines(): writer.writerow(line.decode().strip().split()) + # add the output path to the commands outputs + self.outputs = [output_path] + # return the command without the paths - return mask_paths(cmd) + return cmd diff --git a/isimip_files_api/commands/create_mask.py b/isimip_files_api/commands/create_mask.py deleted file mode 100644 index 7217a03..0000000 --- a/isimip_files_api/commands/create_mask.py +++ /dev/null @@ -1,32 +0,0 @@ -import 
subprocess - -from flask import current_app as app - -from ..utils import mask_paths -from . import BaseCommand - - -class CreateMaskCommand(BaseCommand): - - command = 'create_mask' - - def execute(self, input_path, output_path): - # use the ncks bin from the config - cmd_args = [app.config['CREATE_MASK_BIN']] - - # add the arguments from the operations - for operation in self.operations: - operation.input_path = input_path - operation.output_path = output_path - cmd_args += operation.get_args() - - # add the input file and output file - cmd_args += [str(input_path), str(output_path)] - - # join the cmd_args and execute the the command - cmd = ' '.join(cmd_args) - app.logger.debug(cmd) - subprocess.check_output(cmd_args) - - # return the command without the paths - return mask_paths(cmd) diff --git a/isimip_files_api/commands/ncks.py b/isimip_files_api/commands/ncks.py index 9c19d07..4fa10b0 100644 --- a/isimip_files_api/commands/ncks.py +++ b/isimip_files_api/commands/ncks.py @@ -2,7 +2,6 @@ from flask import current_app as app -from ..utils import mask_paths from . 
import BaseCommand @@ -10,7 +9,7 @@ class NcksCommand(BaseCommand): command = 'ncks' - def execute(self, input_path, output_path): + def execute(self, job_path, input_path, output_path): # use the ncks bin from the config cmd_args = [app.config['NCKS_BIN']] @@ -26,7 +25,10 @@ def execute(self, input_path, output_path): # join the cmd_args and execute the the command cmd = ' '.join(cmd_args) app.logger.debug(cmd) - subprocess.check_output(cmd_args) + subprocess.check_call(cmd_args, cwd=job_path) + + # add the output path to the commands outputs + self.outputs = [output_path] # return the command without the paths - return mask_paths(cmd) + return cmd diff --git a/isimip_files_api/commands/python/__init__.py b/isimip_files_api/commands/python/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/isimip_files_api/commands/python/create_mask.py b/isimip_files_api/commands/python/create_mask.py new file mode 100644 index 0000000..817852a --- /dev/null +++ b/isimip_files_api/commands/python/create_mask.py @@ -0,0 +1,35 @@ +import subprocess +from pathlib import Path + +from flask import current_app as app + +from isimip_files_api.commands import BaseCommand + + +class CreateMaskCommand(BaseCommand): + + command = 'create_mask' + + perform_once = True + max_operations = 1 + + def execute(self, job_path, input_path, output_path): + # use the ncks bin from the config + cmd_args = [app.config['CREATE_MASK_BIN']] + + # add the arguments from the first operation + shape_file, mask_file = self.operations[0].get_args() + + # add the arguments to cmd_args + cmd_args += [shape_file, mask_file] + + # join the cmd_args and execute the the command + cmd = ' '.join(cmd_args) + app.logger.debug(cmd) + subprocess.check_call(cmd_args, cwd=job_path) + + # add the input and the output path of the command to the commands artefacts + self.artefacts = [Path(shape_file), Path(mask_file)] + + # return the command without the paths + return cmd diff --git 
a/isimip_files_api/config.py b/isimip_files_api/config.py index c50cea9..f40aeaa 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -22,10 +22,13 @@ OUTPUT_URL = 'http://127.0.0.1/api/output/' # input path to the NetCDF files to process -INPUT_PATH = '.' +INPUT_PATH = 'input' + +# temporary path to store interim files +TMP_PATH = 'tmp' # output path to store the created download packages, this directory should be exposed on OUTPUT_URL -OUTPUT_PATH = '.' +OUTPUT_PATH = 'output' # output prefix to be prepended to the job ID to create the filename for the download package OUTPUT_PREFIX = 'download-' @@ -36,12 +39,12 @@ # list of commands which can be executed COMMANDS = [ 'isimip_files_api.commands.cdo.CdoCommand', - 'isimip_files_api.commands.create_mask.CreateMaskCommand', + 'isimip_files_api.commands.python.create_mask.CreateMaskCommand', 'isimip_files_api.commands.ncks.NcksCommand' ] # maximum number of commands which can be performed -MAX_COMMANDS = 2 +MAX_COMMANDS = 8 # list of operations which can be performed OPERATIONS = [ @@ -49,12 +52,12 @@ 'isimip_files_api.operations.cdo.SelectCountryOperation', 'isimip_files_api.operations.cdo.SelectPointOperation', 'isimip_files_api.operations.cdo.MaskBBoxOperation', - 'isimip_files_api.operations.cdo.MaskCountryOperation', 'isimip_files_api.operations.cdo.MaskMaskOperation', + 'isimip_files_api.operations.cdo.MaskCountryOperation', 'isimip_files_api.operations.cdo.MaskLandonlyOperation', - 'isimip_files_api.operations.cdo.FldmeanOperation', - 'isimip_files_api.operations.cdo.OutputtabOperation', - 'isimip_files_api.operations.create_mask.CreateMaskOperation', + 'isimip_files_api.operations.cdo.ComputeMeanOperation', + 'isimip_files_api.operations.cdo.OutputCsvOperation', + 'isimip_files_api.operations.python.create_mask.CreateMaskOperation', 'isimip_files_api.operations.ncks.CutOutBBoxOperation' ] diff --git a/isimip_files_api/jobs.py b/isimip_files_api/jobs.py index e5ac69b..85a595f 100644 --- 
a/isimip_files_api/jobs.py +++ b/isimip_files_api/jobs.py @@ -8,7 +8,7 @@ from .responses import get_response from .tasks import run_task -from .utils import get_hash +from .utils import get_hash, store_uploads def count_jobs(): @@ -23,14 +23,19 @@ def count_jobs(): 'scheduled': queue.scheduled_job_registry.count } -def create_job(data): +def create_job(data, uploads): redis = Redis.from_url(app.config['REDIS_URL']) - job_id = get_hash(data) + job_id = get_hash(data, uploads) + try: job = Job.fetch(job_id, connection=redis) return get_response(job, 200) except NoSuchJobError: + # create tmp dir and store uploaded files + store_uploads(job_id, uploads) + + # create and enqueue asyncronous job job = Job.create(run_task, id=job_id, args=[data['paths'], data['operations']], timeout=app.config['WORKER_TIMEOUT'], ttl=app.config['WORKER_TTL'], diff --git a/isimip_files_api/operations/__init__.py b/isimip_files_api/operations/__init__.py index 033d1b5..ff4c061 100644 --- a/isimip_files_api/operations/__init__.py +++ b/isimip_files_api/operations/__init__.py @@ -1,3 +1,5 @@ +import re +from pathlib import Path from flask import current_app as app @@ -21,11 +23,26 @@ def get_command_list(self, operations): commands = [] command_registry = CommandRegistry() + current_command = None for index, operation_config in enumerate(operations): operation = self.get(operation_config) - if not commands or commands[-1].command != operation.command: - commands.append(command_registry.get(operation.command)) - commands[-1].operations.append(operation) + + # add a new command, if + # * its the first operation + # * the operation has a different command than the previous one + # * the command reached its limit of operations + if ( + current_command is None or + current_command.command != operation.command or + ( + current_command.max_operations is not None and + len(current_command.operations) >= current_command.max_operations + ) + ): + current_command = 
command_registry.get(operation.command) + commands.append(current_command) + + current_command.operations.append(operation) return commands @@ -38,6 +55,9 @@ def __init__(self, config): def validate(self): raise NotImplementedError + def validate_uploads(self, uploads): + pass + def get_args(self): raise NotImplementedError @@ -91,9 +111,36 @@ class CountryOperationMixin: def get_country(self): return self.config['country'].upper() + def get_mask_path(self): + return Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser().resolve() + def validate_country(self): if 'country' in self.config: if self.get_country() not in app.config['COUNTRYMASKS_COUNTRIES']: return [f'country not in the list of supported countries (e.g. deu) for operation "{self.operation}"'] else: return [f'country is missing for operation "{self.operation}"'] + + +class MaskOperationMixin: + + def get_var(self): + return self.config.get('var', 'm_0') + + def get_mask_path(self): + return Path(self.config.get('mask')) + + def validate_var(self): + if 'var' in self.config: + if not re.match(r'^[A-Za-z0-9_]*$', self.config['var']): + return [f'only letters, numbers, underscores are permitted in "var" for operation "{self.operation}"'] + + def validate_mask(self): + if 'mask' in self.config: + if not re.match(r'^[A-Za-z0-9-.]*$', self.config['mask']): + return ['only letters, numbers, hyphens, and periods are permitted in "mask"' + f' for operation "{self.operation}"'] + elif re.search(r'\.{2}', self.config['mask']): + return [f'consecutive periods are not permitted in "mask" for operation "{self.operation}"'] + else: + return [f'mask is missing for operation "{self.operation}"'] diff --git a/isimip_files_api/operations/cdo.py b/isimip_files_api/operations/cdo.py index ba9caf5..a6414b9 100644 --- a/isimip_files_api/operations/cdo.py +++ b/isimip_files_api/operations/cdo.py @@ -3,7 +3,7 @@ from flask import current_app as app from ..netcdf import get_index -from . 
import BaseOperation, BBoxOperationMixin, CountryOperationMixin, PointOperationMixin +from . import BaseOperation, BBoxOperationMixin, CountryOperationMixin, MaskOperationMixin, PointOperationMixin class CdoOperation(BaseOperation): @@ -36,8 +36,8 @@ def validate(self): def get_args(self): country = self.get_country() - mask_path = str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) - return ['-ifthen', f'-selname,m_{country:3.3}', mask_path] + mask_path = self.get_mask_path() + return ['-ifthen', f'-selname,m_{country:3.3}', str(mask_path)] def get_region(self): return self.get_country().lower() @@ -63,6 +63,7 @@ def get_region(self): lat, lon = self.get_point() return f'lat{lat}lon{lon}' + class MaskBBoxOperation(BBoxOperationMixin, CdoOperation): operation = 'mask_bbox' @@ -79,31 +80,40 @@ def get_region(self): return f'lat{south}to{north}lon{west}to{east}' -class MaskCountryOperation(CountryOperationMixin, CdoOperation): +class MaskMaskOperation(MaskOperationMixin, CdoOperation): - operation = 'mask_country' + operation = 'mask_mask' def validate(self): - return self.validate_country() + errors = [] + errors += self.validate_var() or [] + errors += self.validate_mask() or [] + return errors def get_args(self): - country = self.get_country() - mask_path = str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) - return [f'-selname,m_{country:3.3}', mask_path] + var = self.get_var() + mask_path = self.get_mask_path() + return ['-ifthen', f'-selname,{var}', str(mask_path)] def get_region(self): - return self.get_country().lower() + mask_path = self.get_mask_path() + return mask_path.stem -class MaskMaskOperation(CdoOperation): +class MaskCountryOperation(CountryOperationMixin, CdoOperation): - operation = 'mask_mask' + operation = 'mask_country' def validate(self): - return self.validate_shape() + return self.validate_country() def get_args(self): - return [] + country = self.get_country() + mask_path = 
str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) + return [f'-selname,m_{country:3.3}', mask_path] + + def get_region(self): + return self.get_country().lower() class MaskLandonlyOperation(CdoOperation): @@ -121,9 +131,9 @@ def get_region(self): return 'landonly' -class FldmeanOperation(CdoOperation): +class ComputeMeanOperation(CdoOperation): - operation = 'fldmean' + operation = 'compute_mean' def validate(self): pass @@ -132,9 +142,9 @@ def get_args(self): return ['-fldmean'] -class OutputtabOperation(CdoOperation): +class OutputCsvOperation(CdoOperation): - operation = 'outputtab' + operation = 'output_csv' def validate(self): pass diff --git a/isimip_files_api/operations/create_mask.py b/isimip_files_api/operations/create_mask.py deleted file mode 100644 index 5b1d90d..0000000 --- a/isimip_files_api/operations/create_mask.py +++ /dev/null @@ -1,53 +0,0 @@ -import base64 -import binascii -import io -import json -import zipfile -from pathlib import Path - -from . import BaseOperation - - -class CreateMaskOperation(BaseOperation): - - command = 'create_mask' - operation = 'create_mask' - - def validate(self): - if 'shapefile' in self.config and 'geojson' in self.config: - return [f'shapefile and geojson and mutually exclusive for operation "{self.operation}"'] - - elif 'shapefile' in self.config: - try: - shapefile_stream = io.BytesIO(base64.b64decode(self.config['shapefile'])) - - try: - with zipfile.ZipFile(shapefile_stream) as z: - for file_name in z.namelist(): - if Path(file_name).suffix not in [ - '.shp', '.dbf', '.shx', '.prj', '.sbn', '.sbx', '.fbn', '.fbx', - '.ain', '.aih', '.ixs', '.mxs', '.atx', '.shp.xml', '.cpg', '.qix' - ]: - return [f'shapefile is not a valid shape file for operation "{self.operation}"'] - except zipfile.BadZipFile: - return [f'shapefile is a valid zip file for operation "{self.operation}"'] - - except binascii.Error: - return [f'shapefile is not a valid base64 stream for operation "{self.operation}"'] - - elif 
'geojson' in self.config: - try: - json.loads(self.config['geojson']) - except json.decoder.JSONDecodeError: - return [f'geojson is not a valid json for operation "{self.operation}"'] - - else: - return [f'shapefile or geojson is missing for operation "{self.operation}"'] - - def get_args(self): - south, north, west, east = self.get_bbox() - return [f'-sellonlatbox,{west:f},{east:f},{south:f},{north:f}'] - - def get_region(self): - south, north, west, east = self.get_bbox() - return f'lat{south}to{north}lon{west}to{east}' diff --git a/isimip_files_api/operations/python/__init__.py b/isimip_files_api/operations/python/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/isimip_files_api/operations/python/create_mask.py b/isimip_files_api/operations/python/create_mask.py new file mode 100644 index 0000000..00c32bd --- /dev/null +++ b/isimip_files_api/operations/python/create_mask.py @@ -0,0 +1,29 @@ +from isimip_files_api.operations import BaseOperation, MaskOperationMixin + + +class CreateMaskOperation(MaskOperationMixin, BaseOperation): + + command = 'create_mask' + operation = 'create_mask' + + def validate(self): + errors = [] + errors += self.validate_shape() or [] + errors += self.validate_mask() or [] + return errors + + def validate_shape(self): + if 'shape' not in self.config: + return [f'shape is missing for operation "{self.operation}"'] + + def validate_uploads(self, uploads): + if 'shape' in self.config: + shape = self.config['shape'] + if not uploads.get(shape): + return [f'File "{shape}" for operation "{self.operation}" is not part of the uploads'] + + def get_args(self): + return [ + self.config.get('shape'), + self.config.get('mask') + ] diff --git a/isimip_files_api/responses.py b/isimip_files_api/responses.py index 2b767ce..b557118 100644 --- a/isimip_files_api/responses.py +++ b/isimip_files_api/responses.py @@ -5,16 +5,23 @@ def get_response(job, http_status): file_name = get_zip_file_name(job.id) + status = job.get_status() - 
return { + response = { 'id': job.id, 'job_url': app.config['BASE_URL'] + '/' + job.id, - 'file_name': file_name, - 'file_url': app.config['OUTPUT_URL'] + '/' + file_name, 'meta': job.meta, 'ttl': app.config['WORKER_RESULT_TTL'], - 'status': job.get_status(), - }, http_status + 'status': status + } + + if status == 'finished': + response.update({ + 'file_name': file_name, + 'file_url': app.config['OUTPUT_URL'] + '/' + file_name, + }) + + return response, http_status def get_errors_response(errors): diff --git a/isimip_files_api/tasks.py b/isimip_files_api/tasks.py index b9d2376..c46a9ef 100644 --- a/isimip_files_api/tasks.py +++ b/isimip_files_api/tasks.py @@ -1,14 +1,11 @@ -import shutil from pathlib import Path -from tempfile import mkdtemp -from zipfile import ZipFile from flask import current_app as app from rq import get_current_job from .operations import OperationRegistry -from .utils import get_zip_file_name +from .utils import get_input_path, get_job_path, get_zip_file, mask_paths, remove_job_path def run_task(paths, operations): @@ -18,18 +15,14 @@ def run_task(paths, operations): job.meta['total_files'] = len(paths) job.save_meta() - # create output paths - zip_path = Path(app.config['OUTPUT_PATH']).expanduser() / get_zip_file_name(job.id) - zip_path.parent.mkdir(parents=True, exist_ok=True) + # get the temporary directory + job_path = get_job_path(job.id) - # create a temporary directory - tmp_path = Path(mkdtemp(prefix=app.config['OUTPUT_PREFIX'])) - - # open zipfile - z = ZipFile(zip_path, 'w') + # create the output zip file + zip_file = get_zip_file(job.id) # open readme - readme_path = tmp_path / 'README.txt' + readme_path = job_path / 'README.txt' readme = readme_path.open('w') readme.write('The following commands were used to create the files in this container:\n\n') @@ -37,17 +30,17 @@ def run_task(paths, operations): command_list = OperationRegistry().get_command_list(operations) for path in paths: - input_path = output_path = output_region = 
None + input_path = get_input_path() / path + output_path = Path(input_path.name) + output_region = None for command in command_list: - if output_path is None: - input_path = Path(app.config['INPUT_PATH']).expanduser() / path - output_path = tmp_path / input_path.name - else: - input_path = output_path + if command.perform_once and path != paths[0]: + continue + # update region tag in output_path region = command.get_region() - if region is not None: + if region: if output_region is None: if app.config['GLOBAL_TAG'] in output_path.name: # replace the _global_ specifier @@ -61,24 +54,37 @@ def run_task(paths, operations): output_region = region output_path = output_path.with_name(output_name) + # update suffix in output_path suffix = command.get_suffix() - if suffix is not None: + if suffix: output_path = output_path.with_suffix(suffix) # execute the command and obtain the command_string - command_string = command.execute(input_path, output_path) + command_string = command.execute(job_path, input_path, output_path) # write the command_string into readme file - readme.write(command_string + '\n') - - if output_path.is_file(): - z.write(output_path, output_path.name) - print(output_path, output_path.name) - else: - error_path = Path(tmp_path).with_suffix('.txt') - error_path.write_text('Something went wrong with processing the input file.' 
- ' Probably it is not using a global grid.') - z.write(error_path, error_path.name) + readme.write(mask_paths(command_string) + '\n') + + # write the artefacts into the zipfile + if command.artefacts: + for artefact_path in command.artefacts: + if (job_path / artefact_path).is_file(): + zip_file.write(job_path / artefact_path, artefact_path.name) + + # write the outputs into the zipfile and set the new input path + if command.outputs: + for output_path in command.outputs: + # set the new input path to the output path + input_path = output_path + + if (job_path / output_path).is_file(): + # write the output into the zipfile + zip_file.write(job_path / output_path, output_path.name) + else: + error_path = output_path.with_suffix('.txt') + error_path.write_text('Something went wrong with processing the input file.' + ' Probably it is not using a global grid.') + zip_file.write(error_path, error_path.name) # update the current job and store progress job.meta['created_files'] += 1 @@ -86,13 +92,13 @@ def run_task(paths, operations): # close and write readme file readme.close() - z.write(readme_path, readme_path.name) + zip_file.write(readme_path, readme_path.name) # close zip file - z.close() + zip_file.close() # delete temporary directory - shutil.rmtree(tmp_path) + remove_job_path(job.id) # return True to indicate success return True diff --git a/isimip_files_api/tests/test_create_outputtab.py b/isimip_files_api/tests/test_create_compute_mean.py similarity index 88% rename from isimip_files_api/tests/test_create_outputtab.py rename to isimip_files_api/tests/test_create_compute_mean.py index e5253ec..6757c9d 100644 --- a/isimip_files_api/tests/test_create_outputtab.py +++ b/isimip_files_api/tests/test_create_compute_mean.py @@ -3,7 +3,7 @@ def test_success(client, mocker): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'outputtab' + 'operation': 'compute_mean' } ]}) diff --git 
a/isimip_files_api/tests/test_create_create_mask.py b/isimip_files_api/tests/test_create_create_mask.py index e4693a7..a51df9c 100644 --- a/isimip_files_api/tests/test_create_create_mask.py +++ b/isimip_files_api/tests/test_create_create_mask.py @@ -1,103 +1,114 @@ -import base64 +import io +import json from pathlib import Path -shapefile_path = Path('testing/shapes/pm.zip') -wrong_path = Path('testing/shapes/wrong.zip') -geojson_path = Path('testing/shapes/pm.json') +shapefile_path = Path('testing') / 'shapes' / 'pm.zip' +wrong_path = Path('testing') / 'shapes' / 'wrong.zip' +geojson_path = Path('testing') / 'shapes' / 'pm.json' -def test_shapefile(client, mocker): +def test_shape(client, mocker): mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'operation': 'create_mask', - 'shapefile': base64.b64encode(shapefile_path.read_bytes()).decode() - } - ]}) - - assert response.status_code == 201 - assert response.json.get('errors') is None - + data = { + 'paths': ['constant.nc'], + 'operations': [ + { + 'operation': 'create_mask', + 'shape': 'pm.zip', + 'mask': 'pm.nc' + } + ] + } -def test_geojson(client, mocker): - mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + response = client.post('/', data={ + 'data': (io.BytesIO(json.dumps(data).encode()), 'data', 'application/json'), + 'pm.zip': (shapefile_path.open('rb'), 'pm.zip', 'application/zip') + }) - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'operation': 'create_mask', - 'geojson': geojson_path.read_text() - } - ]}) - - assert response.status_code == 201 + assert response.status_code == 201, response.text assert response.json.get('errors') is None -def test_missing_file(client): +def test_missing_shape(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'create_mask' + 
'operation': 'create_mask', + 'mask': 'pm.nc' } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['shapefile or geojson is missing for operation "create_mask"'] + 'operations': ['shape is missing for operation "create_mask"'] } -def test_invalid_shapefile(client): +def test_missing_mask(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { 'operation': 'create_mask', - 'shapefile': 'wrong' + 'shape': 'pm.zip' } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['shapefile is not a valid base64 stream for operation "create_mask"'] + 'operations': ['mask is missing for operation "create_mask"'] } -def test_invalid_shapefile2(client): +def test_invalid_mask1(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { 'operation': 'create_mask', - 'shapefile': base64.b64encode(b'this is not a valid shapefile').decode() + 'shape': 'pm.zip', + 'mask': 'pm.nc ; wrong' } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['shapefile is a valid zip file for operation "create_mask"'] + 'operations': ['only letters, numbers, hyphens, and periods are permitted in "mask"' + ' for operation "create_mask"'] } -def test_invalid_shapefile3(client): +def test_invalid_mask2(client): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { 'operation': 'create_mask', - 'shapefile': base64.b64encode(wrong_path.read_bytes()).decode() + 'shape': 'pm.zip', + 'mask': '..pm.nc' } ]}) assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['shapefile is not a valid shape file for operation "create_mask"'] + 'operations': ['consecutive periods are not permitted in 
"mask" for operation "create_mask"'] } -def test_invalid_geojson(client): - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'operation': 'create_mask', - 'geojson': 'wrong' - } - ]}) +def test_missing_file(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + data = { + 'paths': ['constant.nc'], + 'operations': [ + { + 'operation': 'create_mask', + 'shape': 'pm.zip', + 'mask': 'pm.nc' + } + ] + } + + response = client.post('/', data={ + 'data': (io.BytesIO(json.dumps(data).encode()), 'data', 'application/json') + }) + assert response.status_code == 400 assert response.json.get('status') == 'error' assert response.json.get('errors') == { - 'operations': ['geojson is not a valid json for operation "create_mask"'] + 'operations': ['File "pm.zip" for operation "create_mask" is not part of the uploads'] } diff --git a/isimip_files_api/tests/test_create_mask_mask.py b/isimip_files_api/tests/test_create_mask_mask.py new file mode 100644 index 0000000..52070a5 --- /dev/null +++ b/isimip_files_api/tests/test_create_mask_mask.py @@ -0,0 +1,73 @@ +def test_success(client, mocker): + mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) + + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_mask', + 'mask': 'pm.nc' + } + ]}) + + assert response.status_code == 201 + assert response.json.get('errors') is None + + +def test_missing_mask(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_mask' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['mask is missing for operation "mask_mask"'] + } + + +def test_invalid_mask1(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_mask', + 'shape': 
'pm.zip', + 'mask': 'pm.nc ; wrong' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['only letters, numbers, hyphens, and periods are permitted in "mask"' + ' for operation "mask_mask"'] + } + + +def test_invalid_mask2(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_mask', + 'shape': 'pm.zip', + 'mask': '..pm.nc' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['consecutive periods are not permitted in "mask" for operation "mask_mask"'] + } + + +def test_invalid_var(client): + response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ + { + 'operation': 'mask_mask', + 'shape': 'pm.zip', + 'mask': 'pm.nc', + 'var': 'm_0 ; wrong' + } + ]}) + assert response.status_code == 400 + assert response.json.get('status') == 'error' + assert response.json.get('errors') == { + 'operations': ['only letters, numbers, underscores are permitted in "var"' + ' for operation "mask_mask"'] + } diff --git a/isimip_files_api/tests/test_create_fldmean.py b/isimip_files_api/tests/test_create_output_csv.py similarity index 89% rename from isimip_files_api/tests/test_create_fldmean.py rename to isimip_files_api/tests/test_create_output_csv.py index 23dd1f3..46cc16e 100644 --- a/isimip_files_api/tests/test_create_fldmean.py +++ b/isimip_files_api/tests/test_create_output_csv.py @@ -3,7 +3,7 @@ def test_success(client, mocker): response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ { - 'operation': 'fldmean' + 'operation': 'output_csv' } ]}) diff --git a/isimip_files_api/tests/test_root.py b/isimip_files_api/tests/test_root.py index 739953f..e9861cc 100644 --- a/isimip_files_api/tests/test_root.py +++ b/isimip_files_api/tests/test_root.py @@ -15,11 +15,11 @@ def test_success(client, 
mocker): 'select_country', 'select_point', 'mask_bbox', - 'mask_country', 'mask_mask', + 'mask_country', 'mask_landonly', - 'fldmean', - 'outputtab', + 'compute_mean', + 'output_csv', 'create_mask', 'cutout_bbox' ] diff --git a/isimip_files_api/utils.py b/isimip_files_api/utils.py index 7c3e681..4db9a9f 100644 --- a/isimip_files_api/utils.py +++ b/isimip_files_api/utils.py @@ -1,21 +1,92 @@ import hashlib import importlib +import json import re +import shutil from pathlib import Path +from zipfile import ZipFile from flask import current_app as app -def get_zip_file_name(job_id): - return Path(app.config['OUTPUT_PREFIX'] + job_id).with_suffix('.zip').as_posix() +def get_config_path(config_file): + config_path = Path(config_file) + if not config_path.is_absolute(): + config_path = Path().cwd() / config_path + return config_path + + +def handle_post_request(request): + data = {} + files = {} + if request.content_type.startswith('multipart/form-data'): + for file_name, file_storage in request.files.items(): + if file_name == 'data': + data = json.loads(file_storage.read()) + else: + files[file_name] = file_storage + else: + data = request.json -def get_hash(data): + return data, files + + +def get_hash(data, uploads): m = hashlib.sha1() m.update(str(data).encode()) + for file_name, file_storage in uploads.items(): + m.update(file_storage.read()) return m.hexdigest() +def get_input_path(): + return Path(app.config['INPUT_PATH']).expanduser().resolve() + + +def get_tmp_path(): + return Path(app.config['TMP_PATH']).expanduser().resolve() + + +def get_output_path(): + return Path(app.config['OUTPUT_PATH']).expanduser().resolve() + + +def get_job_path(job_id): + job_path = get_tmp_path().joinpath(app.config['OUTPUT_PREFIX'] + job_id) + job_path.mkdir(parents=True, exist_ok=True) + return job_path + + +def get_zip_file_name(job_id): + return Path(app.config['OUTPUT_PREFIX'] + job_id).with_suffix('.zip').as_posix() + + +def get_zip_path(job_id): + zip_path = 
get_output_path() / get_zip_file_name(job_id) + zip_path.parent.mkdir(parents=True, exist_ok=True) + return zip_path + + +def get_zip_file(job_id): + zip_path = get_zip_path(job_id) + return ZipFile(zip_path.as_posix(), 'w') + + +def store_uploads(job_id, uploads): + job_path = get_job_path(job_id) + + for file_name, file_storage in uploads.items(): + with open(job_path / file_name, 'wb') as fp: + file_storage.seek(0) + file_storage.save(fp) + + +def remove_job_path(job_id): + job_path = get_job_path(job_id) + shutil.rmtree(job_path) + + def mask_paths(string): return re.sub(r'\/\S+\/', '', string) diff --git a/isimip_files_api/validators.py b/isimip_files_api/validators.py index acef4bd..4628327 100644 --- a/isimip_files_api/validators.py +++ b/isimip_files_api/validators.py @@ -58,11 +58,11 @@ def validate_operations(data): errors['operations'].append('To many operations provided (max: {MAX_OPERATIONS}).'.format(**app.config)) else: operation_registry = OperationRegistry() - for index, operation_config in enumerate(data['operations']): - if 'operation' in operation_config: - operation = operation_registry.get(operation_config) + for index, config in enumerate(data['operations']): + if 'operation' in config: + operation = operation_registry.get(config) if operation is None: - errors['operations'].append('operation "{operation}" was not found'.format(**operation_config)) + errors['operations'].append('operation "{operation}" was not found'.format(**config)) else: operation_errors = operation.validate() if operation_errors: @@ -76,3 +76,17 @@ def validate_operations(data): )) return errors + + +def validate_uploads(data, uploads): + errors = defaultdict(list) + + operation_registry = OperationRegistry() + for index, config in enumerate(data['operations']): + if 'operation' in config: + operation = operation_registry.get(config) + operation_errors = operation.validate_uploads(uploads) + if operation_errors: + errors['operations'] += operation_errors + + return errors 
diff --git a/pyproject.toml b/pyproject.toml index fc04589..af6b88c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,7 @@ [build-system] -requires = ["setuptools", "setuptools-scm"] +requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" -__author__ = 'Jochen Klar' -__email__ = 'jochen.klar@pik-potsdam.de' -__license__ = 'MIT' -__copyright__ = 'Copyright (c) 2020 Potsdam Institute for Climate Impact Research' - - [project] name = "isimip-files-api" authors = [ @@ -40,7 +34,7 @@ dependencies = [ "numpy~=1.26.3", "python-dotenv~=1.0.0", "tomli", - "rioxarray~=0.15.1", + "rioxarray~=0.15.0", "rq~=1.15.1", ] dynamic = ["version"] @@ -62,8 +56,12 @@ dev = [ "twine" ] -[tool.setuptools] -packages = ["isimip_files_api"] +[tool.setuptools.packages.find] +include = ["isimip_files_api*"] +exclude = ["*tests*"] + +[tool.setuptools.package-data] +"*" = ["*"] [tool.setuptools.dynamic] version = { attr = "isimip_files_api.__version__" } diff --git a/testing/shapes/pm.nc b/testing/shapes/pm.nc new file mode 100644 index 0000000000000000000000000000000000000000..2a90ca73d3b5784e77b2750c875337c1bb152de6 GIT binary patch literal 29451 zcmeI*eQ+G*eZcWYI%9<2SjYGUjKf;k_-xDe>BY7T7@w@Sld+|>BpVT9?u2x<4)n4k zorws|jMHX{LX+k?4B*bRnVXiRnrU8+fi@~B;nI>uE$ML6=>&D!B%9KqrUT*9@gJHt zEq$JSzRyV~jIk+cNx1b`pZo3eJiE_6ySI1y?6cD6B26vJm)x+VyrM#w>20Q^V|q@A zgg3w1_vMaAQ)l_IXUi-7d4a1)GDQ};q+FD#ES9Byp7MVR5mrHAT79EfB!fViSXkP` zy7c4M=ps={E?%Qvlr1VJ4h;4tkD9Gm!OVF)@zRZd;8%b7E#+m`sf6;fva%e-je+Y_ zjaS+SgkDrw)GZZtZwG|9YK2M(Y*X{YpZ}Bb4c}PZRPiyjoIu%yl}aJb`gIb*Y||34 zQI=HGGx!fEzT`KIPAvWWP`1EdyuCfs(iw65L!n%7^s1_wBl9hGqB=hONgc^*GU`Yy z!S_NIE0olW4$6b>+CuTS|M-jd9qf)Y$r{PyRvmQx!K0K|QdmpPg{BAmT-T^v%XO~B zdfjumf_FagdP|pFmM~jkj!2`a?IPibaS@cEPPxE4aXlG~?Yn;Z+XuT3x5(PvrgF*> zYORMSZA-kQ^2&9o%3gT%rcL5el|zL9|Ke$)iC0~f`dL1wR>l3$&YgLXa`Noq> zkMHUfzpiq8T)uB5(ck*aqav+l$ReusJgl ziTIdUC6ji_|LR2eV0?5eF*3ZrrfN8z?CU?Zzi#KgowYmq;}6CMN8F+KaI$Kk|J3gN z)jO+ecGg6{_RVj$bagcg^NCd^PyQ>_azy(6+uW8&)4{@57By3BTa)` 
z<~uCsl8?4HGn`23UAc^9^~X<-#^Ynrc<)%U&>Z0Ig;*;C_3_%DkcWXfNJNE7FC20g+460CaW*X?vzMli zP7C;_hU@sETb3^qmOA3Z-o86NuTSivtZb8-Q!d{&=7er8ofgJ{W$GlL&JX%%st{LQ ztzHJUtN3h%Iy^<7Jn*dl%};+osAZ{CbJ|}aUJj{jLiAQ^AHk(-RL1ghb%m+^sdDO{ ze{>($d4CZP z<+*>*3USsPMN7p)_xagGYn}Fs998DaLx8>xy)>EU>pI^r)6X4Wc=hs+`|(p*f7=GI z2P0UIL0pFwI2+FjHJ!&9oW?1f#B(@-<2Zml7{PiB;ySFr*?!iKGdPV?IEm+Q0>^Ox zdoY6a7{qm0fwO(AA7^kHr*IO_;RKH30QO)6>oJJyumWfAWBoXT(>R5bcn&9U90#xm zBUq0?T!$4n+spcK2B&cfC-EFk;5ZIo4@R&agSZYWa5l#JaR#Sx3McU#PT)8WU=K#H z9)q|JD=;6`>(5~ZQ|O|DA+#`mFXb_VDRj}n5L%c&NqNj*3SD$Cgcj!Sp*&_Vg)TZ6 zLJRXJD32LTp^FZN(8ByDD32LTp^FZN(8B!Pl*bIF&_xGBXkosG@|eLCy69jCEzI9V zdCXu6U34&n7Uqvr9y6Fi7aa_th52KY#|);>MF&G@VZNL4n86gf=wJvf%y&^9Gnhgb z9Sos``A*7X22<#wgCVpqf0Xi=!4$gaUD~F|;vzK&OW>dj~OQ_Y-6Gc4Exl zMvU28i7~s67_;@nnB7Z^**(OV-A#i6E zxD|uA1#iL4xCw8@O5BLHR@{Uea6Mj+YjHKM!j-rJm*8S7$Dh9-{OJOo$DHwiPCt(q z@aI?_*6E9J39i7ExC&R}TD%_D;|9D*d)CH{Scx~|Cftm-;1&$xR@{c$u?lzKPTZvp zhqPG?R}*8nh8V-O#2Bt4#_(=p4DTVv@LpmJ*AruSA2Eh+CC2b=#2CJv7{mLCF?6k_j?K?@wGDUgFEK>u!UHQBWR}wO0kyyfIz)(_FXkUj zqG%t_KZ?Zson62EPapY!HmS6jD`)|Kkck=1bz-jETv1xRC7~Lrj*lBs0Whcw{s=@a|(w?*6|RV_{Hc12fE4urcpBFe4e#%aUKD{K>fE3xD(|U&T@?VS_%`EZ?|(iTIkDQK`?`f=g!x&O>%?-sKv zN4@Zesb6?u^SJ0yGh};QEqQM2%`b{}Rb+>dgAIjv>7QTxbFqaSi{*@6&&0ni9#%Q- z(SzFi9{Ziw^+4lhIa5w*Jh$*a?&Vt4N%7Txx$u_wj#`GS={1sV-);N%Z#@?Ks(e|R zWDBT)IcD&UX^?;%!1xxyV!8k1PG4jW^QB|1{EX=xjE=@neZ|t5+`8p|v+yWC*{9Ry z-nZX=W$bZ1k5cr*I2bbhGHCjFOutWtexDS&=wJvfH2qAb-zh`CQwm*lFoYJGelFAR zm7(7&g)TZ6LJLhlo9TDU(C?N)7aa_tg{Gg+^!sJ#_e-IR4u;S|)6Z!79W(ShrqD$P zLujGt=QRDEv5v66<7_njq^93AX8KLh^s}0N*O=*dMbl4f`fX#T-xf_juj%)VnSNi> z4}3`1$MhS=h;20e%%zL`cHvQQ2bDMtenCbUM(@$>t&0{SrA5A~I z>35Gcla8jJ-t^nYnn*{}&u{wuV~wPvcTmS^jG>L@K7+aM5Npu+Z8Y~O%zcYkm~_k@ z&~f3u#~s9&-A|0!+leuI8!={YCC2PNV$9YPV|FhwX7>xeO1ON`kXV$4<( zW8pqlNY6jJ3wPoUtitWM4Yy(tx8NLkr45I)h5O#s#2Bt2#&9h$hU4(lL!5+O4Ex8a=clq+=RAwA)C>G%hBl}bkM~VW-y0&w2o399dt2;8O&iGtxn3L zgD$2pgE`Ej)kS%9(8UyHFo$`xx+#wix|qTY<}i=eG0LNZE~YSpIn1MVobu?PdptY8 z|Fl`?>4ai8&j+Mbut8tW6CIsK&H+(UEoAK 
zvYWtv@=ZR8w@NPPD*0b6xT(1=B&$`S6Nk?v$D$X{u|%(PF#h0Rv~Q$8{+`bi2ISHU z&)fA4Cr5QLwQ{v$z4Af^^~_#WcA*aYwfh(uoRQ>MfA7$aBZ*`(J~-4n`oLTPRn@|i zewQjhYYmN@8BRu%54rDM^+i0NN@;?2KY zr=A{7^hbw!T{kg&+Ha|)wA8$7e$_2(sYegz9^3REJ-l9KwdDV`@_&UYCLb+SRaTQx z9b?b7`m_9b^J7)b(o|}JQVU#03$!+N9QIGx>K!L1bMjViJT;kLf@9uz3+J0Ro0`$Cd2Qik=|!mpN-a=ofl>>UTAi8|Vk&e07g{212MfO~(a#zf(Io;8XwBD6;~Rh9vstHH z$(k;JYQ`@8bVc;l&EFMkR1+*J^Pj&s`4jK2#m#D4_vt4i{_?q3exM(w;C|wdwtQ-) zqp4w?%u<@(Ukl71sAcZb&fl{|J5kI(Oo|SXqFq~bBoyuAqNAv2-xlrp$)7wo{r)zm zRI1bhr55<*ZGrK7-<+;qE}yOoq`&{rm*4uvXBW4tv29m8ts9kNOMexns)s)NmY3_1?POwKe-{c31D)S65qG)o`pk($U%0efNpV zW~ZyQvZ``lbxlpBTwp^_o2;369K5osy`!@`QkZT25(+79UE;VZry0j7I%G zMRKt}jd!tO;j$gWU+L~?Sg@dk{LK?rwxHaTH(vMR0kxo2`VYD!hSlG^iAPV#G2O}d zuo|v>xs{q2NMcq;P$5o<3aXEN8(K~$E6_HEzCpBu{|ClLW&RiX?k=gu@8}r#v zC~-PIa(cAa9Z2*=)iL_6wO*rFFfcNj_|(X7Qa-#b|3J){p}7J#u!_4zMhE-t*0>yj spG@@GT@Q^V<3qOkFbjx}>4nT6BR;nPGeTbO58W<>7SbE}%klGn0B{RaaR2}S literal 0 HcmV?d00001 From 9e89ecb872c9e233d8b9c33793ff80b9e92de8cf Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 4 Mar 2024 18:43:59 +0100 Subject: [PATCH 17/22] Set FLASK_CONFIG in Makefile --- Makefile | 1 + isimip_files_api/app.py | 6 ++++-- isimip_files_api/utils.py | 11 +++++++---- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 9ce4544..f4dad06 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ export FLASK_APP=isimip_files_api.app export FLASK_ENV=development export FLASK_DEBUG=true +export FLASK_CONFIG=config.toml export RQ_WORKER_CLASS=isimip_files_api.worker.Worker server: diff --git a/isimip_files_api/app.py b/isimip_files_api/app.py index 99439f5..341dc68 100644 --- a/isimip_files_api/app.py +++ b/isimip_files_api/app.py @@ -17,8 +17,10 @@ def create_app(): app = Flask(__name__) app.config.from_object('isimip_files_api.config') app.config.from_prefixed_env() - if 'CONFIG' in app.config: - app.config.from_file(get_config_path(app.config['CONFIG']), load=tomli.load, text=False) + + config_path = 
get_config_path(app.config.get('CONFIG')) + if config_path: + app.config.from_file(get_config_path(config_path), load=tomli.load, text=False) # configure logging configure_logging(app) diff --git a/isimip_files_api/utils.py b/isimip_files_api/utils.py index 4db9a9f..f016e4a 100644 --- a/isimip_files_api/utils.py +++ b/isimip_files_api/utils.py @@ -10,10 +10,13 @@ def get_config_path(config_file): - config_path = Path(config_file) - if not config_path.is_absolute(): - config_path = Path().cwd() / config_path - return config_path + if config_file is not None: + config_path = Path(config_file) + if not config_path.is_absolute(): + config_path = Path().cwd() / config_path + + if config_path.is_file(): + return config_path def handle_post_request(request): From 76b90fec968e0f6bfdc7382c30d9306b2d7cffaf Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 4 Mar 2024 18:44:39 +0100 Subject: [PATCH 18/22] Update version, README and add docs --- README.md | 282 +++++++---------------------------- docs/operations.md | 270 +++++++++++++++++++++++++++++++++ docs/setup.md | 181 ++++++++++++++++++++++ isimip_files_api/__init__.py | 2 +- 4 files changed, 505 insertions(+), 230 deletions(-) create mode 100644 docs/operations.md create mode 100644 docs/setup.md diff --git a/README.md b/README.md index 8b6e65d..92a2563 100644 --- a/README.md +++ b/README.md @@ -1,77 +1,55 @@ isimip-files-api ================ -A webservice to asynchronously mask regions from NetCDF files, using [Flask](https://palletsprojects.com/p/flask/) and [RQ](https://python-rq.org/). +A webservice to asynchronously perform operations on NetCDF files, using [Flask](https://palletsprojects.com/p/flask/) and [RQ](https://python-rq.org/). -Setup ------ - -The service needs [redis](https://redis.io/) to be set up and configured properly. With redit it is especially important to [guard it agains remote access](https://redis.io/topics/security). 
+The service is deployed on https://files.isimip.org/api/v2 as part of the [ISIMIP Repository](https://data.isimip.org). The previous version of the API is available at https://files.isimip.org/api/v1. -The python dependencies can be installed (in a virtual environment) using: -``` -pip install -r requirements.txt -``` +Setup +----- -The the `.env` file can be created from `.env.sample` and adjusted to the particular environment. +The API makes no assumptions about the files other than that they are globally gridded NetCDF files. In particular, no ISIMIP internal conventions are used. It can therefore be reused by other archives. Setup and deployment are described in [docs/install.md](docs/setup.md). Usage ----- -Once the application is setup, the development server can be started using: - -``` -flask run -``` - -The worker for the asynchronous jobs need to be started in a different terminal session using: - -``` -rq worker -``` +The service is integrated into the [ISIMIP Repository](https://data.isimip.org) and is available from the search interface and the dataset and file pages through the "Configure downloads" link. This functionality currently uses version 1.1.1 of the API. -Asynchronous jobs are created using a HTTP `POST` request to the root api entpoint. To mask everything but a bounding box in lat/lon use: +For programmatic access, the API can be used with standard HTTP libraries (e.g. [requests](https://requests.readthedocs.io) for Python). While the following examples use the ISIMIP Repository, Python and `requests`, they should be transferable to other servers, languages or libraries. -``` -POST / -{ - "path": "path/to/file.nc", - "task": "mask_bbox", - "bbox": [south, north, west, east] -} -``` +The API is used by sending HTTP POST request to its root endpoint. The request needs to use the content type `application/json` and contain a single JSON object with a list of `paths` and a list of `operations`. 
While the `paths` can be obtained from the [ISIMIP Repository](https://data.isimip.org) (they usually start with `ISIMIP3`), the operations are described in [docs/operations.md](docs/operations.md). -where `south`, `north`, `west`, `east` are floats and `path` is the path to the file on the server relative to `INPUT_PATH` given in `.env`. For a country use: +Using Python and `requests`, requests can be performed like this: -``` -POST / -{ - "path": "path/to/file.nc", - "task": "mask_country", - "country": "deu" -} -``` +```python +import requests -for, e. g. Germany. To mask out all sea and antarctica data use: +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [ + 'ISIMIP3b/InputData/climate/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', + 'ISIMIP3b/InputData/climate/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', + 'ISIMIP3b/InputData/climate/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2031.nc', + ... + ], + 'operations': [ + { + 'operation': 'select_point', + 'bbox': [52.380551, 13.064332] + } + ] +}) -``` -POST / -{ - "path": "path/to/file.nc", - "task": "mask_landonly" -} +result = response.json() ``` -The response is a JSON like this: +The response is a JSON object: -``` +```json { - "file_name": "isimip-download-1eff769a7edd0a8076f11dc85609f0090562a671.zip", - "file_url": "https://files.isimip.org/api/v1/output/isimip-download-1eff769a7edd0a8076f11dc85609f0090562a671.zip", "id": "5741ca0e7f824d37ef23e107f5e5261a31e974a6", - "job_url": "http://127.0.0.1:5000/5741ca0e7f824d37ef23e107f5e5261a31e974a6", + "job_url": "https://files.isimip.org/api/v2/5741ca0e7f824d37ef23e107f5e5261a31e974a6", "meta": {}, "status": "queued", "ttl": 604800 @@ -80,188 +58,34 @@ The response is a JSON like this: Performing the initial request again, or performing a `GET` on the url given in `job_url`, will give an update on the job status, e.g. 
-``` +```json { - "file_name": "isimip-download-1eff769a7edd0a8076f11dc85609f0090562a671.zip", - "file_url": "https://files.isimip.org/api/v1/output/isimip-download-1eff769a7edd0a8076f11dc85609f0090562a671.zip", - "id": "5741ca0e7f824d37ef23e107f5e5261a31e974a6", - "job_url": "http://127.0.0.1:5000/5741ca0e7f824d37ef23e107f5e5261a31e974a6", - "meta": {"created_files": 1, "total_files": 1}, - "status": "finished", - "ttl": 604800 + "id": "5741ca0e7f824d37ef23e107f5e5261a31e974a6", + "job_url": "https://files.isimip.org/api/v2/5741ca0e7f824d37ef23e107f5e5261a31e974a6", + "meta": { + "created_files": 0, + "total_files": 1 + }, + "status": "started", + "ttl": 604800 } ``` -When the job is finished, the resulting file is located at `file_name` relative to the path given in `OUTPUT_PATH` in `.env`. When `OUTPUT_PATH` is made public via a web server (e.g. NGINX, see below for deployment), the file can be downloaded under the URL given by `file_url`. - -The following exaples can be used from the command line with [httpie](https://httpie.org/) or [curl](https://curl.haxx.se/): - -``` -http :5000 path=path/to/file.nc bbox=:"[0, 10, 0, 10]" -http :5000 path=path/to/file.nc country=deu -http :5000 path=path/to/file.nc landonly:=true - -curl 127.0.0.1:5000 -H "Content-Type: application/json" -d '{"path": "path/to/file.nc", "task": "mask_bbox","bbox": [south, north, west, east]}' -curl 127.0.0.1:5000 -H "Content-Type: application/json" -d '{"path": "path/to/file.nc", "task": "mask_country", "country": "deu"}' -curl 127.0.0.1:5000 -H "Content-Type: application/json" -d '{"path": "path/to/file.nc", "task": "mask_landonly"}' -``` - -Deployment ----------- - -When deploying to the internet, a setup of [NGINX](https://www.nginx.com/), (gunicorn)[https://gunicorn.org/], and [systemd](https://www.freedesktop.org/wiki/Software/systemd/) services is recommended, but other services can be used as well. 
We further assume that a user `isimip` with the group `isimip` and the home `/home/isimip` exists, and that the repository is cloned at `/home/isimip/api`. - -After following the steps under **Setup** (as the `isimip` user), add the folowing to `.env`: - -``` -# gunicorn configuration -GUNICORN_BIN=/home/isimip/api/env/bin/gunicorn -GUNICORN_WORKER=3 -GUNICORN_PORT=9002 -GUNICORN_TIMEOUT=120 -GUNICORN_PID_FILE=/run/gunicorn/api/pid -GUNICORN_ACCESS_LOG_FILE=/var/log/gunicorn/api/access.log -GUNICORN_ERROR_LOG_FILE=/var/log/gunicorn/api/error.log -``` - -Then, as `root`, create a file `/etc/tmpfiles.d/isimip-api.conf` with the following content: - -``` -d /var/log/isimip-api 750 isimip isimip -d /var/log/gunicorn/api 750 isimip isimip -d /run/gunicorn/api 750 isimip isimip -``` - -Create temporary directories using: - -``` -systemd-tmpfiles --create -``` - -In order to run the api service with systemd three scripts need to be added to `/etc/systemd/system` - -``` -# in /etc/systemd/system/isimip-files-api.service - -[Unit] -Description=pseudo-service to start/stop all isimip-files-api services - -[Service] -Type=oneshot -ExecStart=/bin/true -RemainAfterExit=yes - -[Install] -WantedBy=network.target -``` - -``` -# in /etc/systemd/system/isimip-files-api-app.service - -[Unit] -Description=isimip-api gunicorn daemon -PartOf=isimip-files-api.service -After=isimip-files-api.service - -[Service] -User=isimip -Group=isimip - -WorkingDirectory=/home/isimip/api -EnvironmentFile=/home/isimip/api/.env - -ExecStart=/bin/sh -c '${GUNICORN_BIN} \ - --workers ${GUNICORN_WORKER} \ - --pid ${GUNICORN_PID_FILE} \ - --bind localhost:${GUNICORN_PORT} \ - --timeout ${GUNICORN_TIMEOUT} \ - --access-logfile ${GUNICORN_ACCESS_LOG_FILE} \ - --error-logfile ${GUNICORN_ERROR_LOG_FILE} \ - "isimip_files_api:app:create_app()"' - -ExecReload=/bin/sh -c '/usr/bin/pkill -HUP -F ${GUNICORN_PID_FILE}' - -ExecStop=/bin/sh -c '/usr/bin/pkill -TERM -F ${GUNICORN_PID_FILE}' - -[Install] 
-WantedBy=isimip-api.target -``` - -``` -# in /etc/systemd/system/isimip-files-api-worker.service - -[Unit] -Description=RQ worker for isimip-api -PartOf=isimip-files-api.service -After=isimip-files-api.service - -[Service] -User=isimip -Group=isimip - -WorkingDirectory=/home/isimip/api -Environment=LANG=en_US.UTF-8 -Environment=LC_ALL=en_US.UTF-8 -Environment=LC_LANG=en_US.UTF-8 - -ExecStart=/home/isimip/api/env/bin/rq worker -w 'isimip_files_api.worker.LogWorker' - -ExecReload=/bin/kill -s HUP $MAINPID +When the job is completed on the server the status becomes `finished` and the JSON contains a `file_name` and a `file_url`. -ExecStop=/bin/kill -s TERM $MAINPID - -PrivateTmp=true -Restart=always - -[Install] -WantedBy=isimip-api.target -``` - -Reload `systemd`, start and enable the service: - -``` -systemctl daemon-reload -systemctl start isimip-api-app -systemctl start isimip-api-worker - -systemctl enable isimip-api-app -systemctl enable isimip-api-worker -systemctl enable isimip-api -``` - -From now on, the services can be controlled using: - -``` -systemctl start isimip-api -systemctl stop isimip-api -systemctl restart isimip-api -``` - -If the services won't start: `journalctl -xf` might give a clue why. 
- -Lastly, add - -``` - location /api/v1 { - proxy_pass http://127.0.0.1:9002/; - proxy_redirect off; - - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /api/public { - alias /data/api/public; - } +```json +{ + "file_name": "download-5741ca0e7f824d37ef23e107f5e5261a31e974a6.zip", + "file_url": "https://files.isimip.org/api/v2/output/download-5741ca0e7f824d37ef23e107f5e5261a31e974a6.zip", + "id": "5741ca0e7f824d37ef23e107f5e5261a31e974a6", + "job_url": "https://files.isimip.org/api/v2/5741ca0e7f824d37ef23e107f5e5261a31e974a6", + "meta": { + "created_files": 1, + "total_files": 1 + }, + "status": "finished", + "ttl": 604800 +} ``` -to your NGINX virtual host configuration. The service should then be available at https://yourdomain/api/v1/. - -The created files can be automatically deleted using the included `isimip-files-api-clean` script. To do so, add the following to the crontab of the `isimip` user (by using `crontab -e`): - -``` -# clean files everyday at 5 a.m. -0 5 * * * cd /home/isimip/api; /home/isimip/api/env/bin/isimip-files-api-clean -``` +The file can be downloaded under the URL given by `file_url` (if the output directory of the API is made public via a web server). diff --git a/docs/operations.md b/docs/operations.md new file mode 100644 index 0000000..8257939 --- /dev/null +++ b/docs/operations.md @@ -0,0 +1,270 @@ +Operations +========== + +The following operations are available. Please note that some of the operations can be chained, e.g. + +``` +create_mask -> mask_mask -> compute_mean -> output_csv +``` + +Pleas also note the examples given in the [examples](../examples) directory. 
+ +### Select point + +A time series of a point can be selected using: + +```python +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [...], + 'operations': [ + { + 'operation': 'select_point', + 'bbox': [52.380551, 13.064332] + } + ] +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -f nc4c -z zip_5 -L -sellonlatbox,WEST,EAST,SOUTH,NORTH IFILE OFILE +``` + +### Select bounding box + +A rectangular bounding box can be selected using: + +```python +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [...], + 'operations': [ + { + 'operation': 'select_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ] +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -f nc4c -z zip_5 -L -selindexbox,IX,IX,IY,IY IFILE OFILE +``` + +where `IX` and `IY` are the grid indexes of the point computed from the file. + +### Mask bounding box + +A rectangular bounding box can be masked (everything outside is set to `missing_value`) using: + +```python +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [...], + 'operations': [ + { + 'operation': 'mask_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ] +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -f nc4c -z zip_5 -L -sellonlatbox,WEST,EAST,SOUTH,NORTH IFILE OFILE +``` + +### Mask country + +A country can be masked (everything outside is set to `missing_value`) using: + +```python +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [...], + 'operations': [ + { + 'operation': 'mask_country', + 'country': "bra" # e.g. 
Brasil + } + ] +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -f nc4c -z zip_5 -L -ifthen -selname,m_BRA COUNTRYMASK IFILE OFILE +``` + +### Mask land only + +The landmass (without antarctica) can be masked (everything outside is set to `missing_value`) using: + +```python +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [...], + 'operations': [ + { + 'operation': 'mask_landonly' + } + ] +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -f nc4c -z zip_5 -L -ifthen LANDSEAMASK IFILE OFILE +``` + +### Mask using a NetCDF mask + +In order to mask using a custom NetCDF file, the file needs to be uploaded together with the JSON. This is done using the content type `multipart/form-data`. Using `requests` this is done slightly different as before: + +```python +import json +from pathlib import Path + +import requests + +mask_path = Path('path/to/mask.nc') + +data = { + 'paths': [...], + 'operations': [ + { + 'operation': 'mask_mask', + 'mask': 'mask.nc', + 'var': 'm_VAR' # the mask variable in the NetCDF file + } + ] +} + +response = requests.post(url, files={ + 'data': json.dumps(data), + 'mask.nc': mask_path.read_bytes(), +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -f nc4c -z zip_5 -L -ifthen -selname,m_VAR mask.nc IFILE OFILE +``` + +### Compute mean + +After one of the [CDO](https://code.mpimet.mpg.de/projects/cdo) based operations (e.g. `mask_country`) a field mean can be computed using: + +```python +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [...], + 'operations': [ + { + 'operation': 'mask_country', + 'country': "bra" # e.g. 
Brasil + }, + { + 'operation': 'compute_mean' + } + ] +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -f nc4c -z zip_5 -L -fldmean -ifthen -selname,m_BRA COUNTRYMASK IFILE OFILE +``` + +### Output CSV + +After one of the other [CDO](https://code.mpimet.mpg.de/projects/cdo) based operations (e.g. `mask_country` and `compute_mean`) the output can be converted to [CSV](https://en.wikipedia.org/wiki/Comma-separated_values): + +```python +response = requests.post('https://files.isimip.org/api/v2', json={ + 'paths': [...], + 'operations': [ + { + 'operation': 'mask_country', + 'country': "bra" # e.g. Brasil + }, + { + 'operation': 'compute_mean' + }, + { + 'operation': 'output_csv' + } + ] +}) +``` + +The operation is performed using [CDO](https://code.mpimet.mpg.de/projects/cdo) using: + +```bash +cdo -s outputtab,date,value,nohead -fldmean -ifthen -selname,m_BRA COUNTRYMASK IFILE OFILE +``` + +Afterwards the TAB seperated CDO output is converted to CSV. + +Full examples are is given in [examples/time_series_bbox.py](../time_series_bbox.py) and [examples/time_series_country.py](../time_series_country.py). + +### Cutout bounding box + +Instead of using [CDO](https://code.mpimet.mpg.de/projects/cdo) to select a bounding box, the cut-out can also be performed using [ncks](https://nco.sourceforge.net/nco.html). This operation has a much better performance when applied to the high resolution data from [CHELSA-W5E5 v1.0: W5E5 v1.0 downscaled with CHELSA v2.0](https://doi.org/10.48364/ISIMIP.836809.3). 
+
+```python
+response = requests.post('https://files.isimip.org/api/v2', json={
+    'paths': [...],
+    'operations': [
+        {
+            'operation': 'cutout_bbox',
+            'bbox': [47.5520, 47.6680, 12.8719, 13.1393]
+        }
+    ]
+})
+```
+
+The operation is performed using [ncks](https://nco.sourceforge.net/nco.html) using:
+
+```bash
+ncks -h -d lat,SOUTH,NORTH -d lon,WEST,EAST IFILE OFILE
+```
+
+### Create mask
+
+Masks can be created from vector based input files, namely [Shapefiles](https://en.wikipedia.org/wiki/Shapefile) or [GeoJSON files](https://en.wikipedia.org/wiki/GeoJSON). This operation is performed once and the resulting mask can then be used in the `mask_mask` operation. As for the `mask_mask` operation, the input file needs to be uploaded together with the JSON. This is done using the content type `multipart/form-data`. For a shapefile, the different files need to be in one zip file. Using `requests` this is done like this:
+
+```python
+import json
+from pathlib import Path
+
+import requests
+
+shape_path = Path('path/to/shape.zip')
+
+data = {
+    'paths': [...],
+    'operations': [
+        {
+            'operation': 'create_mask',
+            'shape': 'shape.zip',
+            'mask': 'shape.nc'
+        },
+        {
+            'operation': 'mask_mask',
+            'mask': 'shape.nc'
+        }
+    ]
+}
+
+response = requests.post(url, files={
+    'data': json.dumps(data),
+    'shape.zip': shape_path.read_bytes(),
+})
+```
+
+Full examples are is given in [examples/time_series_shapefile.py](../time_series_shapefile.py) and [examples/time_series_geojson.py](../time_series_geojson.py).
diff --git a/docs/setup.md b/docs/setup.md
new file mode 100644
index 0000000..5cd7d27
--- /dev/null
+++ b/docs/setup.md
@@ -0,0 +1,181 @@
+# Setup
+
+The service needs [redis](https://redis.io/) to be set up and configured properly. With redis it is especially important to [guard it against remote access](https://redis.io/topics/security).
+
+The package and its dependencies can be installed (in a virtual environment) using:
+
+```
+pip install -e . 
+```
+
+The service can be configured using a `config.toml` located at the root of the repository. Please refer to [isimip_files_api/config.py](../isimip_files_api/config.py) for the different settings and their default values.
+
+## Usage
+
+Once the application is set up, the development setup can be controlled using the provided `Makefile`, which sets some environment variables and wraps `flask run`
+and `rq worker`.
+
+The development server can be started using:
+
+```
+make server
+```
+
+The worker for the asynchronous jobs needs to be started in a different terminal using:
+
+```
+make worker
+```
+
+The API is then available at http://127.0.0.1:5000.
+
+## Deployment
+
+When deploying to the internet, a setup of [NGINX](https://www.nginx.com/), (gunicorn)[https://gunicorn.org/], and [systemd](https://www.freedesktop.org/wiki/Software/systemd/) services is recommended, but other services can be used as well. We further assume that a user `isimip` with the group `isimip` and the home `/home/isimip` exists, and that the repository is cloned at `/home/isimip/api`. 
+ +Then, as `root`, create a file `/etc/tmpfiles.d/api.conf` with the following content: + +``` +d /var/log/gunicorn/api 750 isimip isimip +d /var/log/flask/api 750 isimip isimip +d /run/gunicorn/api 750 isimip isimip +``` + +Create temporary directories using: + +``` +systemd-tmpfiles --create +``` + +In order to run the api service with systemd three scripts need to be added to `/etc/systemd/system` + +``` +# in /etc/systemd/system/api.service + +[Unit] +Description=isimip-files-api v2 gunicorn daemon +After=network.target + +[Service] +User=isimip +Group=isimip + +WorkingDirectory=/srv/isimip/api + +Environment=FLASK_APP=isimip_files_api.app +Environment=FLASK_ENV=production +Environment=FLASK_CONFIG=config.toml +Environment=FLASK_REDIS_URL=redis://localhost:6379 + +Environment=GUNICORN_BIN=env/bin/gunicorn +Environment=GUNICORN_WORKER=3 +Environment=GUNICORN_PORT=9001 +Environment=GUNICORN_TIMEOUT=120 +Environment=GUNICORN_PID_FILE=/run/gunicorn/api-v2/pid +Environment=GUNICORN_ACCESS_LOG_FILE=/var/log/gunicorn/api-v2/access.log +Environment=GUNICORN_ERROR_LOG_FILE=/var/log/gunicorn/api-v2/error.log + +ExecStart=/bin/sh -c '${GUNICORN_BIN} \ + --workers ${GUNICORN_WORKER} \ + --pid ${GUNICORN_PID_FILE} \ + --bind localhost:${GUNICORN_PORT} \ + --timeout ${GUNICORN_TIMEOUT} \ + --access-logfile ${GUNICORN_ACCESS_LOG_FILE} \ + --error-logfile ${GUNICORN_ERROR_LOG_FILE} \ + "isimip_files_api.app:create_app()"' + +ExecReload=/bin/sh -c '/usr/bin/pkill -HUP -F ${GUNICORN_PID_FILE}' + +ExecStop=/bin/sh -c '/usr/bin/pkill -TERM -F ${GUNICORN_PID_FILE}' + +[Install] +WantedBy=multi-user.target +``` + +``` +# in /etc/systemd/system/api-worker@.service + +[Unit] +Description=RQ worker for isimip-files-api v2 (#%i) +After=network.target + +[Service] +Type=simple +User=isimip +Group=isimip + +WorkingDirectory=/srv/isimip/api + +Environment=LANG=en_US.UTF-8 +Environment=LC_ALL=en_US.UTF-8 +Environment=LC_LANG=en_US.UTF-8 + +Environment=FLASK_APP=isimip_files_api.app 
+Environment=FLASK_ENV=production
+Environment=FLASK_CONFIG=config.toml
+
+Environment=RQ_BIN=env/bin/rq
+Environment=RQ_WORKER_CLASS=isimip_files_api.worker.Worker
+Environment=RQ_REDIS_URL=redis://localhost:6379
+
+ExecStart=/bin/sh -c '${RQ_BIN} worker'
+ExecReload=/bin/kill -s HUP $MAINPID
+ExecStop=/bin/kill -s TERM $MAINPID
+
+PrivateTmp=true
+Restart=always
+RestartSec=5
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Reload `systemd`, start and enable the service:
+
+```
+systemctl daemon-reload
+systemctl start api
+systemctl start api-worker@1
+systemctl start api-worker@2 # more workers can be created
+
+systemctl enable api
+systemctl enable api-worker@1
+systemctl enable api-worker@2
+```
+
+From now on, the services can be controlled using:
+
+```
+systemctl start api
+systemctl stop api
+systemctl restart api
+```
+
+If the services won't start: `journalctl -xf` might give a clue why.
+
+Lastly, add
+
+```
+    location /api/v2 {
+        proxy_pass http://127.0.0.1:9001/;
+        proxy_redirect off;
+
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Host $host;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    location /api/v2/public {
+        alias /data/api/public;
+    }
+```
+
+to your NGINX virtual host configuration. The service should then be available at https://yourdomain/api/v2/.
+
+The created files can be automatically deleted using the included `isimip-files-api-clean` script. To do so, add the following to the crontab of the `isimip` user (by using `crontab -e`):
+
+```
+# clean files everyday at 5 a.m. 
+0 5 * * * cd /home/isimip/api; /home/isimip/api/env/bin/isimip-files-api-clean +``` diff --git a/isimip_files_api/__init__.py b/isimip_files_api/__init__.py index 04af162..568587b 100644 --- a/isimip_files_api/__init__.py +++ b/isimip_files_api/__init__.py @@ -1 +1 @@ -VERSION = __version__ = '2.0.0' +VERSION = __version__ = '2.0.0dev1' From 0d2a249b12eb34ecbc35281b02ce32a6b5cccae0 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 4 Mar 2024 19:23:23 +0100 Subject: [PATCH 19/22] Remove SelectCountryOperation --- docs/operations.md | 2 +- isimip_files_api/config.py | 1 - isimip_files_api/operations/cdo.py | 18 +----------------- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/docs/operations.md b/docs/operations.md index 8257939..b25c0f1 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -19,7 +19,7 @@ response = requests.post('https://files.isimip.org/api/v2', json={ 'operations': [ { 'operation': 'select_point', - 'bbox': [52.380551, 13.064332] + 'point': [52.380551, 13.064332] } ] }) diff --git a/isimip_files_api/config.py b/isimip_files_api/config.py index f40aeaa..7c8d4d9 100644 --- a/isimip_files_api/config.py +++ b/isimip_files_api/config.py @@ -49,7 +49,6 @@ # list of operations which can be performed OPERATIONS = [ 'isimip_files_api.operations.cdo.SelectBBoxOperation', - 'isimip_files_api.operations.cdo.SelectCountryOperation', 'isimip_files_api.operations.cdo.SelectPointOperation', 'isimip_files_api.operations.cdo.MaskBBoxOperation', 'isimip_files_api.operations.cdo.MaskMaskOperation', diff --git a/isimip_files_api/operations/cdo.py b/isimip_files_api/operations/cdo.py index a6414b9..4750eac 100644 --- a/isimip_files_api/operations/cdo.py +++ b/isimip_files_api/operations/cdo.py @@ -27,22 +27,6 @@ def get_region(self): return f'lat{south}to{north}lon{west}to{east}' -class SelectCountryOperation(CountryOperationMixin, CdoOperation): - - operation = 'select_country' - - def validate(self): - return self.validate_country() - - def 
get_args(self): - country = self.get_country() - mask_path = self.get_mask_path() - return ['-ifthen', f'-selname,m_{country:3.3}', str(mask_path)] - - def get_region(self): - return self.get_country().lower() - - class SelectPointOperation(PointOperationMixin, CdoOperation): operation = 'select_point' @@ -110,7 +94,7 @@ def validate(self): def get_args(self): country = self.get_country() mask_path = str(Path(app.config['COUNTRYMASKS_FILE_PATH']).expanduser()) - return [f'-selname,m_{country:3.3}', mask_path] + return ['-ifthen', f'-selname,m_{country:3.3}', mask_path] def get_region(self): return self.get_country().lower() From 6ee961ff9c55292a4d3d349aa44f3793f66de3e0 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 4 Mar 2024 19:23:44 +0100 Subject: [PATCH 20/22] Update examples --- examples/cutout_bbox.py | 50 +++++++++++++++++++++++++++ examples/mask_bbox.py | 50 +++++++++++++++++++++++++++ examples/mask_country.py | 50 +++++++++++++++++++++++++++ examples/mask_landonly.py | 49 +++++++++++++++++++++++++++ examples/time_series_bbox.py | 22 ++++++------ examples/time_series_country.py | 24 +++++++------ examples/time_series_geojson.py | 21 ++++++------ examples/time_series_point.py | 56 +++++++++++++++++++++++++++++++ examples/time_series_shapefile.py | 33 +++++++++--------- 9 files changed, 308 insertions(+), 47 deletions(-) create mode 100644 examples/cutout_bbox.py create mode 100644 examples/mask_bbox.py create mode 100644 examples/mask_country.py create mode 100644 examples/mask_landonly.py create mode 100644 examples/time_series_point.py diff --git a/examples/cutout_bbox.py b/examples/cutout_bbox.py new file mode 100644 index 0000000..8f0b77e --- /dev/null +++ b/examples/cutout_bbox.py @@ -0,0 +1,50 @@ +import json +import time +import zipfile +from pathlib import Path + +import requests + +url = 'http://localhost:5000' + +paths = [ + 
'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' +] + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'cutout_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ] +} + +response = requests.post(url, json=data) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) + with requests.get(job['file_url'], stream=True) as response: + with zip_path.open('wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git a/examples/mask_bbox.py b/examples/mask_bbox.py new file mode 100644 index 0000000..151df9a --- /dev/null +++ b/examples/mask_bbox.py @@ -0,0 +1,50 @@ +import json +import time +import zipfile +from pathlib import Path + +import requests + +url = 'http://localhost:5000' + +paths = [ + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' +] + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'mask_bbox', + 'bbox': [-23.43651, 23.43651, -180, 180] + } + ] +} + +response = requests.post(url, json=data) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) + with requests.get(job['file_url'], 
stream=True) as response: + with zip_path.open('wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git a/examples/mask_country.py b/examples/mask_country.py new file mode 100644 index 0000000..3f885c0 --- /dev/null +++ b/examples/mask_country.py @@ -0,0 +1,50 @@ +import json +import time +import zipfile +from pathlib import Path + +import requests + +url = 'http://localhost:5000' + +paths = [ + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' +] + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'mask_country', + 'country': 'aus' + } + ] +} + +response = requests.post(url, json=data) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) + with requests.get(job['file_url'], stream=True) as response: + with zip_path.open('wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git a/examples/mask_landonly.py b/examples/mask_landonly.py new file mode 100644 index 0000000..4ad8ace --- /dev/null +++ b/examples/mask_landonly.py @@ -0,0 +1,49 @@ +import json +import time +import zipfile +from pathlib import Path + +import requests + +url = 'http://localhost:5000' + +paths = [ + 
'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' +] + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'mask_landonly' + } + ] +} + +response = requests.post(url, json=data) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) + with requests.get(job['file_url'], stream=True) as response: + with zip_path.open('wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git a/examples/time_series_bbox.py b/examples/time_series_bbox.py index 69d96f6..6f3aaf4 100644 --- a/examples/time_series_bbox.py +++ b/examples/time_series_bbox.py @@ -1,20 +1,14 @@ import json import time +import zipfile +from pathlib import Path import requests url = 'http://localhost:5000' paths = [ - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', - 
'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' ] data = { @@ -48,7 +42,15 @@ break if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) with requests.get(job['file_url'], stream=True) as response: - with open(job['file_name'], 'wb') as fp: + with zip_path.open('wb') as fp: for chunk in response.iter_content(chunk_size=8192): fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git a/examples/time_series_country.py b/examples/time_series_country.py index 789a9a2..1bfdde0 100644 --- a/examples/time_series_country.py +++ b/examples/time_series_country.py @@ -1,27 +1,21 @@ import json import time +import zipfile +from pathlib import Path import requests url = 'http://localhost:5000' paths = [ - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', - 
'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' ] data = { 'paths': paths, 'operations': [ { - 'operation': 'select_country', + 'operation': 'mask_country', 'country': 'bra' }, { @@ -48,7 +42,15 @@ break if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) with requests.get(job['file_url'], stream=True) as response: - with open(job['file_name'], 'wb') as fp: + with zip_path.open('wb') as fp: for chunk in response.iter_content(chunk_size=8192): fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git 
a/examples/time_series_geojson.py b/examples/time_series_geojson.py index db51bb5..42c9a05 100644 --- a/examples/time_series_geojson.py +++ b/examples/time_series_geojson.py @@ -1,5 +1,6 @@ import json import time +import zipfile from pathlib import Path import requests @@ -7,15 +8,7 @@ url = 'http://localhost:5000/' paths = [ - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' ] shape_path = Path('testing') / 'shapes' / 'pm.json' @@ -59,7 +52,15 @@ break if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) with 
requests.get(job['file_url'], stream=True) as response: - with open(job['file_name'], 'wb') as fp: + with zip_path.open('wb') as fp: for chunk in response.iter_content(chunk_size=8192): fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git a/examples/time_series_point.py b/examples/time_series_point.py new file mode 100644 index 0000000..21fb5a7 --- /dev/null +++ b/examples/time_series_point.py @@ -0,0 +1,56 @@ +import json +import time +import zipfile +from pathlib import Path + +import requests + +url = 'http://localhost:5000' + +paths = [ + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' +] + +data = { + 'paths': paths, + 'operations': [ + { + 'operation': 'select_point', + 'point': [52.380551, 13.064332] + }, + { + 'operation': 'compute_mean', + }, + { + 'operation': 'output_csv' + } + ] +} + +response = requests.post(url, json=data) + +job = response.json() +print(json.dumps(job, indent=2)) + +for i in range(100): + job = requests.get(job['job_url']).json() + print(json.dumps(job, indent=2)) + + if job['status'] in ['queued', 'started']: + time.sleep(2) + else: + break + +if job['status'] == 'finished': + # download file + zip_path = Path(job['file_name']) + with requests.get(job['file_url'], stream=True) as response: + with zip_path.open('wb') as fp: + for chunk in response.iter_content(chunk_size=8192): + fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) diff --git a/examples/time_series_shapefile.py b/examples/time_series_shapefile.py index ef09ca4..29333fd 100644 --- a/examples/time_series_shapefile.py +++ b/examples/time_series_shapefile.py @@ -1,5 +1,6 @@ import json 
import time +import zipfile from pathlib import Path import requests @@ -7,15 +8,7 @@ url = 'http://localhost:5000/' paths = [ - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2040.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2041_2050.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2051_2060.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2061_2070.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2071_2080.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2081_2090.nc', - 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2091_2100.nc', + 'ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc' ] shape_path = Path('testing') / 'shapes' / 'pm.zip' @@ -32,12 +25,12 @@ 'operation': 'mask_mask', 'mask': 'pm.nc' }, - { - 'operation': 'compute_mean', - }, - { - 'operation': 'output_csv' - } + # { + # 'operation': 'compute_mean', + # }, + # { + # 'operation': 'output_csv' + # } ] } @@ -59,7 +52,15 @@ break if job['status'] == 'finished': + # download file + zip_path = 
Path(job['file_name']) with requests.get(job['file_url'], stream=True) as response: - with open(job['file_name'], 'wb') as fp: + with zip_path.open('wb') as fp: for chunk in response.iter_content(chunk_size=8192): fp.write(chunk) + + # extract zip file + out_path = Path(job['file_name']).with_suffix('') + out_path.mkdir(exist_ok=True) + with zipfile.ZipFile(zip_path, 'r') as zf: + zf.extractall(out_path) From 4fee4cb3a076366a9fd71bb97e7a09339d03ecf4 Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 4 Mar 2024 19:40:47 +0100 Subject: [PATCH 21/22] Update docs --- README.md | 18 ++++++++++-------- docs/operations.md | 32 +++++++++++++++++++++++++------- docs/setup.md | 2 +- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 92a2563..6a01618 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -isimip-files-api +ISIMIP Files API ================ -A webservice to asynchronously perform operations on NetCDF files, using [Flask](https://palletsprojects.com/p/flask/) and [RQ](https://python-rq.org/). +A webservice to asynchronously perform operations on NetCDF files before downloading them, using [Flask](https://palletsprojects.com/p/flask/) and [RQ](https://python-rq.org/). The service is deployed on https://files.isimip.org/api/v2 as part of the [ISIMIP Repository](https://data.isimip.org). The previous version of the API is available at https://files.isimip.org/api/v1. @@ -19,7 +19,7 @@ The service is integrated into the [ISIMIP Repository](https://data.isimip.org) For programmatic access, the API can be used with standard HTTP libraries (e.g. [requests](https://requests.readthedocs.io) for Python). While the following examples use the ISIMIP Repository, Python and `requests`, they should be transferable to other servers, languages or libraries. -The API is used by sending HTTP POST request to its root endpoint. 
The request needs to use the content type `application/json` and contain a single JSON object with a list of `paths` and a list of `operations`. While the `paths` can be obtained from the [ISIMIP Repository](https://data.isimip.org) (they usually start with `ISIMIP3`), the operations are described in [docs/operations.md](docs/operations.md). +The API is used by sending HTTP POST request to its root endpoint. The request needs to use the content type `application/json` and contain a single JSON object with a list of `paths` and a list of `operations`. While the `paths` can be obtained from the [ISIMIP Repository](https://data.isimip.org) (e.g. `ISIMIP3b/InputData/climate/atmosphere/bias-adjusted/global/daily/ssp585/GFDL-ESM4/gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc`), the operations are described in [docs/operations.md](docs/operations.md). Using Python and `requests`, requests can be performed like this: @@ -28,9 +28,9 @@ import requests response = requests.post('https://files.isimip.org/api/v2', json={ 'paths': [ - 'ISIMIP3b/InputData/climate/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', - 'ISIMIP3b/InputData/climate/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', - 'ISIMIP3b/InputData/climate/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2031.nc', + 'ISIMIP3b/InputData/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2015_2020.nc', + 'ISIMIP3b/InputData/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2021_2030.nc', + 'ISIMIP3b/InputData/.../gfdl-esm4_r1i1p1f1_w5e5_ssp585_tas_global_daily_2031_2031.nc', ... 
], 'operations': [ @@ -44,7 +44,7 @@ response = requests.post('https://files.isimip.org/api/v2', json={ result = response.json() ``` -The response is a JSON object: +The `result` is a dictionary describing the job on the server: ```json { @@ -71,7 +71,7 @@ Performing the initial request again, or performing a `GET` on the url given in } ``` -When the job is completed on the server the status becomes `finished` and the JSON contains a `file_name` and a `file_url`. +When the job is completed on the server the status becomes `finished` and the result contains a `file_name` and a `file_url`. ```json { @@ -89,3 +89,5 @@ When the job is completed on the server the status becomes `finished` and the JS ``` The file can be downloaded under the URL given by `file_url` (if the output directory of the API is made public via a web server). + +Please also note the examples given in the [examples](examples) directory. diff --git a/docs/operations.md b/docs/operations.md index b25c0f1..67f514c 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -3,11 +3,29 @@ Operations The following operations are available. Please note that some of the operations can be chained, e.g. -``` -create_mask -> mask_mask -> compute_mean -> output_csv +```python +data = { + 'paths': [...], + 'operations': [ + { + 'operation': 'create_mask', + ... + }, + { + 'operation': 'mask_mask', + ... + }, + { + 'operation': 'compute_mean' + }, + { + 'operation': 'output_csv' + } + ] +} ``` -Pleas also note the examples given in the [examples](../examples) directory. +Please also note the examples given in the [examples](../examples) directory. ### Select point @@ -79,7 +97,7 @@ cdo -f nc4c -z zip_5 -L -sellonlatbox,WEST,EAST,SOUTH,NORTH IFILE OFILE ### Mask country -A country can be masked (everything outside is set to `missing_value`) using: +A country can be masked (i.e. 
everything outside is set to `missing_value`) using:
 
 ```python
 response = requests.post('https://files.isimip.org/api/v2', json={
@@ -101,7 +119,7 @@ cdo -f nc4c -z zip_5 -L -ifthen -selname,m_BRA COUNTRYMASK IFILE OFILE
 
 ### Mask land only
 
-The landmass (without antarctica) can be masked (everything outside is set to `missing_value`) using:
+The landmass (without antarctica) can be masked (i.e. the ocean is set to `missing_value`) using:
 
 ```python
 response = requests.post('https://files.isimip.org/api/v2', json={
@@ -210,7 +228,7 @@ cdo -s outputtab,date,value,nohead -fldmean -ifthen -selname,m_BRA COUNTRYMASK I
 
 Afterwards the TAB seperated CDO output is converted to CSV.
 
-Full examples are is given in [examples/time_series_bbox.py](../time_series_bbox.py) and [examples/time_series_country.py](../time_series_country.py).
+Full examples are given in [examples/time_series_bbox.py](../examples/time_series_bbox.py) and [examples/time_series_country.py](../examples/time_series_country.py).
 
 ### Cutout bounding box
 
@@ -267,4 +285,4 @@ response = requests.post(url, files={
 })
 ```
 
-Full examples are is given in [examples/time_series_shapefile.py](../time_series_shapefile.py) and [examples/time_series_geojson.py](../time_series_geojson.py).
+Full examples are given in [examples/time_series_shapefile.py](../examples/time_series_shapefile.py) and [examples/time_series_geojson.py](../examples/time_series_geojson.py).
diff --git a/docs/setup.md b/docs/setup.md
index 5cd7d27..98ef050 100644
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -31,7 +31,7 @@ The API is then available at http://127.0.0.1:5000.
 
 ## Deployment
 
-When deploying to the internet, a setup of [NGINX](https://www.nginx.com/), (gunicorn)[https://gunicorn.org/], and [systemd](https://www.freedesktop.org/wiki/Software/systemd/) services is recommended, but other services can be used as well. 
We further assume that a user `isimip` with the group `isimip` and the home `/home/isimip` exists, and that the repository is cloned at `/home/isimip/api`. +When deploying to the internet, a setup of [NGINX](https://www.nginx.com/), [gunicorn](https://gunicorn.org/), and [systemd](https://www.freedesktop.org/wiki/Software/systemd/) is recommended, but other services can be used as well. We further assume that a user `isimip` with the group `isimip` and the home `/home/isimip` exists, and that the repository is cloned at `/home/isimip/api`. Then, as `root`, create a file `/etc/tmpfiles.d/api.conf` with the following content: From 0a1395cb974534af495f63799dc49c31e16f584c Mon Sep 17 00:00:00 2001 From: Jochen Klar Date: Mon, 4 Mar 2024 19:45:00 +0100 Subject: [PATCH 22/22] Fix tests --- .../tests/test_create_select_country.py | 39 ------------------- isimip_files_api/tests/test_root.py | 1 - 2 files changed, 40 deletions(-) delete mode 100644 isimip_files_api/tests/test_create_select_country.py diff --git a/isimip_files_api/tests/test_create_select_country.py b/isimip_files_api/tests/test_create_select_country.py deleted file mode 100644 index 0a78db2..0000000 --- a/isimip_files_api/tests/test_create_select_country.py +++ /dev/null @@ -1,39 +0,0 @@ -def test_success(client, mocker): - mocker.patch('isimip_files_api.app.create_job', mocker.Mock(return_value=({}, 201))) - - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'operation': 'select_country', - 'country': 'deu' - } - ]}) - - assert response.status_code == 201 - assert response.json.get('errors') is None - - -def test_missing_country(client): - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'operation': 'select_country' - } - ]}) - assert response.status_code == 400 - assert response.json.get('status') == 'error' - assert response.json.get('errors') == { - 'operations': ['country is missing for operation "select_country"'] - } - - -def 
test_wrong_country(client): - response = client.post('/', json={'paths': ['constant.nc'], 'operations': [ - { - 'operation': 'select_country', - 'country': 'wrong' - } - ]}) - assert response.status_code == 400 - assert response.json.get('status') == 'error' - assert response.json.get('errors') == { - 'operations': ['country not in the list of supported countries (e.g. deu) for operation "select_country"'] - } diff --git a/isimip_files_api/tests/test_root.py b/isimip_files_api/tests/test_root.py index e9861cc..a090c34 100644 --- a/isimip_files_api/tests/test_root.py +++ b/isimip_files_api/tests/test_root.py @@ -12,7 +12,6 @@ def test_success(client, mocker): ] assert response.json.get('operations') == [ 'select_bbox', - 'select_country', 'select_point', 'mask_bbox', 'mask_mask',