From e554255544cfe704ed8b8298e45a6ea51decc717 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Mar 2024 14:09:58 +0100 Subject: [PATCH 01/38] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 40c0233..388f184 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The goal of this repository is to provide a CLI and Python library to make Argo The library produces a prediction _patch_ or _cone_ that could be displayed on a map like here: https://floatrecovery.euro-argo.eu More about Argo floats recovery in here: https://github.com/euroargodev/recovery/issues -# New design +# Documentation ## Command Line Interface From e948c819500cd48e16d6e6f1e05e7ecfe3f0bfc7 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Mar 2024 15:15:01 +0100 Subject: [PATCH 02/38] Delete launch_webapi --- cli/launch_webapi | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100755 cli/launch_webapi diff --git a/cli/launch_webapi b/cli/launch_webapi deleted file mode 100755 index a05b90c..0000000 --- a/cli/launch_webapi +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -APIDIR="$SCRIPT_DIR/../webapi" -APIDIR=$(python -c "import os,sys; print(os.path.realpath(sys.argv[1]))" $APIDIR) -#echo $APIDIR - -POSITIONAL_ARGS=() -WHERETO="local" -while [[ $# -gt 0 ]]; do - case $1 in - -l|--local) - WHERETO="local" - shift # past argument - shift # past value - ;; - -p|--pacnet) - WHERETO="pacnet" - shift # past argument - shift # past value - ;; - -*|--*) - echo "Unknown option $1" - exit 1 - ;; - esac -done -#echo $WHERETO - -IP=$(ifconfig en12 | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1') -#echo $IP -case $WHERETO in - "pacnet") - IP="134.246.146.54" - ;; - -esac - -echo "Launching VirtualFleet-Recovery webapi with Flask on $WHERETO at $IP ..." - -export FLASK_DEBUG=True -export FLASK_APP=myapp -cd $APIDIR -flask -A myapp routes -flask -A myapp run --host=$IP - -exit 0 From dda1391965f05dd794a267ed13355dc7c63632a7 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Mar 2024 15:16:03 +0100 Subject: [PATCH 03/38] Refactoring first sketching Inspired by the copernicusmarine CLI ! 
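For context, this first sketch targets the following usage once the package is installed (the `vfrecovery` console script is declared in setup.py, and the float/cycle numbers below are the illustrative values used in the click epilogs):

    vfrecovery describe 6903091 112
    vfrecovery predict 6903091 112
    vfrecovery predict --n_predictions 3 6903091 112

The Python interface is meant to mirror this through `vfrecovery.predict(wmo, cyc, n_predictions=...)`.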
--- setup.py | 16 ++++++++ vfrecovery/__init__.py | 6 ++- vfrecovery/command_line_interface/__init__.py | 0 .../command_line_interface/group_describe.py | 28 +++++++++++++ .../command_line_interface/group_predict.py | 39 +++++++++++++++++++ .../virtualfleet_recovery.py | 24 ++++++++++++ vfrecovery/core_functions/__init__.py | 0 vfrecovery/core_functions/predict.py | 21 ++++++++++ vfrecovery/python_interface/predict.py | 29 ++++++++++++++ 9 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 setup.py create mode 100644 vfrecovery/command_line_interface/__init__.py create mode 100644 vfrecovery/command_line_interface/group_describe.py create mode 100644 vfrecovery/command_line_interface/group_predict.py create mode 100644 vfrecovery/command_line_interface/virtualfleet_recovery.py create mode 100644 vfrecovery/core_functions/__init__.py create mode 100644 vfrecovery/core_functions/predict.py create mode 100644 vfrecovery/python_interface/predict.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0cd8f35 --- /dev/null +++ b/setup.py @@ -0,0 +1,16 @@ +from setuptools import setup, find_packages + +setup( + name='vfrecovery', + version='2.0', + packages=find_packages(), + include_package_data=True, + install_requires=[ + 'Click', + ], + entry_points={ + 'console_scripts': [ + 'vfrecovery = vfrecovery.command_line_interface.virtualfleet_recovery:base_command_line_interface', + ], + }, +) \ No newline at end of file diff --git a/vfrecovery/__init__.py b/vfrecovery/__init__.py index 1f36d3d..f6db858 100644 --- a/vfrecovery/__init__.py +++ b/vfrecovery/__init__.py @@ -1,2 +1,4 @@ -# from importlib.metadata import version -# __version__ = version("vfrecovery") +from importlib.metadata import version +__version__ = version("vfrecovery") + +from vfrecovery.python_interface.predict import predict diff --git a/vfrecovery/command_line_interface/__init__.py b/vfrecovery/command_line_interface/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py new file mode 100644 index 0000000..2b1069f --- /dev/null +++ b/vfrecovery/command_line_interface/group_describe.py @@ -0,0 +1,28 @@ +import click + + +@click.group() +def cli_group_describe() -> None: + pass + +@cli_group_describe.command( + "describe", + short_help="Describe VirtualFleet-Recovery predictions", + help=""" + Returns data about an existing VirtualFleet-Recovery prediction + + Data could be a JSON file, specific metrics or images + """, + epilog=""" + Examples: + + \b + vfrecovery describe 6903091 112 + """, # noqa + ) +@click.argument('WMO') +@click.argument('CYC') +def describe( + wmo: int, + cyc: int): + click.echo(f"Return description for {wmo} {cyc}") \ No newline at end of file diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py new file mode 100644 index 0000000..c3ec036 --- /dev/null +++ b/vfrecovery/command_line_interface/group_predict.py @@ -0,0 +1,39 @@ +import click +from typing import Union, List +from vfrecovery.core_functions.predict import predict_function + +@click.group() +def cli_group_predict() -> None: + pass + +@cli_group_predict.command( + "predict", + short_help="Execute VirtualFleet-Recovery predictions", + help=""" + Execute VirtualFleet-Recovery predictor and return results as a JSON string + """, + epilog=""" + Examples: + + \b + vfrecovery predict 6903091 112 + """, # noqa + ) +@click.option( + "-n", 
"--n_predictions", + type=int, + required=False, + default=1, + show_default=True, + is_flag=False, + help="Number of profiles to simulate", +) +@click.argument('WMO') +@click.argument('CYC', nargs=-1) +def predict( + wmo: int, + cyc: Union[int, List], + n_predictions) -> None: + # click.echo(f"Prediction for {wmo} {cyc}") + results = predict_function(wmo, cyc, n_predictions=n_predictions) + click.echo(results) diff --git a/vfrecovery/command_line_interface/virtualfleet_recovery.py b/vfrecovery/command_line_interface/virtualfleet_recovery.py new file mode 100644 index 0000000..b38adac --- /dev/null +++ b/vfrecovery/command_line_interface/virtualfleet_recovery.py @@ -0,0 +1,24 @@ +import click + +from vfrecovery.command_line_interface.group_describe import cli_group_describe +from vfrecovery.command_line_interface.group_predict import cli_group_predict + +@click.command( + cls=click.CommandCollection, + sources=[ + cli_group_describe, + cli_group_predict, + ], + context_settings=dict(help_option_names=["-h", "--help"]), +) +@click.version_option(None, "-V", "--version", package_name="vfrecovery") +def base_command_line_interface(): + pass + + +def command_line_interface(): + base_command_line_interface(windows_expand_args=False) + + +if __name__ == "__main__": + command_line_interface() diff --git a/vfrecovery/core_functions/__init__.py b/vfrecovery/core_functions/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/core_functions/predict.py b/vfrecovery/core_functions/predict.py new file mode 100644 index 0000000..7049385 --- /dev/null +++ b/vfrecovery/core_functions/predict.py @@ -0,0 +1,21 @@ + +def predict_function( + wmo: int, + cyc: int, + n_predictions: int = 1, +): + """ + Execute VirtualFleet-Recovery predictor and return results as a JSON string + + Inputs + ------ + wmo + cyc + n_predictions + + Returns + ------- + data + + """ # noqa + return {'wmo': wmo, 'cyc': cyc} diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py new file mode 100644 index 0000000..da128b9 --- /dev/null +++ b/vfrecovery/python_interface/predict.py @@ -0,0 +1,29 @@ +import json +from vfrecovery.core_functions.predict import predict_function + + +def predict( + wmo: int, + cyc: int, + n_predictions, +): + """ + Execute VirtualFleet-Recovery predictor and return results as a JSON string + + Inputs + ------ + wmo + cyc + n_predictions + + Returns + ------- + data + + """ # noqa + results_json = predict_function( + wmo, cyc, + n_predictions=n_predictions, + ) + results = json.loads(results_json) + return results From 8333cc2acd9d5c4fcce2ca639ce2945b29dbdd71 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Mar 2024 15:25:20 +0100 Subject: [PATCH 04/38] Starting functions dispatch to sub-modules --- cli/recovery_prediction.py | 389 --------------------- vfrecovery/command_line_interface/utils.py | 37 ++ vfrecovery/download_functions/__init__.py | 2 + vfrecovery/download_functions/armor3d.py | 116 ++++++ vfrecovery/download_functions/glorys.py | 154 ++++++++ vfrecovery/utilities/__init__.py | 0 vfrecovery/utilities/formatters.py | 18 + vfrecovery/utilities/geo.py | 63 ++++ 8 files changed, 390 insertions(+), 389 deletions(-) create mode 100644 vfrecovery/command_line_interface/utils.py create mode 100644 vfrecovery/download_functions/__init__.py create mode 100644 vfrecovery/download_functions/armor3d.py create mode 100644 vfrecovery/download_functions/glorys.py create mode 100644 vfrecovery/utilities/__init__.py create mode 100644 
vfrecovery/utilities/formatters.py create mode 100644 vfrecovery/utilities/geo.py diff --git a/cli/recovery_prediction.py b/cli/recovery_prediction.py index 50079a6..6cf0cb9 100755 --- a/cli/recovery_prediction.py +++ b/cli/recovery_prediction.py @@ -48,401 +48,12 @@ log = logging.getLogger("virtualfleet.recovery") -PREF = "\033[" -RESET = f"{PREF}0m" -class COLORS: - black = "30m" - red = "31m" - green = "32m" - yellow = "33m" - blue = "34m" - magenta = "35m" - cyan = "36m" - white = "37m" - - def get_package_dir(): fpath = Path(__file__) return str(fpath.parent.parent) -def puts(text, color=None, bold=False, file=sys.stdout): - """Alternative to print, uses no color by default but accepts any color from the COLORS class. - - Parameters - ---------- - text - color=None - bold=False - file=sys.stdout - """ - if color is None: - txt = f'{PREF}{1 if bold else 0}m' + text + RESET - print(txt, file=file) - else: - txt = f'{PREF}{1 if bold else 0};{color}' + text + RESET - print(txt, file=file) - log.info(text) - - -def haversine(lon1, lat1, lon2, lat2): - """ - Calculate the great circle distance (in [km]) between two points - on the earth (specified in decimal degrees) - - see: https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points - - Parameters - ---------- - lon1 - lat1 - lon2 - lat2 - - Returns - ------- - km - """ - from math import radians, cos, sin, asin, sqrt - # convert decimal degrees to radians - lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) - - # haversine formula - dlon = lon2 - lon1 - dlat = lat2 - lat1 - a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2 - c = 2 * asin(sqrt(a)) - r = 6371 # Radius of earth in kilometers. - return c * r - - -def bearing(lon1, lat1, lon2, lat2): - """ - - Parameters - ---------- - lon1 - lat1 - lon2 - lat2 - - Returns - ------- - - """ - # from math import cos, sin, atan2, degrees - # b = atan2(cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(lon2 - lon1), sin(lon2 - lon1) * cos(lat2)) - # b = degrees(b) - # return b - - import pyproj - geodesic = pyproj.Geod(ellps='WGS84') - fwd_azimuth, back_azimuth, distance = geodesic.inv(lon1, lat1, lon2, lat2) - return fwd_azimuth - - -def strfdelta(tdelta, fmt): - """ - - Parameters - ---------- - tdelta - fmt - - Returns - ------- - - """ - d = {"days": tdelta.days} - d["hours"], rem = divmod(tdelta.seconds, 3600) - d["minutes"], d["seconds"] = divmod(rem, 60) - return fmt.format(**d) - - -def fixLON(x): - """Ensure a 0-360 longitude""" - if x < 0: - x = 360 + x - return x - - -def getSystemInfo(): - """Return system information as a dict""" - try: - info = {} - info['platform']=platform.system() - info['platform-release']=platform.release() - info['platform-version']=platform.version() - info['architecture']=platform.machine() - info['hostname']=socket.gethostname() - info['ip-address']=socket.gethostbyname(socket.gethostname()) - # info['mac-address']=':'.join(re.findall('..', '%012x' % uuid.getnode())) - info['processor']=platform.processor() - info['ram']=str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB" - return info - except Exception as e: - logging.exception(e) - - -def get_glorys_forecast_from_datarmor(a_box, a_start_date, n_days=1): - """Load Datarmor Global Ocean 1/12° Physics Analysis and Forecast updated Daily - - Fields: daily, from 2020-11-25T12:00 to 'now' + 5 days - Src: /home/ref-ocean-model-public/multiparameter/physic/global/cmems/global-analysis-forecast-phy-001-024 - Info: 
https://resources.marine.copernicus.eu/product-detail/GLOBAL_ANALYSISFORECAST_PHY_001_024/INFORMATION - - Parameters - ---------- - a_box - a_start_date - n_days - """ - def get_forecast_files(a_date, n_days=1): - file_list = [] - for n in range(0, n_days): - t = a_date + pd.Timedelta(n, 'D') - p = os.path.join(src, "%i" % t.year, "%0.3d" % t.day_of_year) - # print(p, os.path.exists(p)) - if os.path.exists(p): - file_list.append(sorted(glob.glob(os.path.join(p, "*.nc")))[0]) - return file_list - - def preprocess(this_ds): - idpt = np.argwhere(this_ds['depth'].values > 2000)[0][0] - ilon = np.argwhere(this_ds['longitude'].values >= a_box[0])[0][0], \ - np.argwhere(this_ds['longitude'].values >= a_box[1])[0][0] - ilat = np.argwhere(this_ds['latitude'].values >= a_box[2])[0][0], \ - np.argwhere(this_ds['latitude'].values >= a_box[3])[0][0] - this_ds = this_ds.isel({'depth': range(0, idpt), - 'longitude': range(ilon[0], ilon[1]), - 'latitude': range(ilat[0], ilat[1])}) - return this_ds - - root = "/home/ref-ocean-model-public" if not os.uname()[0] == 'Darwin' else "/Volumes/MODEL-PUBLIC/" - src = os.path.join(root, "multiparameter/physic/global/cmems/global-analysis-forecast-phy-001-024") - # puts("\t%s" % src, color=COLORS.green) - flist = get_forecast_files(a_start_date, n_days=n_days) - if len(flist) == 0: - raise ValueError("This float cycle is too old for this velocity field.") - glorys = xr.open_mfdataset(flist, preprocess=preprocess, combine='nested', concat_dim='time', parallel=True) - # - return glorys - - -class Armor3d: - """Global Ocean 1/4° Multi Observation Product ARMOR3D - - Product description: - https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_TSUV_3D_MYNRT_015_012 - - If start_date + n_days <= 2022-12-28: - Delivers the multi-year reprocessed (REP) weekly data - - otherwise: - Delivers the near-real-time (NRT) weekly data - - Examples - -------- - >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() - >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() - - """ - - def __init__(self, box, start_date, n_days=1, max_depth=2500): - """ - Parameters - ---------- - box: list(float) - Define domain to load: [lon_min, lon_max, lat_min, lat_max] - start_date: :class:`pandas.Timestamp` - Starting date of the time series to load. Since ARMOR3D is weekly, the effective starting - date will be the first weekly period including the user-defined ``start_date`` - n_days: int (default=1) - Number of days to load data for. - max_depth: float (default=2500) - Maximum depth levels to load data for. 
- """ - self.box = box - self.start_date = start_date - self.n_days = n_days - self.max_depth = max_depth - - dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') - if start_date + dt <= pd.to_datetime('2022-12-28', utc=True): - self._loader = self._get_rep - self.dataset_id = "dataset-armor-3d-rep-weekly" - self.time_axis = pd.Series(pd.date_range('19930106', '20221228', freq='7D').tz_localize("UTC")) - else: - self._loader = self._get_nrt - self.dataset_id = "dataset-armor-3d-nrt-weekly" - self.time_axis = pd.Series( - pd.date_range('20190102', pd.to_datetime('now', utc=True).strftime("%Y%m%d"), freq='7D').tz_localize( - "UTC")[0:-1]) - - if start_date < self.time_axis.iloc[0]: - raise ValueError('Date out of bounds') - elif start_date + dt > self.time_axis.iloc[-1]: - raise ValueError('Date out of bounds, %s > %s' % ( - start_date + dt, self.time_axis.iloc[-1])) - - def _get_this(self, dataset_id): - start_date = self.time_axis[self.time_axis <= self.start_date].iloc[-1] - if self.n_days == 1: - end_date = start_date - else: - end_date = \ - self.time_axis[self.time_axis <= self.start_date + (self.n_days + 1) * pd.Timedelta(1, 'D')].iloc[-1] - - ds = copernicusmarine.open_dataset( - dataset_id=dataset_id, - minimum_longitude=self.box[0], - maximum_longitude=self.box[1], - minimum_latitude=self.box[2], - maximum_latitude=self.box[3], - maximum_depth=self.max_depth, - start_datetime=start_date.strftime("%Y-%m-%dT%H:%M:%S"), - end_datetime=end_date.strftime("%Y-%m-%dT%H:%M:%S"), - variables=['ugo', 'vgo'] - ) - return ds - - def _get_rep(self): - """multi-year reprocessed (REP) weekly data - - Returns - ------- - :class:xarray.dataset - """ - return self._get_this(self.dataset_id) - - def _get_nrt(self): - """near-real-time (NRT) weekly data - - Returns - ------- - :class:xarray.dataset - """ - return self._get_this(self.dataset_id) - - def to_xarray(self): - """Load and return data as a :class:`xarray.dataset` - - Returns - ------- - :class:xarray.dataset - """ - return self._loader() - - def __repr__(self): - summary = [""] - summary.append("dataset_id: %s" % self.dataset_id) - summary.append("First day: %s" % self.start_date) - summary.append("N days: %s" % self.n_days) - summary.append("Domain: %s" % self.box) - summary.append("Max depth (m): %s" % self.max_depth) - return "\n".join(summary) - - -class Glorys: - """Global Ocean 1/12° Physics Re-Analysis or Forecast - If start_date + n_days <= 2021-01-09: - Delivers the multi-year reprocessed (REP) daily data - https://resources.marine.copernicus.eu/product-detail/GLOBAL_MULTIYEAR_PHY_001_030 - - otherwise: - Delivers the near-real-time (NRT) Analysis and Forecast daily data - https://resources.marine.copernicus.eu/product-detail/GLOBAL_ANALYSISFORECAST_PHY_001_024 - - Examples - -------- - >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() - >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() - - """ - - def __init__(self, box, start_date, n_days=1, max_depth=2500): - """ - Parameters - ---------- - box: list(float) - Define domain to load: [lon_min, lon_max, lat_min, lat_max] - start_date: :class:`pandas.Timestamp` - Starting date of the time series to load. - n_days: int (default=1) - Number of days to load data for. - max_depth: float (default=2500) - Maximum depth levels to load data for. 
- """ - self.box = box - self.start_date = start_date - self.n_days = n_days - self.max_depth = max_depth - - dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') - if start_date + dt <= pd.to_datetime('2021-01-09', utc=True): - self._loader = self._get_reanalysis - self.dataset_id = "cmems_mod_glo_phy_my_0.083_P1D-m" - else: - self._loader = self._get_forecast - self.dataset_id = "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m" - - def _get_this(self, dataset_id, dates): - ds = copernicusmarine.open_dataset( - dataset_id=dataset_id, - minimum_longitude=self.box[0], - maximum_longitude=self.box[1], - minimum_latitude=self.box[2], - maximum_latitude=self.box[3], - maximum_depth=self.max_depth, - start_datetime=dates[0].strftime("%Y-%m-%dT%H:%M:%S"), - end_datetime=dates[1].strftime("%Y-%m-%dT%H:%M:%S"), - variables=['uo', 'vo'] - ) - return ds - - def _get_forecast(self): - """ - Returns - ------- - :class:`xarray.dataset` - """ - start_date = self.start_date - if self.n_days == 1: - end_date = start_date - else: - end_date = start_date + pd.Timedelta(self.n_days - 1, 'D') - return self._get_this(self.dataset_id, [start_date, end_date]) - - def _get_reanalysis(self): - """ - Returns - ------- - :class:`xarray.dataset` - """ - start_date = self.start_date - if self.n_days == 1: - end_date = start_date - else: - end_date = self.start_date + pd.Timedelta(self.n_days - 1, 'D') - return self._get_this(self.dataset_id, [start_date, end_date]) - - def to_xarray(self): - """ Load and return data as a :class:`xarray.dataset` - Returns - ------- - :class:`xarray.dataset` - """ - return self._loader() - - def __repr__(self): - summary = [""] - summary.append("dataset_id: %s" % self.dataset_id) - summary.append("First day: %s" % self.start_date) - summary.append("N days: %s" % self.n_days) - summary.append("Domain: %s" % self.box) - summary.append("Max depth (m): %s" % self.max_depth) - return "\n".join(summary) def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D'): diff --git a/vfrecovery/command_line_interface/utils.py b/vfrecovery/command_line_interface/utils.py new file mode 100644 index 0000000..c348565 --- /dev/null +++ b/vfrecovery/command_line_interface/utils.py @@ -0,0 +1,37 @@ +import sys +import logging + +log = logging.getLogger("vfrecovery.cli") + + +PREF = "\033[" +RESET = f"{PREF}0m" +class COLORS: + black = "30m" + red = "31m" + green = "32m" + yellow = "33m" + blue = "34m" + magenta = "35m" + cyan = "36m" + white = "37m" + + +def puts(text, color=None, bold=False, file=sys.stdout): + """Alternative to print, uses no color by default but accepts any color from the COLORS class. 
+ + Parameters + ---------- + text + color=None + bold=False + file=sys.stdout + """ + if color is None: + txt = f'{PREF}{1 if bold else 0}m' + text + RESET + print(txt, file=file) + else: + txt = f'{PREF}{1 if bold else 0};{color}' + text + RESET + print(txt, file=file) + log.info(text) + diff --git a/vfrecovery/download_functions/__init__.py b/vfrecovery/download_functions/__init__.py new file mode 100644 index 0000000..311103f --- /dev/null +++ b/vfrecovery/download_functions/__init__.py @@ -0,0 +1,2 @@ +from armor3d import Armor3d +from glorys import Glorys diff --git a/vfrecovery/download_functions/armor3d.py b/vfrecovery/download_functions/armor3d.py new file mode 100644 index 0000000..9d4d020 --- /dev/null +++ b/vfrecovery/download_functions/armor3d.py @@ -0,0 +1,116 @@ +import pandas as pd +import copernicusmarine + + +class Armor3d: + """Global Ocean 1/4° Multi Observation Product ARMOR3D + + Product description: + https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_TSUV_3D_MYNRT_015_012 + + If start_date + n_days <= 2022-12-28: + Delivers the multi-year reprocessed (REP) weekly data + + otherwise: + Delivers the near-real-time (NRT) weekly data + + Examples + -------- + >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() + >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() + + """ + + def __init__(self, box, start_date, n_days=1, max_depth=2500): + """ + Parameters + ---------- + box: list(float) + Define domain to load: [lon_min, lon_max, lat_min, lat_max] + start_date: :class:`pandas.Timestamp` + Starting date of the time series to load. Since ARMOR3D is weekly, the effective starting + date will be the first weekly period including the user-defined ``start_date`` + n_days: int (default=1) + Number of days to load data for. + max_depth: float (default=2500) + Maximum depth levels to load data for. 
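+
+        A small sketch of how the weekly product switch resolves; dates are
+        illustrative and no download is triggered at construction time:
+
+        >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).dataset_id
+        'dataset-armor-3d-rep-weekly'
+        >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20230510', utc=True)).dataset_id
+        'dataset-armor-3d-nrt-weekly'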
+ """ + self.box = box + self.start_date = start_date + self.n_days = n_days + self.max_depth = max_depth + + dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') + if start_date + dt <= pd.to_datetime('2022-12-28', utc=True): + self._loader = self._get_rep + self.dataset_id = "dataset-armor-3d-rep-weekly" + self.time_axis = pd.Series(pd.date_range('19930106', '20221228', freq='7D').tz_localize("UTC")) + else: + self._loader = self._get_nrt + self.dataset_id = "dataset-armor-3d-nrt-weekly" + self.time_axis = pd.Series( + pd.date_range('20190102', pd.to_datetime('now', utc=True).strftime("%Y%m%d"), freq='7D').tz_localize( + "UTC")[0:-1]) + + if start_date < self.time_axis.iloc[0]: + raise ValueError('Date out of bounds') + elif start_date + dt > self.time_axis.iloc[-1]: + raise ValueError('Date out of bounds, %s > %s' % ( + start_date + dt, self.time_axis.iloc[-1])) + + def _get_this(self, dataset_id): + start_date = self.time_axis[self.time_axis <= self.start_date].iloc[-1] + if self.n_days == 1: + end_date = start_date + else: + end_date = \ + self.time_axis[self.time_axis <= self.start_date + (self.n_days + 1) * pd.Timedelta(1, 'D')].iloc[-1] + + ds = copernicusmarine.open_dataset( + dataset_id=dataset_id, + minimum_longitude=self.box[0], + maximum_longitude=self.box[1], + minimum_latitude=self.box[2], + maximum_latitude=self.box[3], + maximum_depth=self.max_depth, + start_datetime=start_date.strftime("%Y-%m-%dT%H:%M:%S"), + end_datetime=end_date.strftime("%Y-%m-%dT%H:%M:%S"), + variables=['ugo', 'vgo'] + ) + return ds + + def _get_rep(self): + """multi-year reprocessed (REP) weekly data + + Returns + ------- + :class:xarray.dataset + """ + return self._get_this(self.dataset_id) + + def _get_nrt(self): + """near-real-time (NRT) weekly data + + Returns + ------- + :class:xarray.dataset + """ + return self._get_this(self.dataset_id) + + def to_xarray(self): + """Load and return data as a :class:`xarray.dataset` + + Returns + ------- + :class:xarray.dataset + """ + return self._loader() + + def __repr__(self): + summary = [""] + summary.append("dataset_id: %s" % self.dataset_id) + summary.append("First day: %s" % self.start_date) + summary.append("N days: %s" % self.n_days) + summary.append("Domain: %s" % self.box) + summary.append("Max depth (m): %s" % self.max_depth) + return "\n".join(summary) diff --git a/vfrecovery/download_functions/glorys.py b/vfrecovery/download_functions/glorys.py new file mode 100644 index 0000000..5daec78 --- /dev/null +++ b/vfrecovery/download_functions/glorys.py @@ -0,0 +1,154 @@ +import os +import numpy as np +import glob +import pandas as pd +import xarray as xr +import copernicusmarine + + +def get_glorys_forecast_from_datarmor(a_box, a_start_date, n_days=1): + """Load Datarmor Global Ocean 1/12° Physics Analysis and Forecast updated Daily + + Fields: daily, from 2020-11-25T12:00 to 'now' + 5 days + Src: /home/ref-ocean-model-public/multiparameter/physic/global/cmems/global-analysis-forecast-phy-001-024 + Info: https://resources.marine.copernicus.eu/product-detail/GLOBAL_ANALYSISFORECAST_PHY_001_024/INFORMATION + + Parameters + ---------- + a_box + a_start_date + n_days + """ + def get_forecast_files(a_date, n_days=1): + file_list = [] + for n in range(0, n_days): + t = a_date + pd.Timedelta(n, 'D') + p = os.path.join(src, "%i" % t.year, "%0.3d" % t.day_of_year) + # print(p, os.path.exists(p)) + if os.path.exists(p): + file_list.append(sorted(glob.glob(os.path.join(p, "*.nc")))[0]) + return file_list + + def preprocess(this_ds): + idpt = 
np.argwhere(this_ds['depth'].values > 2000)[0][0] + ilon = np.argwhere(this_ds['longitude'].values >= a_box[0])[0][0], \ + np.argwhere(this_ds['longitude'].values >= a_box[1])[0][0] + ilat = np.argwhere(this_ds['latitude'].values >= a_box[2])[0][0], \ + np.argwhere(this_ds['latitude'].values >= a_box[3])[0][0] + this_ds = this_ds.isel({'depth': range(0, idpt), + 'longitude': range(ilon[0], ilon[1]), + 'latitude': range(ilat[0], ilat[1])}) + return this_ds + + root = "/home/ref-ocean-model-public" if not os.uname()[0] == 'Darwin' else "/Volumes/MODEL-PUBLIC/" + src = os.path.join(root, "multiparameter/physic/global/cmems/global-analysis-forecast-phy-001-024") + # puts("\t%s" % src, color=COLORS.green) + flist = get_forecast_files(a_start_date, n_days=n_days) + if len(flist) == 0: + raise ValueError("This float cycle is too old for this velocity field.") + glorys = xr.open_mfdataset(flist, preprocess=preprocess, combine='nested', concat_dim='time', parallel=True) + # + return glorys + + +class Glorys: + """Global Ocean 1/12° Physics Re-Analysis or Forecast + + If start_date + n_days <= 2021-01-09: + Delivers the multi-year reprocessed (REP) daily data + https://resources.marine.copernicus.eu/product-detail/GLOBAL_MULTIYEAR_PHY_001_030 + + otherwise: + Delivers the near-real-time (NRT) Analysis and Forecast daily data + https://resources.marine.copernicus.eu/product-detail/GLOBAL_ANALYSISFORECAST_PHY_001_024 + + Examples + -------- + >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() + >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() + + """ + + def __init__(self, box, start_date, n_days=1, max_depth=2500): + """ + Parameters + ---------- + box: list(float) + Define domain to load: [lon_min, lon_max, lat_min, lat_max] + start_date: :class:`pandas.Timestamp` + Starting date of the time series to load. + n_days: int (default=1) + Number of days to load data for. + max_depth: float (default=2500) + Maximum depth levels to load data for. 
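+
+        A similar sketch for the reanalysis/forecast switch; note that ``n_days``
+        takes part in the decision (dates are illustrative, nothing is downloaded
+        at construction time):
+
+        >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20201201', utc=True)).dataset_id
+        'cmems_mod_glo_phy_my_0.083_P1D-m'
+        >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20201231', utc=True), n_days=30).dataset_id
+        'cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m'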
+ """ + self.box = box + self.start_date = start_date + self.n_days = n_days + self.max_depth = max_depth + + dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') + if start_date + dt <= pd.to_datetime('2021-01-09', utc=True): + self._loader = self._get_reanalysis + self.dataset_id = "cmems_mod_glo_phy_my_0.083_P1D-m" + else: + self._loader = self._get_forecast + self.dataset_id = "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m" + + def _get_this(self, dataset_id, dates): + ds = copernicusmarine.open_dataset( + dataset_id=dataset_id, + minimum_longitude=self.box[0], + maximum_longitude=self.box[1], + minimum_latitude=self.box[2], + maximum_latitude=self.box[3], + maximum_depth=self.max_depth, + start_datetime=dates[0].strftime("%Y-%m-%dT%H:%M:%S"), + end_datetime=dates[1].strftime("%Y-%m-%dT%H:%M:%S"), + variables=['uo', 'vo'] + ) + return ds + + def _get_forecast(self): + """ + Returns + ------- + :class:`xarray.dataset` + """ + start_date = self.start_date + if self.n_days == 1: + end_date = start_date + else: + end_date = start_date + pd.Timedelta(self.n_days - 1, 'D') + return self._get_this(self.dataset_id, [start_date, end_date]) + + def _get_reanalysis(self): + """ + Returns + ------- + :class:`xarray.dataset` + """ + start_date = self.start_date + if self.n_days == 1: + end_date = start_date + else: + end_date = self.start_date + pd.Timedelta(self.n_days - 1, 'D') + return self._get_this(self.dataset_id, [start_date, end_date]) + + def to_xarray(self): + """ Load and return data as a :class:`xarray.dataset` + Returns + ------- + :class:`xarray.dataset` + """ + return self._loader() + + def __repr__(self): + summary = [""] + summary.append("dataset_id: %s" % self.dataset_id) + summary.append("First day: %s" % self.start_date) + summary.append("N days: %s" % self.n_days) + summary.append("Domain: %s" % self.box) + summary.append("Max depth (m): %s" % self.max_depth) + return "\n".join(summary) + diff --git a/vfrecovery/utilities/__init__.py b/vfrecovery/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/utilities/formatters.py b/vfrecovery/utilities/formatters.py new file mode 100644 index 0000000..6e11468 --- /dev/null +++ b/vfrecovery/utilities/formatters.py @@ -0,0 +1,18 @@ + +def strfdelta(tdelta, fmt): + """ + + Parameters + ---------- + tdelta + fmt + + Returns + ------- + + """ + d = {"days": tdelta.days} + d["hours"], rem = divmod(tdelta.seconds, 3600) + d["minutes"], d["seconds"] = divmod(rem, 60) + return fmt.format(**d) + diff --git a/vfrecovery/utilities/geo.py b/vfrecovery/utilities/geo.py new file mode 100644 index 0000000..678e62f --- /dev/null +++ b/vfrecovery/utilities/geo.py @@ -0,0 +1,63 @@ +from math import radians, cos, sin, asin, sqrt +import pyproj + + +def haversine(lon1, lat1, lon2, lat2): + """ + Calculate the great circle distance (in [km]) between two points + on the earth (specified in decimal degrees) + + see: https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points + + Parameters + ---------- + lon1 + lat1 + lon2 + lat2 + + Returns + ------- + km + """ + # convert decimal degrees to radians + lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) + + # haversine formula + dlon = lon2 - lon1 + dlat = lat2 - lat1 + a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2 + c = 2 * asin(sqrt(a)) + r = 6371 # Radius of earth in kilometers. 
+ return c * r + + +def bearing(lon1, lat1, lon2, lat2): + """ + + Parameters + ---------- + lon1 + lat1 + lon2 + lat2 + + Returns + ------- + + """ + # from math import cos, sin, atan2, degrees + # b = atan2(cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(lon2 - lon1), sin(lon2 - lon1) * cos(lat2)) + # b = degrees(b) + # return b + + geodesic = pyproj.Geod(ellps='WGS84') + fwd_azimuth, back_azimuth, distance = geodesic.inv(lon1, lat1, lon2, lat2) + return fwd_azimuth + + +def fixLON(x): + """Ensure a 0-360 longitude""" + if x < 0: + x = 360 + x + return x From caecbac2b5b4cbdf146644977182f84d5de7e39a Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 8 Mar 2024 15:55:42 +0100 Subject: [PATCH 05/38] More dispatch ! --- cli/recovery_prediction.py | 1550 ----------------- .../command_line_interface/group_predict.py | 2 +- vfrecovery/core/__init__.py | 4 + vfrecovery/core/deployment_plan.py | 42 + vfrecovery/core/predict.py | 278 +++ vfrecovery/core/simulation_handler.py | 595 +++++++ vfrecovery/core/trajfile_handler.py | 406 +++++ vfrecovery/core_functions/predict.py | 21 - .../__init__.py | 1 + .../armor3d.py | 0 vfrecovery/downloaders/core.py | 43 + .../glorys.py | 0 .../{json_functions => json}/VFRschema.py | 0 .../VFRschema_meta.py | 0 .../VFRschema_metrics.py | 0 .../VFRschema_profile.py | 0 .../VFRschema_simulation.py | 0 .../{json_functions => json}/__init__.py | 0 .../{core_functions => plots}/__init__.py | 0 vfrecovery/plots/plot_positions.py | 59 + vfrecovery/plots/plot_velocity.py | 42 + vfrecovery/plots/utils.py | 129 ++ vfrecovery/python_interface/predict.py | 6 +- vfrecovery/{utilities => utils}/__init__.py | 0 vfrecovery/{utilities => utils}/formatters.py | 0 vfrecovery/{utilities => utils}/geo.py | 2 + vfrecovery/utils/misc.py | 37 + 27 files changed, 1642 insertions(+), 1575 deletions(-) create mode 100644 vfrecovery/core/__init__.py create mode 100644 vfrecovery/core/deployment_plan.py create mode 100644 vfrecovery/core/predict.py create mode 100644 vfrecovery/core/simulation_handler.py create mode 100644 vfrecovery/core/trajfile_handler.py delete mode 100644 vfrecovery/core_functions/predict.py rename vfrecovery/{download_functions => downloaders}/__init__.py (60%) rename vfrecovery/{download_functions => downloaders}/armor3d.py (100%) create mode 100644 vfrecovery/downloaders/core.py rename vfrecovery/{download_functions => downloaders}/glorys.py (100%) rename vfrecovery/{json_functions => json}/VFRschema.py (100%) rename vfrecovery/{json_functions => json}/VFRschema_meta.py (100%) rename vfrecovery/{json_functions => json}/VFRschema_metrics.py (100%) rename vfrecovery/{json_functions => json}/VFRschema_profile.py (100%) rename vfrecovery/{json_functions => json}/VFRschema_simulation.py (100%) rename vfrecovery/{json_functions => json}/__init__.py (100%) rename vfrecovery/{core_functions => plots}/__init__.py (100%) create mode 100644 vfrecovery/plots/plot_positions.py create mode 100644 vfrecovery/plots/plot_velocity.py create mode 100644 vfrecovery/plots/utils.py rename vfrecovery/{utilities => utils}/__init__.py (100%) rename vfrecovery/{utilities => utils}/formatters.py (100%) rename vfrecovery/{utilities => utils}/geo.py (98%) create mode 100644 vfrecovery/utils/misc.py diff --git a/cli/recovery_prediction.py b/cli/recovery_prediction.py index 6cf0cb9..859710b 100755 --- a/cli/recovery_prediction.py +++ b/cli/recovery_prediction.py @@ -48,1290 +48,6 @@ log = logging.getLogger("virtualfleet.recovery") -def get_package_dir(): - fpath = Path(__file__) - return 
str(fpath.parent.parent) - - - - - -def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D'): - """Return the velocity field as an :class:xr.Dataset, download if needed - - Parameters - ---------- - a_box - a_date - n_days - output - dataset - """ - def get_velocity_filename(dataset, n_days): - download_date = pd.to_datetime('now', utc='now').strftime("%Y%m%d") - fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, download_date)) - return fname - - velocity_file = get_velocity_filename(dataset, n_days) - if not os.path.exists(velocity_file): - # Define Data loader: - loader = Armor3d if dataset == 'ARMOR3D' else Glorys - loader = loader(a_box, a_date, n_days=n_days) - puts(str(loader), color=COLORS.magenta) - - # Load data from Copernicus Marine Data store: - ds = loader.to_xarray() - - # Save on file for later re-used: - ds.to_netcdf(velocity_file) - else: - ds = xr.open_dataset(velocity_file) - - return ds, velocity_file - - -def get_HBOX(df_sim, dd=1): - """ - - Parameters - ---------- - dd: how much to extend maps outward the deployment 'box' - - Returns - ------- - list - """ - rx = df_sim['deploy_lon'].max() - df_sim['deploy_lon'].min() - ry = df_sim['deploy_lat'].max() - df_sim['deploy_lat'].min() - lonc, latc = df_sim['deploy_lon'].mean(), df_sim['deploy_lat'].mean() - box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] - ebox = [box[i] + [-dd, dd, -dd, dd][i] for i in range(0, 4)] # Extended 'box' - - return ebox - - -def get_EBOX(df_sim, df_plan, this_profile, s=1): - """Get a box for maps - - Use all data positions from DF_SIM to make sure all points are visible - Extend the domain by a 's' scaling factor of the deployment plan domain - - Parameters - ---------- - s: float, default:1 - - Returns - ------- - list - """ - box = [np.min([df_sim['deploy_lon'].min(), df_sim['longitude'].min(), df_sim['rel_lon'].min(), this_profile['longitude'].min()]), - np.max([df_sim['deploy_lon'].max(), df_sim['longitude'].max(), df_sim['rel_lon'].max(), this_profile['longitude'].max()]), - np.min([df_sim['deploy_lat'].min(), df_sim['latitude'].min(), df_sim['rel_lat'].min(), this_profile['latitude'].min()]), - np.max([df_sim['deploy_lat'].max(), df_sim['latitude'].max(), df_sim['rel_lat'].max(), this_profile['latitude'].max()])] - rx, ry = df_plan['longitude'].max() - df_plan['longitude'].min(), df_plan['latitude'].max() - df_plan['latitude'].min() - r = np.min([rx, ry]) - ebox = [box[0]-s*r, box[1]+s*r, box[2]-s*r, box[3]+s*r] - - return ebox - - -def get_cfg_str(a_cfg): - txt = "VFloat configuration: (Parking depth: %i [db], Cycle duration: %i [hours], Profile depth: %i [db])" % ( - a_cfg.mission['parking_depth'], - a_cfg.mission['cycle_duration'], - a_cfg.mission['profile_depth'], - ) - return txt - - -def save_figurefile(this_fig, a_name, folder='.'): - """ - - Parameters - ---------- - this_fig - a_name - - Returns - ------- - path - """ - figname = os.path.join(folder, "%s.png" % a_name) - log.debug("Saving %s ..." 
% figname) - this_fig.savefig(figname) - return figname - - -def map_add_profiles(this_ax, this_profile): - """ - - Parameters - ---------- - this_ax - - Returns - ------- - this_ax - """ - this_ax.plot(this_profile['longitude'][0], this_profile['latitude'][0], 'k.', markersize=10, markeredgecolor='w') - if this_profile.shape[0] > 1: - this_ax.plot(this_profile['longitude'][1], this_profile['latitude'][1], 'r.', markersize=10, markeredgecolor='w') - this_ax.arrow(this_profile['longitude'][0], - this_profile['latitude'][0], - this_profile['longitude'][1] - this_profile['longitude'][0], - this_profile['latitude'][1] - this_profile['latitude'][0], - length_includes_head=True, fc='k', ec='k', head_width=0.025, zorder=10) - - return this_ax - - -def map_add_features(this_ax): - """ - - Parameters - ---------- - this_ax - - Returns - ------- - this_ax - """ - argoplot.utils.latlongrid(this_ax) - this_ax.add_feature(argoplot.utils.land_feature, edgecolor="black") - return this_ax - - -def map_add_cyc_nb(this_ax, this_df, lon='lon', lat='lat', cyc='cyc', pos='bt', fs=6, color='black'): - """ Add cycle number labels next to axis - - Parameters - ---------- - ax - df - - Returns - ------- - list of text label - """ - t = [] - if pos == 'bt': - ha, va, label = 'center', 'top', "\n{}".format - if pos == 'tp': - ha, va, label = 'center', 'bottom', "{}\n".format - for irow, row in this_df.iterrows(): - this_t = this_ax.text(row[lon], row[lat], label(int(row[cyc])), ha=ha, va=va, fontsize=fs, color=color) - t.append(this_t) - return t - - -def figure_velocity(box, - vel, vel_name, this_profile, wmo, cyc, - save_figure=False, workdir='.'): - """ - - Parameters - ---------- - box - - Returns - ------- - None - """ - fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 20), dpi=100, subplot_kw={'projection': ccrs.PlateCarree()}) - ax.set_extent(box) - ax = map_add_features(ax) - ax = map_add_profiles(ax, this_profile) - - vel.field.isel(time=0, depth=0).plot.quiver(x="longitude", y="latitude", - u=vel.var['U'], v=vel.var['V'], ax=ax, color='grey', alpha=0.5, - add_guide=False) - - txt = "starting from cycle %i, predicting cycle %i" % (cyc[0], cyc[1]) - ax.set_title( - "VirtualFleet recovery system for WMO %i: %s\n" - "%s velocity snapshot to illustrate the simulation domain\n" - "Vectors: Velocity field at z=%0.2fm, t=%s" % - (wmo, txt, vel_name, vel.field['depth'][0].values[np.newaxis][0], - pd.to_datetime(vel.field['time'][0].values).strftime("%Y/%m/%d %H:%M")), fontsize=15) - - plt.tight_layout() - if save_figure: - save_figurefile(fig, 'vfrecov_velocity_%s' % vel_name, workdir) - return fig, ax - - -def figure_positions(this_args, vel, df_sim, df_plan, this_profile, cfg, wmo, cyc, vel_name, - dd=1, save_figure=False, workdir='.'): - log.debug("Starts figure_positions") - ebox = get_HBOX(df_sim, dd=dd) - nfloats = df_plan.shape[0] - - fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(25, 7), dpi=120, - subplot_kw={'projection': ccrs.PlateCarree()}, - sharex=True, sharey=True) - ax = ax.flatten() - - for ix in [0, 1, 2]: - ax[ix].set_extent(ebox) - ax[ix] = map_add_features(ax[ix]) - - v = vel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']).plot.quiver(x="longitude", - y="latitude", - u=vel.var['U'], - v=vel.var['V'], - ax=ax[ix], - color='grey', - alpha=0.5, - add_guide=False) - - ax[ix].plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', - markersize=3, color='grey', alpha=0.1, markeredgecolor=None, zorder=0) - if ix == 0: - title = 'Velocity field at %0.2fm and deployment plan' % 
cfg.mission['parking_depth'] - v.set_alpha(1) - # v.set_color('black') - elif ix == 1: - x, y, c = df_sim['longitude'], df_sim['latitude'], df_sim['cyc'] - title = 'Final float positions' - # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) - sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) - elif ix == 2: - x, y, c = df_sim['rel_lon'], df_sim['rel_lat'], df_sim['cyc'] - title = 'Final floats position relative to last float position' - # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) - sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) - - ax[ix] = map_add_profiles(ax[ix], this_profile) - ax[ix].set_title(title) - - fig.suptitle("VirtualFleet recovery prediction for WMO %i: starting from cycle %i, predicting cycle %s\n%s" % - (wmo, cyc[0], cyc[1:], get_cfg_str(cfg)), fontsize=15) - plt.tight_layout() - if save_figure: - save_figurefile(fig, "vfrecov_positions_%s" % get_sim_suffix(this_args, cfg), workdir) - return fig, ax - - -def setup_deployment_plan(a_profile, a_date, nfloats=15000): - # We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time - - # Amplitude of the profile position perturbations in the zonal (deg), meridional (deg), and temporal (hours) directions: - rx = 0.5 - ry = 0.5 - rt = 0 - - # - lonc, latc = a_profile - # box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] - - a, b = lonc - rx / 2, lonc + rx / 2 - lon = (b - a) * np.random.random_sample((nfloats,)) + a - - a, b = latc - ry / 2, latc + ry / 2 - lat = (b - a) * np.random.random_sample((nfloats,)) + a - - a, b = 0, rt - dtim = (b - a) * np.random.random_sample((nfloats,)) + a - dtim = np.round(dtim).astype(int) - tim = pd.to_datetime([a_date + np.timedelta64(dt, 'h') for dt in dtim]) - # dtim = (b-a) * np.random.random_sample((nfloats, )) + a - # dtim = np.round(dtim).astype(int) - # tim2 = pd.to_datetime([this_date - np.timedelta64(dt, 'h') for dt in dtim]) - # tim = np.sort(np.concatenate([tim2, tim1])) - - # Round time to the o(5mins), same as step=timedelta(minutes=5) in the simulation params - tim = tim.round(freq='5min') - - # - df = pd.DataFrame( - [tim, lat, lon, np.arange(0, nfloats) + 9000000, np.full_like(lon, 0), ['VF' for l in lon], ['?' 
for l in lon]], - index=['date', 'latitude', 'longitude', 'wmo', 'cycle_number', 'institution_code', 'file']).T - df['date'] = pd.to_datetime(df['date']) - - return df - - -class Trajectories: - """Trajectory file manager for VFrecovery - - Examples: - --------- - T = Trajectories(traj_zarr_file) - T.n_floats - T.sim_cycles - df = T.to_index() - df = T.get_index().add_distances() - jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, show_plot=True) - """ - - def __init__(self, zfile): - self.zarr_file = zfile - self.obj = xr.open_zarr(zfile) - self._index = None - - @property - def n_floats(self): - # len(self.obj['trajectory']) - return self.obj['trajectory'].shape[0] - - @property - def sim_cycles(self): - """Return list of cycles simulated""" - cycs = np.unique(self.obj['cycle_number']) - last_obs_phase = \ - self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ - np.newaxis][0] - if last_obs_phase < 3: - cycs = cycs[0:-1] - return cycs - - def __repr__(self): - summary = [""] - summary.append("Swarm size: %i floats" % self.n_floats) - start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) - end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) - summary.append("Simulation length: %s, from %s to %s" % ( - pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) - return "\n".join(summary) - - def to_index_par(self) -> pd.DataFrame: - # Deployment loc: - deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values - - def worker(ds, cyc, x0, y0): - mask = np.logical_and((ds['cycle_number'] == cyc).compute(), - (ds['cycle_phase'] >= 3).compute()) - this_cyc = ds.where(mask, drop=True) - if len(this_cyc['time']) > 0: - data = { - 'date': this_cyc.isel(obs=-1)['time'].values, - 'latitude': this_cyc.isel(obs=-1)['lat'].values, - 'longitude': this_cyc.isel(obs=-1)['lon'].values, - 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, - 'cyc': cyc, - # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, - 'deploy_lon': x0, - 'deploy_lat': y0, - } - return pd.DataFrame(data) - else: - return None - - cycles = np.unique(self.obj['cycle_number']) - rows = [] - with concurrent.futures.ThreadPoolExecutor() as executor: - future_to_url = { - executor.submit( - worker, - self.obj, - cyc, - deploy_lon, - deploy_lat - ): cyc - for cyc in cycles - } - futures = concurrent.futures.as_completed(future_to_url) - for future in futures: - data = None - try: - data = future.result() - except Exception: - raise - finally: - rows.append(data) - - rows = [r for r in rows if r is not None] - df = pd.concat(rows).reset_index() - df['wmo'] = df['wmo'].astype(int) - df['cyc'] = df['cyc'].astype(int) - # df['cycle_phase'] = df['cycle_phase'].astype(int) - self._index = df - - return self._index - - def to_index(self) -> pd.DataFrame: - """Compute and return index (profile dataframe from trajectory dataset) - - Create a Profile index :class:`pandas.dataframe` with columns: [data, latitude ,longitude, wmo, cyc, deploy_lon, deploy_lat] - from a trajectory :class:`xarray.dataset`. - - There is one dataframe row for each dataset trajectory cycle. - - We use the last trajectory point of given cycle number (with cycle phase >= 3) to identify a profile location. - - If they are N trajectories simulating C cycles, there will be about a maximum of N*C rows in the dataframe. 
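        For example, a swarm of 100 virtual floats simulating 2 cycles yields at most
        200 rows; a cycle that never reaches cycle_phase >= 3 contributes no row.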
- - Returns - ------- - :class:`pandas.dataframe` - """ - if self._index is None: - - # Deployment loc: - deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values - - def worker(ds, cyc, x0, y0): - mask = np.logical_and((ds['cycle_number'] == cyc).compute(), - (ds['cycle_phase'] >= 3).compute()) - this_cyc = ds.where(mask, drop=True) - if len(this_cyc['time']) > 0: - data = { - 'date': this_cyc.isel(obs=-1)['time'].values, - 'latitude': this_cyc.isel(obs=-1)['lat'].values, - 'longitude': this_cyc.isel(obs=-1)['lon'].values, - 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, - 'cyc': cyc, - # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, - 'deploy_lon': x0, - 'deploy_lat': y0, - } - return pd.DataFrame(data) - else: - return None - - cycles = np.unique(self.obj['cycle_number']) - rows = [] - for cyc in cycles: - df = worker(self.obj, cyc, deploy_lon, deploy_lat) - rows.append(df) - rows = [r for r in rows if r is not None] - df = pd.concat(rows).reset_index() - df['wmo'] = df['wmo'].astype(int) - df['cyc'] = df['cyc'].astype(int) - # df['cycle_phase'] = df['cycle_phase'].astype(int) - self._index = df - - return self._index - - def get_index(self): - """Compute index and return self""" - self.to_index() - return self - - def add_distances(self, origin: None) -> pd.DataFrame: - """Compute profiles distance to some origin - - Returns - ------- - :class:`pandas.dataframe` - """ - - # Compute distance between the predicted profile and the initial profile location from the deployment plan - # We assume that virtual floats are sequentially taken from the deployment plan - # Since distances are very short, we compute a simple rectangular distance - - # Observed cycles: - # obs_cyc = np.unique(this_profile['cyc']) - - # Simulated cycles: - # sim_cyc = np.unique(this_df['cyc']) - - df = self._index - - x2, y2 = origin # real float initial position - df['distance'] = np.nan - df['rel_lon'] = np.nan - df['rel_lat'] = np.nan - df['distance_origin'] = np.nan - - def worker(row): - # Simulation profile coordinates: - x0, y0 = row['deploy_lon'], row['deploy_lat'] # virtual float initial position - x1, y1 = row['longitude'], row['latitude'] # virtual float position - - # Distance between each pair of cycles of virtual floats: - dist = np.sqrt((y1 - y0) ** 2 + (x1 - x0) ** 2) - row['distance'] = dist - - # Shift between each pair of cycles: - dx, dy = x1 - x0, y1 - y0 - # Get a relative displacement from real float initial position: - row['rel_lon'] = x2 + dx - row['rel_lat'] = y2 + dy - - # Distance between the predicted profile and the observed initial profile - dist = np.sqrt((y2 - y0) ** 2 + (x2 - x0) ** 2) - row['distance_origin'] = dist - - return row - - df = df.apply(worker, axis=1) - self._index = df - - return self._index - - def analyse_pairwise_distances(self, - cycle: int = 1, - show_plot: bool = True, - save_figure: bool = False, - workdir: str = '.', - sim_suffix = None, - this_cfg = None, - this_args: dict = None): - - def get_hist_and_peaks(this_d): - x = this_d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - hist, bin_edges = np.histogram(x, bins=100, density=1) - # dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) - return {'pdf': hist, 'bins': bin_edges[0:-1], 'Npeaks': len(peaks)} - - # Squeeze traj file to the first predicted cycle (sim can have more than 1 cycle) - ds = self.obj.where((self.obj['cycle_number'] == cycle).compute(), drop=True) - ds = ds.compute() - - 
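        # Note on the recentring below: each trajectory is shifted so that its first
        # point coincides with the real float initial position (lon0, lat0). Pairwise
        # statistics on these recentred tracks therefore measure the spread acquired
        # along the way, independently of the initial deployment scatter.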
# Compute trajectories relative to the single/only real float initial position: - lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] - lon, lat = ds['lon'].values, ds['lat'].values - ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, - dims=['trajectory', 'obs']) - ds['latc'] = xr.DataArray(lat - np.broadcast_to(lat[:, 0][:, np.newaxis], lat.shape) + lat0, - dims=['trajectory', 'obs']) - - # Compute trajectory lengths: - ds['length'] = np.sqrt(ds.diff(dim='obs')['lon'] ** 2 + ds.diff(dim='obs')['lat'] ** 2).sum(dim='obs') - ds['lengthc'] = np.sqrt(ds.diff(dim='obs')['lonc'] ** 2 + ds.diff(dim='obs')['latc'] ** 2).sum(dim='obs') - - # Compute initial points pairwise distances, PDF and nb of peaks: - X = ds.isel(obs=0) - X = X.isel(trajectory=~np.isnan(X['lon'])) - X0 = np.array((X['lon'].values, X['lat'].values)).T - d0 = pairwise_distances(X0, n_jobs=-1) - d0 = np.triu(d0) - d0[d0 == 0] = np.nan - - x0 = d0.flatten() - x0 = x0[~np.isnan(x0)] - x0 = x0[:, np.newaxis] - - hist0, bin_edges0 = np.histogram(x0, bins=100, density=1) - dh0 = np.diff(bin_edges0[0:2]) - peaks0, _ = find_peaks(hist0 / np.max(hist0), height=.4, distance=20) - - # Compute final points pairwise distances, PDF and nb of peaks: - X = ds.isel(obs=-1) - X = X.isel(trajectory=~np.isnan(X['lon'])) - dsf = X - X = np.array((X['lon'].values, X['lat'].values)).T - d = pairwise_distances(X, n_jobs=-1) - d = np.triu(d) - d[d == 0] = np.nan - - x = d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - - hist, bin_edges = np.histogram(x, bins=100, density=1) - dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) - - # Compute final points pairwise distances (relative traj), PDF and nb of peaks: - X1 = ds.isel(obs=-1) - X1 = X1.isel(trajectory=~np.isnan(X1['lonc'])) - dsfc = X1 - X1 = np.array((X1['lonc'].values, X1['latc'].values)).T - d1 = pairwise_distances(X1, n_jobs=-1) - d1 = np.triu(d1) - d1[d1 == 0] = np.nan - - x1 = d1.flatten() - x1 = x1[~np.isnan(x1)] - x1 = x1[:, np.newaxis] - - hist1, bin_edges1 = np.histogram(x1, bins=100, density=1) - dh1 = np.diff(bin_edges1[0:2]) - peaks1, _ = find_peaks(hist1 / np.max(hist1), height=.4, distance=20) - - # Compute the overlapping between the initial and relative state PDFs: - bin_unif = np.arange(0, np.max([bin_edges0, bin_edges1]), np.min([dh0, dh1])) - dh_unif = np.diff(bin_unif[0:2]) - hist0_unif = np.interp(bin_unif, bin_edges0[0:-1], hist0) - hist_unif = np.interp(bin_unif, bin_edges[0:-1], hist) - hist1_unif = np.interp(bin_unif, bin_edges1[0:-1], hist1) - - # Area under hist1 AND hist0: - # overlapping = np.sum(hist1_unif[hist0_unif >= hist1_unif]*dh_unif) - overlapping = np.sum(hist_unif[hist0_unif >= hist_unif] * dh_unif) - - # Ratio of the max PDF ranges: - # staggering = np.max(bin_edges1)/np.max(bin_edges0) - staggering = np.max(bin_edges) / np.max(bin_edges0) - - # Store metrics in a dict: - prediction_metrics = {} - - prediction_metrics['trajectory_lengths'] = {'median': np.nanmedian(ds['length'].values), - 'std': np.nanstd(ds['length'].values)} - - prediction_metrics['pairwise_distances'] = { - 'initial_state': {'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0)}, - 'final_state': {'median': np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks)}, - 'relative_state': {'median': np.nanmedian(d1), 'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1)}, - 'overlapping': {'value': overlapping, - 'comment': 'Overlapping 
area between PDF(initial_state) and PDF(final_state)'}, - 'staggering': {'value': staggering, 'comment': 'Ratio of PDF(initial_state) vs PDF(final_state) ranges'}, - 'score': {'value': overlapping / len(peaks), 'comment': 'overlapping/nPDFpeaks(final_state)'}} - - if np.isinf(overlapping / len(peaks)): - raise ValueError("Can't compute the prediction score, infinity !") - - ratio = prediction_metrics['pairwise_distances']['final_state']['std'] / \ - prediction_metrics['pairwise_distances']['initial_state']['std'] - prediction_metrics['pairwise_distances']['std_ratio'] = ratio - - # Figure: - if show_plot: - backend = matplotlib.get_backend() - if this_args is not None and this_args.json: - matplotlib.use('Agg') - - fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 10), dpi=90) - ax, ix = ax.flatten(), -1 - cmap = plt.cm.coolwarm - - ix += 1 - dd = dsf['length'].values - ax[ix].plot(X0[:, 0], X0[:, 1], '.', markersize=3, color='grey', alpha=0.5, markeredgecolor=None, zorder=0) - ax[ix].scatter(X[:, 0], X[:, 1], c=dd, zorder=10, s=3, cmap=cmap) - ax[ix].grid() - this_traj = int(dsf.isel(trajectory=np.argmax(dd))['trajectory'].values[np.newaxis][0]) - ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], - ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'r', - zorder=13, label='Longest traj.') - this_traj = int(dsf.isel(trajectory=np.argmin(dd))['trajectory'].values[np.newaxis][0]) - ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], - ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'b', - zorder=13, label='Shortest traj.') - ax[ix].legend() - ax[ix].set_title('Trajectory lengths') - - ix += 1 - ax[ix].plot(bin_edges0[0:-1], hist0, label='Initial (%i peak)' % len(peaks0), color='gray') - ax[ix].plot(bin_edges[0:-1], hist, label='Final (%i peak)' % len(peaks), color='lightblue') - ax[ix].plot(bin_edges[peaks], hist[peaks], "x", label='Peaks') - ax[ix].legend() - ax[ix].grid() - ax[ix].set_xlabel('Pairwise distance [degree]') - line1 = "Staggering: %0.4f" % staggering - line2 = "Overlapping: %0.4f" % overlapping - line3 = "Score: %0.4f" % (overlapping / len(peaks)) - ax[ix].set_title("Pairwise distances PDF: [%s / %s / %s]" % (line1, line2, line3)) - - if this_args is not None: - line0 = "VirtualFleet recovery swarm simulation for WMO %i, starting from cycle %i, predicting cycle %i\n%s" % \ - (this_args.wmo, this_args.cyc[0] - 1, this_args.cyc[0], get_cfg_str(this_cfg)) - line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats) - else: - line0 = "VirtualFleet recovery swarm simulation for cycle %i" % cycle - line1 = "Simulation made with %i virtual floats" % (self.n_floats) - - fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) - plt.tight_layout() - - if save_figure: - if sim_suffix is not None: - filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, cycle) - else: - filename = 'vfrecov_metrics01_cyc%i' % (cycle) - save_figurefile(fig, filename, workdir) - - if this_args is not None and this_args.json: - matplotlib.use(backend) - - if show_plot: - return prediction_metrics, fig, ax - else: - return prediction_metrics - - -class SimPredictor_0: - """ - - Examples - -------- - T = Trajectories(traj_zarr_file) - df = T.get_index().add_distances() - - SP = SimPredictor(df) - SP.fit_predict() - SP.add_metrics(VFvelocity) - SP.bbox() - SP.plot_predictions(VFvelocity) - SP.plan - SP.n_cycles - SP.trajectory - 
SP.prediction - """ - - def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): - self.swarm = df_sim - self.obs = df_obs - # self.set_weights() - self.WMO = np.unique(df_obs['wmo'])[0] - self._json = None - - def __repr__(self): - summary = [""] - summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) - summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) - summary.append("Number of simulated cycles: %i profile(s) for cycle number(s): [%s]" % ( - self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) - summary.append("Observed reference: %i profile(s) for cycle number(s): [%s]" % ( - self.obs.shape[0], ",".join([str(c) for c in self.obs_cycles]))) - return "\n".join(summary) - - @property - def n_cycles(self): - """Number of simulated cycles""" - return len(np.unique(self.swarm['cyc'])) - # return len(self.sim_cycles) - - @property - def obs_cycles(self): - """Observed cycle numbers""" - return np.unique(self.obs['cyc']) - - @property - def sim_cycles(self): - """Simulated cycle numbers""" - return self.obs_cycles[0] + 1 + range(self.n_cycles) - - @property - def plan(self) -> pd.DataFrame: - if not hasattr(self, '_plan'): - df_plan = self.swarm[self.swarm['cyc'] == 1][['date', 'deploy_lon', 'deploy_lat']] - df_plan = df_plan.rename(columns={'deploy_lon': 'longitude', 'deploy_lat': 'latitude'}) - self._plan = df_plan - return self._plan - - @property - def trajectory(self): - """Return the predicted trajectory as a simple :class:`np.array` - - First row is longitude, 2nd is latitude and 3rd is date of simulated profiles - - Return - ------ - :class:`np.array` - - """ - if self._json is None: - raise ValueError("Please call `fit_predict` first") - - traj_prediction = np.array([self.obs['longitude'].values[0], - self.obs['latitude'].values[0], - self.obs['date'].values[0]])[ - np.newaxis] # Starting point where swarm was deployed - for cyc in self._json['predictions'].keys(): - xpred = self._json['predictions'][cyc]['location']['longitude'] - ypred = self._json['predictions'][cyc]['location']['latitude'] - tpred = pd.to_datetime(self._json['predictions'][cyc]['location']['time']) - traj_prediction = np.concatenate((traj_prediction, - np.array([xpred, ypred, tpred])[np.newaxis]), - axis=0) - return traj_prediction - - @property - def predictions(self): - if self._json is None: - raise ValueError("Please call `fit_predict` first") - return self._json - - def bbox(self, s: float = 1) -> list: - """Get a bounding box for maps - - Parameters - ---------- - s: float, default:1 - - Returns - ------- - list - """ - df_sim = self.swarm - df_obs = self.obs - - box = [np.min([df_sim['deploy_lon'].min(), - df_sim['longitude'].min(), - df_sim['rel_lon'].min(), - df_obs['longitude'].min()]), - np.max([df_sim['deploy_lon'].max(), - df_sim['longitude'].max(), - df_sim['rel_lon'].max(), - df_obs['longitude'].max()]), - np.min([df_sim['deploy_lat'].min(), - df_sim['latitude'].min(), - df_sim['rel_lat'].min(), - df_obs['latitude'].min()]), - np.max([df_sim['deploy_lat'].max(), - df_sim['latitude'].max(), - df_sim['rel_lat'].max(), - df_obs['latitude'].max()])] - rx, ry = box[1] - box[0], box[3] - box[2] - r = np.min([rx, ry]) - ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, box[3] + s * r] - - return ebox - -class SimPredictor_1(SimPredictor_0): - - def set_weights(self, scale: float = 20): - """Compute weights for predictions - - Add weights column to swarm :class:`pandas.DataFrame` as a gaussian distance - with a std based on the size of 
the deployment domain - - Parameters - ---------- - scale: float (default=20.) - """ - rx, ry = self.plan['longitude'].max() - self.plan['longitude'].min(), \ - self.plan['latitude'].max() - self.plan['latitude'].min() - r = np.min([rx, ry]) # Minimal size of the deployment domain - weights = np.exp(-(self.swarm['distance_origin'] ** 2) / (r / scale)) - weights[np.isnan(weights)] = 0 - self.swarm['weights'] = weights - return self - - def fit_predict(self, weights_scale: float = 20.) -> dict: - """Predict profile positions from simulated float swarm - - Prediction is based on a :class:`klearn.neighbors._kde.KernelDensity` estimate of the N_FLOATS - simulated, weighted by their deployment distance to the observed previous cycle position. - - Parameters - ---------- - weights_scale: float (default=20) - Scale (in deg) to use to weight the deployment distance to the observed previous cycle position - - Returns - ------- - dict - """ - - def blank_prediction() -> dict: - return {'location': { - 'longitude': None, - 'latitude': None, - 'time': None}, - 'cycle_number': None, - 'wmo': int(self.WMO), - } - - # Compute weights of the swarm float profiles locations - self.set_weights(scale=weights_scale) - - self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}} - - cycles = np.unique(self.swarm['cyc']).astype(int) # 1, 2, ... - recovery_predictions = {} - for icyc, this_sim_cyc in enumerate(cycles): - this_cyc_df = self.swarm[self.swarm['cyc'] == this_sim_cyc] - weights = this_cyc_df['weights'] - x, y = this_cyc_df['rel_lon'], this_cyc_df['rel_lat'] - - w = weights / np.max(np.abs(weights), axis=0) - X = np.array([x, y]).T - kde = KernelDensity(kernel='gaussian', bandwidth=0.15).fit(X, sample_weight=w) - - xg, yg = (np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100), - np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)) - xg, yg = np.meshgrid(xg, yg) - Xg = np.array([xg.flatten(), yg.flatten(), ]).T - llh = kde.score_samples(Xg) - xpred = Xg[np.argmax(llh), 0] - ypred = Xg[np.argmax(llh), 1] - tpred = this_cyc_df['date'].mean() - - # Store results - recovery = blank_prediction() - recovery['location']['longitude'] = xpred - recovery['location']['latitude'] = ypred - recovery['location']['time'] = tpred.isoformat() - recovery['cycle_number'] = int(self.sim_cycles[icyc]) - recovery['virtual_cycle_number'] = int(self.sim_cycles[icyc]) - recovery_predictions.update({int(this_sim_cyc): recovery}) - - # - self._prediction_data['cyc'].update({this_sim_cyc: {'weights': this_cyc_df['weights']}}) - - # Store results internally - self._json = {'predictions': recovery_predictions} - - # Add more stuff to internal storage: - self._predict_errors() - self._add_ref() - self.add_metrics() - - # - return self - - -class SimPredictor_2(SimPredictor_1): - - def _predict_errors(self) -> dict: - """Compute error metrics for the predicted positions - - This is for past cycles, for which we have observed positions of the predicted profiles - - This adds more keys to self._json['predictions'] created by the fit_predict method - - Returns - ------- - dict - """ - - def blank_error(): - return {'distance': {'value': None, - 'unit': 'km'}, - 'bearing': {'value': None, - 'unit': 'degree'}, - 'time': {'value': None, - 'unit': 'hour'} - } - - cyc0 = self.obs_cycles[0] - if self._json is None: - raise ValueError("Please call `fit_predict` first") - recovery_predictions = self._json['predictions'] - - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in 
self.obs_cycles: - error = blank_error() - - this_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0] - xobs = this_obs_profile['longitude'].iloc[0] - yobs = this_obs_profile['latitude'].iloc[0] - tobs = this_obs_profile['date'].iloc[0] - - prev_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0 - 1] - xobs0 = prev_obs_profile['longitude'].iloc[0] - yobs0 = prev_obs_profile['latitude'].iloc[0] - - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = pd.to_datetime(this_prediction['location']['time']) - - dd = haversine(xobs, yobs, xpred, ypred) - error['distance']['value'] = dd - - observed_bearing = bearing(xobs0, yobs0, xobs, yobs) - sim_bearing = bearing(xobs0, yobs0, xpred, ypred) - error['bearing']['value'] = sim_bearing - observed_bearing - - dt = pd.Timedelta(tpred - tobs) / np.timedelta64(1, 's') - # print(tpred, tobs, pd.Timedelta(tpred - tobs)) - error['time']['value'] = dt / 3600 # From seconds to hours - - this_prediction['location_error'] = error - recovery_predictions.update({sim_c: this_prediction}) - - self._json.update({'predictions': recovery_predictions}) - return self - - def _add_ref(self): - """Add observations data to internal data structure - - This adds more keys to self._json['predictions'] created by the fit_predict method - - """ - if self._json is None: - raise ValueError("Please call `predict` first") - - # Observed profiles that were simulated: - profiles_to_predict = [] - for cyc in self.sim_cycles: - this = {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': "", - 'location': {'longitude': None, - 'latitude': None, - 'time': None} - } - if cyc in self.obs_cycles: - this['url_profile'] = get_ea_profile_page_url(self.WMO, cyc) - this_df = self.obs[self.obs['cyc'] == cyc] - this['location']['longitude'] = this_df['longitude'].iloc[0] - this['location']['latitude'] = this_df['latitude'].iloc[0] - this['location']['time'] = this_df['date'].iloc[0].isoformat() - profiles_to_predict.append(this) - - self._json.update({'observations': profiles_to_predict}) - - # Observed profile used as initial conditions to the simulation: - cyc = self.obs_cycles[0] - this_df = self.obs[self.obs['cyc'] == cyc] - self._json.update({'initial_profile': {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': get_ea_profile_page_url(self.WMO, cyc), - 'location': {'longitude': this_df['longitude'].iloc[0], - 'latitude': this_df['latitude'].iloc[0], - 'time': this_df['date'].iloc[0].isoformat() - } - }}) - - # - return self - - def add_metrics(self, VFvel=None): - """Compute more metrics to understand the prediction error - - 1. Compute a transit time to cover the distance error - (assume a 12 kts boat speed with 1 kt = 1.852 km/h) - - 1. 
Compute the possible drift due to the time lag between the predicted profile timing and the expected one - - This adds more keys to self._json['predictions'] created by the fit_predict method - - """ - cyc0 = self.obs_cycles[0] - if self._json is None: - raise ValueError("Please call `predict` first") - recovery_predictions = self._json['predictions'] - - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in self.obs_cycles and 'location_error' in this_prediction.keys(): - - error = this_prediction['location_error'] - metrics = {} - - # Compute a transit time to cover the distance error: - metrics['transit'] = {'value': None, - 'unit': 'hour', - 'comment': 'Transit time to cover the distance error ' - '(assume a 12 kts boat speed with 1 kt = 1.852 km/h)'} - - if error['distance']['value'] is not None: - metrics['transit']['value'] = pd.Timedelta(error['distance']['value'] / (12 * 1.852), - 'h').seconds / 3600. - - # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: - if VFvel is not None: - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = this_prediction['location']['time'] - dsc = VFvel.field.interp( - {VFvel.dim['lon']: xpred, - VFvel.dim['lat']: ypred, - VFvel.dim['time']: tpred, - VFvel.dim['depth']: - VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} - ) - velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] - metrics['surface_drift'] = {'value': None, - 'unit': 'km', - 'surface_currents_speed': None, - 'surface_currents_speed_unit': 'm/s', - 'comment': 'Drift by surface currents due to the float ascent time error ' - '(difference between simulated profile time and the observed one).'} - if error['time']['value'] is not None: - metrics['surface_drift']['value'] = (error['time']['value'] * 3600 * velc / 1e3) - metrics['surface_drift']['surface_currents_speed'] = velc - - # - this_prediction['metrics'] = metrics - recovery_predictions.update({sim_c: this_prediction}) - - self._json.update({"predictions": recovery_predictions}) - return self - - -class SimPredictor_3(SimPredictor_2): - - def plot_predictions(self, - VFvel, - cfg, - sim_suffix='', # get_sim_suffix(this_args, cfg) - s=0.2, - alpha=False, - save_figure=False, - workdir='.', - figsize=None, - dpi=120, - orient='portrait'): - ebox = self.bbox(s=s) - pred_traj = self.trajectory - - if orient == 'portrait': - if self.n_cycles == 1: - nrows, ncols = 2, 1 - if figsize is None: - figsize = (5, 5) - else: - nrows, ncols = self.n_cycles, 2 - if figsize is None: - figsize = (5, (self.n_cycles-1)*5) - else: - if self.n_cycles == 1: - nrows, ncols = 1, 2 - else: - nrows, ncols = 2, self.n_cycles - if figsize is None: - figsize = (ncols*5, 5) - - def plot_this(this_ax, i_cycle, ip): - df_sim = self.swarm[self.swarm['cyc'] == i_cycle + 1] - weights = self._prediction_data['cyc'][i_cycle + 1]['weights'].values - if self.sim_cycles[i_cycle] in self.obs_cycles: - this_profile = self.obs[self.obs['cyc'] == self.sim_cycles[i_cycle]] - else: - this_profile = None - - xpred = self.predictions['predictions'][i_cycle + 1]['location']['longitude'] - ypred = self.predictions['predictions'][i_cycle + 1]['location']['latitude'] - - this_ax.set_extent(ebox) - this_ax = map_add_features(ax[ix]) - - v = VFvel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']) - v.plot.quiver(x="longitude", - y="latitude", - 
u=VFvel.var['U'], - v=VFvel.var['V'], - ax=this_ax, - color='grey', - alpha=0.5, - scale=5, - add_guide=False) - - this_ax.plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', - markersize=3, - color='grey', - alpha=0.1, - markeredgecolor=None, - zorder=0) - - this_ax.plot(pred_traj[:, 0], pred_traj[:, 1], color='k', linewidth=1, marker='+') - this_ax.plot(xpred, ypred, color='g', marker='+') - - w = weights / np.max(np.abs(weights), axis=0) - ii = np.argsort(w) - cmap = plt.cm.cool - # cmap = plt.cm.Reds - - if ip == 0: - x, y = df_sim['deploy_lon'], df_sim['deploy_lat'] - title = 'Initial virtual float positions' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - alpha=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - elif ip == 1: - x, y = df_sim['longitude'], df_sim['latitude'] - title = 'Final virtual float positions' - if not alpha: - this_ax.scatter(x, y, c=w, marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x, y, c=w, marker='o', s=4, alpha=w, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - elif ip == 2: - x, y = df_sim['rel_lon'], df_sim['rel_lat'] - title = 'Final virtual floats positions relative to observed float' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, alpha=w[ii], edgecolor=None, vmin=0, vmax=1, cmap=cmap) - - # Display full trajectory prediction: - if ip != 0 and this_profile is not None: - this_ax.arrow(this_profile['longitude'].iloc[0], - this_profile['latitude'].iloc[0], - xpred - this_profile['longitude'].iloc[0], - ypred - this_profile['latitude'].iloc[0], - length_includes_head=True, fc='k', ec='c', head_width=0.025, zorder=10) - this_ax.plot(xpred, ypred, 'k+', zorder=10) - - this_ax.set_title("") - # this_ax.set_ylabel("Cycle %i predictions" % (i_cycle+1)) - this_ax.set_title("%s\nCycle %i predictions" % (title, self.sim_cycles[i_cycle]), fontsize=6) - - fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi, - subplot_kw={'projection': ccrs.PlateCarree()}, - sharex=True, sharey=True) - ax, ix = ax.flatten(), -1 - - if orient == 'portrait': - rows = range(self.n_cycles) - cols = [1, 2] - else: - rows = [1, 2] - cols = range(self.n_cycles) - - if orient == 'portrait': - for i_cycle in rows: - for ip in cols: - ix += 1 - plot_this(ax[ix], i_cycle, ip) - else: - for ip in rows: - for i_cycle in cols: - ix += 1 - plot_this(ax[ix], i_cycle, ip) - - # log.debug("Start to write metrics string") - # - # xpred = SP.prediction[i_cycle + 1]['location']['longitude']['value'] - # - # err = recovery['prediction_location_error'] - # met = recovery['prediction_metrics'] - # if this_profile.shape[0] > 1: - # # err_str = "Prediction vs Truth: [%0.2fkm, $%0.2f^o$]" % (err['distance'], err['bearing']) - # err_str = "Prediction errors: [dist=%0.2f%s, bearing=$%0.2f^o$, time=%s]\n" \ - # "Distance error represents %s of transit at 12kt" % (err['distance']['value'], - # err['distance']['unit'], - # err['bearing']['value'], - # strfdelta(pd.Timedelta(err['time']['value'], 'h'), - # "{hours}H{minutes:02d}"), - # strfdelta(pd.Timedelta(met['transit']['value'], 'h'), - # "{hours}H{minutes:02d}")) - # else: - # err_str = "" - # - # fig.suptitle("VirtualFleet recovery prediction for WMO %i: \ - # starting from cycle %i, 
predicting cycle %i\n%s\n%s\n%s" % - # (wmo, cyc[0], cyc[1], get_cfg_str(cfg), err_str, "Prediction based on %s" % vel_name), fontsize=15) - - plt.tight_layout() - if save_figure: - save_figurefile(fig, 'vfrecov_predictions_%s' % sim_suffix, workdir) - - return fig, ax - - -class SimPredictor(SimPredictor_3): - - def to_json(self, fp=None): - kw = {'indent': 4, 'sort_keys': True, 'default': str} - if fp is not None: - if hasattr(fp, 'write'): - json.dump(self._json, fp, **kw) - else: - with open(fp, 'w') as f: - json.dump(self._json, f, **kw) - else: - results_js = json.dumps(self._json, **kw) - return results_js - - -def get_ea_profile_page_url(wmo, cyc): - try: - url = argoplot.dashboard(wmo, cyc, url_only=True) - except: - log.info("EA dashboard page not available for this profile: %i/%i" % (wmo, cyc)) - url = "404" - return url - - def setup_args(): icons_help_string = """This script can be used to make prediction of a specific float cycle position. This script can be used on past or unknown float cycles. @@ -1366,272 +82,6 @@ def setup_args(): return parser -def get_sim_suffix(this_args, this_cfg): - """Compose a string suffix for output files""" - # suf = '%s_%i' % (this_args.velocity, this_args.nfloats) - suf = 'VEL%s_NF%i_CYCDUR%i_PARKD%i_PROFD%i_SFD%i' % (this_args.velocity, - this_args.nfloats, - int(this_cfg.mission['cycle_duration']), - int(this_cfg.mission['parking_depth']), - int(this_cfg.mission['profile_depth']), - int(this_cfg.mission['reco_free_surface_drift'])) - return suf - - -def predictor(args): - """Prediction manager""" - execution_start = time.time() - process_start = time.process_time() - - if is_wmo(args.wmo): - WMO = args.wmo - if is_cyc(args.cyc): - CYC = [check_cyc(args.cyc)[0]-1] - [CYC.append(c) for c in check_cyc(args.cyc)] - if args.velocity not in ['ARMOR3D', 'GLORYS']: - raise ValueError("Velocity field must be one in: ['ARMOR3D', 'GLORYS']") - else: - VEL_NAME = args.velocity.upper() - - puts('CYC = %s' % CYC, color=COLORS.magenta) - # raise ValueError('stophere') - - if args.save_figure: - mplbackend = matplotlib.get_backend() - matplotlib.use('Agg') - - # Where do we find the VirtualFleet repository ? 
- if not args.vf: - if os.uname()[1] == 'data-app-virtualfleet-recovery': - euroargodev = os.path.expanduser('/home/ubuntu') - else: - euroargodev = os.path.expanduser('~/git/github/euroargodev') - else: - euroargodev = os.path.abspath(args.vf) - if not os.path.exists(os.path.join(euroargodev, "VirtualFleet")): - raise ValueError("VirtualFleet can't be found at '%s'" % euroargodev) - - # Import the VirtualFleet library - sys.path.insert(0, os.path.join(euroargodev, "VirtualFleet")) - from virtualargofleet import Velocity, VirtualFleet, FloatConfiguration, ConfigParam - # from virtualargofleet.app_parcels import ArgoParticle - - # Set up the working directory: - if not args.output: - WORKDIR = os.path.sep.join([get_package_dir(), "webapi", "myapp", "static", "data", str(WMO), str(CYC[1])]) - else: - WORKDIR = os.path.sep.join([args.output, str(WMO), str(CYC[1])]) - WORKDIR = os.path.abspath(WORKDIR) - if not os.path.exists(WORKDIR): - os.makedirs(WORKDIR) - args.output = WORKDIR - - if not args.json: - puts("\nData will be saved in:") - puts("\t%s" % WORKDIR, color=COLORS.green) - - # Set-up logger - logging.basicConfig( - level=logging.DEBUG, - format=DEBUGFORMATTER, - datefmt='%m/%d/%Y %I:%M:%S %p', - handlers=[logging.FileHandler(os.path.join(WORKDIR, "vfpred.log"), mode='a')] - ) - - # Load these profiles' information: - if not args.json: - puts("\nYou can check this float dashboard while we prepare the prediction:") - puts("\t%s" % argoplot.dashboard(WMO, url_only=True), color=COLORS.green) - puts("\nLoading float profiles index ...") - host = "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" - THIS_PROFILE = store(host=host).search_wmo_cyc(WMO, CYC).to_dataframe() - THIS_DATE = pd.to_datetime(THIS_PROFILE['date'].values[0], utc=True) - CENTER = [THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]] - if not args.json: - puts("\nProfiles to work with:") - puts(THIS_PROFILE.to_string(max_colwidth=15), color=COLORS.green) - if THIS_PROFILE.shape[0] == 1: - puts('\nReal-case scenario: True position unknown !', color=COLORS.yellow) - else: - puts('\nEvaluation scenario: historical position known', color=COLORS.yellow) - - # Load real float configuration at the previous cycle: - if not args.json: - puts("\nLoading float configuration...") - try: - CFG = FloatConfiguration([WMO, CYC[0]]) - except: - if not args.json: - puts("Can't load this profile config, falling back on default values", color=COLORS.red) - CFG = FloatConfiguration('default') - - if args.cfg_parking_depth is not None: - puts("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], - float(args.cfg_parking_depth))) - CFG.update('parking_depth', float(args.cfg_parking_depth)) - - if args.cfg_cycle_duration is not None: - puts("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], - float(args.cfg_cycle_duration))) - CFG.update('cycle_duration', float(args.cfg_cycle_duration)) - - if args.cfg_profile_depth is not None: - puts("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], - float(args.cfg_profile_depth))) - CFG.update('profile_depth', float(args.cfg_profile_depth)) - - CFG.params = ConfigParam(key='reco_free_surface_drift', - value=int(args.cfg_free_surface_drift), - unit='cycle', - description='First cycle with free surface drift', - dtype=int) - - # Save virtual float 
configuration on file: - CFG.to_json(os.path.join(WORKDIR, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) - - if not args.json: - puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) - - # Get the cycling frequency (in days, this is more a period then...): - CYCLING_FREQUENCY = int(np.round(CFG.mission['cycle_duration']/24)) - - # Define domain to load velocity for, and get it: - width = args.domain_size + np.abs(np.ceil(THIS_PROFILE['longitude'].values[-1] - CENTER[0])) - height = args.domain_size + np.abs(np.ceil(THIS_PROFILE['latitude'].values[-1] - CENTER[1])) - VBOX = [CENTER[0] - width / 2, CENTER[0] + width / 2, CENTER[1] - height / 2, CENTER[1] + height / 2] - N_DAYS = (len(CYC)-1)*CYCLING_FREQUENCY+1 - if not args.json: - puts("\nLoading %s velocity field to cover %i days..." % (VEL_NAME, N_DAYS)) - ds_vel, velocity_file = get_velocity_field(VBOX, THIS_DATE, - n_days=N_DAYS, - output=WORKDIR, - dataset=VEL_NAME) - VEL = Velocity(model='GLORYS12V1' if VEL_NAME == 'GLORYS' else VEL_NAME, src=ds_vel) - if not args.json: - puts("\n\t%s" % str(ds_vel), color=COLORS.green) - puts("\n\tLoaded velocity field from %s to %s" % - (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), - pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) - figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) - - # raise ValueError('stophere') - - # VirtualFleet, get a deployment plan: - if not args.json: - puts("\nVirtualFleet, get a deployment plan...") - DF_PLAN = setup_deployment_plan(CENTER, THIS_DATE, nfloats=args.nfloats) - PLAN = {'lon': DF_PLAN['longitude'], - 'lat': DF_PLAN['latitude'], - 'time': np.array([np.datetime64(t) for t in DF_PLAN['date'].dt.strftime('%Y-%m-%d %H:%M').array]), - } - if not args.json: - puts("\t%i virtual floats to deploy" % DF_PLAN.shape[0], color=COLORS.green) - - # Set up VirtualFleet: - if not args.json: - puts("\nVirtualFleet, set-up the fleet...") - VFleet = VirtualFleet(plan=PLAN, - fieldset=VEL, - mission=CFG) - - # VirtualFleet, execute the simulation: - if not args.json: - puts("\nVirtualFleet, execute the simulation...") - - # Remove traj file if exists: - output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) - # if os.path.exists(output_path): - # shutil.rmtree(output_path) - # - # VFleet.simulate(duration=timedelta(hours=N_DAYS*24+1), - # step=timedelta(minutes=5), - # record=timedelta(minutes=30), - # output=True, - # output_folder=WORKDIR, - # output_file='trajectories_%s.zarr' % get_sim_suffix(args, CFG), - # verbose_progress=not args.json, - # ) - - # VirtualFleet, get simulated profiles index: - if not args.json: - puts("\nExtract swarm profiles index...") - - T = Trajectories(WORKDIR + "/" + 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) - DF_SIM = T.get_index().add_distances(origin=[THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]]) - if not args.json: - puts(str(T), color=COLORS.magenta) - puts(DF_SIM.head().to_string(), color=COLORS.green) - figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, - dd=1, save_figure=args.save_figure, workdir=WORKDIR) - - # Recovery, make predictions based on simulated profile density: - SP = SimPredictor(DF_SIM, THIS_PROFILE) - if not args.json: - puts("\nPredict float cycle position(s) from swarm simulation...", color=COLORS.white) - puts(str(SP), 
color=COLORS.magenta) - SP.fit_predict() - SP.add_metrics(VEL) - SP.plot_predictions(VEL, - CFG, - sim_suffix=get_sim_suffix(args, CFG), - save_figure=args.save_figure, - workdir=WORKDIR, - orient='portrait') - results = SP.predictions - - # Recovery, compute more swarm metrics: - for this_cyc in T.sim_cycles: - jsmetrics, fig, ax = T.analyse_pairwise_distances(cycle=this_cyc, - save_figure=True, - this_args=args, - this_cfg=CFG, - sim_suffix=get_sim_suffix(args, CFG), - workdir=WORKDIR, - ) - if 'metrics' in results['predictions'][this_cyc]: - for key in jsmetrics.keys(): - results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) - else: - results['predictions'][this_cyc].update({'metrics': jsmetrics}) - - # Recovery, finalize JSON output: - execution_end = time.time() - process_end = time.process_time() - computation = { - 'Date': pd.to_datetime('now', utc=True), - 'Wall-time': pd.Timedelta(execution_end - execution_start, 's'), - 'CPU-time': pd.Timedelta(process_end - process_start, 's'), - 'system': getSystemInfo() - } - results['meta'] = {'Velocity field': VEL_NAME, - 'Nfloats': args.nfloats, - 'Computation': computation, - 'VFloats_config': CFG.to_json(), - } - - if not args.json: - puts("\nPredictions:") - results_js = json.dumps(results, indent=4, sort_keys=True, default=str) - - with open(os.path.join(WORKDIR, 'prediction_%s.json' % get_sim_suffix(args, CFG)), 'w', encoding='utf-8') as f: - json.dump(results, f, ensure_ascii=False, indent=4, default=str, sort_keys=True) - - if not args.json: - puts(results_js, color=COLORS.green) - puts("\nCheck results at:") - puts("\t%s" % WORKDIR, color=COLORS.green) - - if args.save_figure: - plt.close('all') - # Restore Matplotlib backend - matplotlib.use(mplbackend) - - if not args.save_sim: - shutil.rmtree(output_path) - - return results_js if __name__ == '__main__': diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index c3ec036..e6ee196 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -1,6 +1,6 @@ import click from typing import Union, List -from vfrecovery.core_functions.predict import predict_function +from vfrecovery.core.predict import predict_function @click.group() def cli_group_predict() -> None: diff --git a/vfrecovery/core/__init__.py b/vfrecovery/core/__init__.py new file mode 100644 index 0000000..2fbc35d --- /dev/null +++ b/vfrecovery/core/__init__.py @@ -0,0 +1,4 @@ +# from deployment_plan import setup_deployment_plan +# from trajfile_handler import Trajectories +# from simulation_handler import SimPredictor +# from predict import predict_function diff --git a/vfrecovery/core/deployment_plan.py b/vfrecovery/core/deployment_plan.py new file mode 100644 index 0000000..33fc1a9 --- /dev/null +++ b/vfrecovery/core/deployment_plan.py @@ -0,0 +1,42 @@ +import numpy as np +import pandas as pd + + +def setup_deployment_plan(a_profile, a_date, nfloats=15000): + # We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time + + # Amplitude of the profile position perturbations in the zonal (deg), meridional (deg), and temporal (hours) directions: + rx = 0.5 + ry = 0.5 + rt = 0 + + # + lonc, latc = a_profile + # box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] + + a, b = lonc - rx / 2, lonc + rx / 2 + lon = (b - a) * np.random.random_sample((nfloats,)) + a + + a, b = latc - ry / 2, latc + ry / 2 + 
lat = (b - a) * np.random.random_sample((nfloats,)) + a
+
+    a, b = 0, rt
+    dtim = (b - a) * np.random.random_sample((nfloats,)) + a
+    dtim = np.round(dtim).astype(int)
+    tim = pd.to_datetime([a_date + np.timedelta64(dt, 'h') for dt in dtim])
+    # dtim = (b-a) * np.random.random_sample((nfloats, )) + a
+    # dtim = np.round(dtim).astype(int)
+    # tim2 = pd.to_datetime([this_date - np.timedelta64(dt, 'h') for dt in dtim])
+    # tim = np.sort(np.concatenate([tim2, tim1]))
+
+    # Round times to a 5-minute resolution, matching step=timedelta(minutes=5) in the simulation parameters:
+    tim = tim.round(freq='5min')
+
+    #
+    df = pd.DataFrame(
+        [tim, lat, lon, np.arange(0, nfloats) + 9000000, np.full_like(lon, 0), ['VF' for l in lon], ['?' for l in lon]],
+        index=['date', 'latitude', 'longitude', 'wmo', 'cycle_number', 'institution_code', 'file']).T
+    df['date'] = pd.to_datetime(df['date'])
+
+    return df
+
diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py
new file mode 100644
index 0000000..d38f11f
--- /dev/null
+++ b/vfrecovery/core/predict.py
@@ -0,0 +1,278 @@
+
+def predict_function(
+        wmo: int,
+        cyc: int,
+        n_predictions: int = 1,
+):
+    """
+    Execute the VirtualFleet-Recovery predictor and return results as a JSON string
+
+    Parameters
+    ----------
+    wmo
+        WMO number of the float to work with
+    cyc
+        Cycle number to predict the position of
+    n_predictions
+        Number of profile positions to predict (default: 1)
+
+    Returns
+    -------
+    data
+        Prediction results (currently a stub echoing ``wmo`` and ``cyc``)
+
+    """ # noqa
+    return {'wmo': wmo, 'cyc': cyc}
+
+
+def predictor(args):
+    """Prediction manager"""
+    execution_start = time.time()
+    process_start = time.process_time()
+
+    if is_wmo(args.wmo):
+        WMO = args.wmo
+    if is_cyc(args.cyc):
+        CYC = [check_cyc(args.cyc)[0]-1]
+        CYC.extend(check_cyc(args.cyc))
+    if args.velocity not in ['ARMOR3D', 'GLORYS']:
+        raise ValueError("Velocity field must be one of: ['ARMOR3D', 'GLORYS']")
+    else:
+        VEL_NAME = args.velocity.upper()
+
+    puts('CYC = %s' % CYC, color=COLORS.magenta)
+    # raise ValueError('stophere')
+
+    if args.save_figure:
+        mplbackend = matplotlib.get_backend()
+        matplotlib.use('Agg')
+
+    # Where do we find the VirtualFleet repository?
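+    # Resolution order (as implemented below): an explicit --vf path wins;
+    # otherwise fall back to the recovery-server layout (/home/ubuntu on the
+    # host 'data-app-virtualfleet-recovery') or a developer checkout under
+    # ~/git/github/euroargodev. The resolved directory must contain a
+    # 'VirtualFleet' folder, which is inserted on sys.path so that the
+    # virtualargofleet package can be imported.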
+ if not args.vf: + if os.uname()[1] == 'data-app-virtualfleet-recovery': + euroargodev = os.path.expanduser('/home/ubuntu') + else: + euroargodev = os.path.expanduser('~/git/github/euroargodev') + else: + euroargodev = os.path.abspath(args.vf) + if not os.path.exists(os.path.join(euroargodev, "VirtualFleet")): + raise ValueError("VirtualFleet can't be found at '%s'" % euroargodev) + + # Import the VirtualFleet library + sys.path.insert(0, os.path.join(euroargodev, "VirtualFleet")) + from virtualargofleet import Velocity, VirtualFleet, FloatConfiguration, ConfigParam + # from virtualargofleet.app_parcels import ArgoParticle + + # Set up the working directory: + if not args.output: + WORKDIR = os.path.sep.join([get_package_dir(), "webapi", "myapp", "static", "data", str(WMO), str(CYC[1])]) + else: + WORKDIR = os.path.sep.join([args.output, str(WMO), str(CYC[1])]) + WORKDIR = os.path.abspath(WORKDIR) + if not os.path.exists(WORKDIR): + os.makedirs(WORKDIR) + args.output = WORKDIR + + if not args.json: + puts("\nData will be saved in:") + puts("\t%s" % WORKDIR, color=COLORS.green) + + # Set-up logger + logging.basicConfig( + level=logging.DEBUG, + format=DEBUGFORMATTER, + datefmt='%m/%d/%Y %I:%M:%S %p', + handlers=[logging.FileHandler(os.path.join(WORKDIR, "vfpred.log"), mode='a')] + ) + + # Load these profiles' information: + if not args.json: + puts("\nYou can check this float dashboard while we prepare the prediction:") + puts("\t%s" % argoplot.dashboard(WMO, url_only=True), color=COLORS.green) + puts("\nLoading float profiles index ...") + host = "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" + THIS_PROFILE = store(host=host).search_wmo_cyc(WMO, CYC).to_dataframe() + THIS_DATE = pd.to_datetime(THIS_PROFILE['date'].values[0], utc=True) + CENTER = [THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]] + if not args.json: + puts("\nProfiles to work with:") + puts(THIS_PROFILE.to_string(max_colwidth=15), color=COLORS.green) + if THIS_PROFILE.shape[0] == 1: + puts('\nReal-case scenario: True position unknown !', color=COLORS.yellow) + else: + puts('\nEvaluation scenario: historical position known', color=COLORS.yellow) + + # Load real float configuration at the previous cycle: + if not args.json: + puts("\nLoading float configuration...") + try: + CFG = FloatConfiguration([WMO, CYC[0]]) + except: + if not args.json: + puts("Can't load this profile config, falling back on default values", color=COLORS.red) + CFG = FloatConfiguration('default') + + if args.cfg_parking_depth is not None: + puts("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], + float(args.cfg_parking_depth))) + CFG.update('parking_depth', float(args.cfg_parking_depth)) + + if args.cfg_cycle_duration is not None: + puts("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], + float(args.cfg_cycle_duration))) + CFG.update('cycle_duration', float(args.cfg_cycle_duration)) + + if args.cfg_profile_depth is not None: + puts("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], + float(args.cfg_profile_depth))) + CFG.update('profile_depth', float(args.cfg_profile_depth)) + + CFG.params = ConfigParam(key='reco_free_surface_drift', + value=int(args.cfg_free_surface_drift), + unit='cycle', + description='First cycle with free surface drift', + dtype=int) + + # Save virtual float 
configuration on file: + CFG.to_json(os.path.join(WORKDIR, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) + + if not args.json: + puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) + + # Get the cycling frequency (in days, this is more a period then...): + CYCLING_FREQUENCY = int(np.round(CFG.mission['cycle_duration']/24)) + + # Define domain to load velocity for, and get it: + width = args.domain_size + np.abs(np.ceil(THIS_PROFILE['longitude'].values[-1] - CENTER[0])) + height = args.domain_size + np.abs(np.ceil(THIS_PROFILE['latitude'].values[-1] - CENTER[1])) + VBOX = [CENTER[0] - width / 2, CENTER[0] + width / 2, CENTER[1] - height / 2, CENTER[1] + height / 2] + N_DAYS = (len(CYC)-1)*CYCLING_FREQUENCY+1 + if not args.json: + puts("\nLoading %s velocity field to cover %i days..." % (VEL_NAME, N_DAYS)) + ds_vel, velocity_file = get_velocity_field(VBOX, THIS_DATE, + n_days=N_DAYS, + output=WORKDIR, + dataset=VEL_NAME) + VEL = Velocity(model='GLORYS12V1' if VEL_NAME == 'GLORYS' else VEL_NAME, src=ds_vel) + if not args.json: + puts("\n\t%s" % str(ds_vel), color=COLORS.green) + puts("\n\tLoaded velocity field from %s to %s" % + (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), + pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) + figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) + + # raise ValueError('stophere') + + # VirtualFleet, get a deployment plan: + if not args.json: + puts("\nVirtualFleet, get a deployment plan...") + DF_PLAN = setup_deployment_plan(CENTER, THIS_DATE, nfloats=args.nfloats) + PLAN = {'lon': DF_PLAN['longitude'], + 'lat': DF_PLAN['latitude'], + 'time': np.array([np.datetime64(t) for t in DF_PLAN['date'].dt.strftime('%Y-%m-%d %H:%M').array]), + } + if not args.json: + puts("\t%i virtual floats to deploy" % DF_PLAN.shape[0], color=COLORS.green) + + # Set up VirtualFleet: + if not args.json: + puts("\nVirtualFleet, set-up the fleet...") + VFleet = VirtualFleet(plan=PLAN, + fieldset=VEL, + mission=CFG) + + # VirtualFleet, execute the simulation: + if not args.json: + puts("\nVirtualFleet, execute the simulation...") + + # Remove traj file if exists: + output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) + # if os.path.exists(output_path): + # shutil.rmtree(output_path) + # + # VFleet.simulate(duration=timedelta(hours=N_DAYS*24+1), + # step=timedelta(minutes=5), + # record=timedelta(minutes=30), + # output=True, + # output_folder=WORKDIR, + # output_file='trajectories_%s.zarr' % get_sim_suffix(args, CFG), + # verbose_progress=not args.json, + # ) + + # VirtualFleet, get simulated profiles index: + if not args.json: + puts("\nExtract swarm profiles index...") + + T = Trajectories(WORKDIR + "/" + 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) + DF_SIM = T.get_index().add_distances(origin=[THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]]) + if not args.json: + puts(str(T), color=COLORS.magenta) + puts(DF_SIM.head().to_string(), color=COLORS.green) + figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, + dd=1, save_figure=args.save_figure, workdir=WORKDIR) + + # Recovery, make predictions based on simulated profile density: + SP = SimPredictor(DF_SIM, THIS_PROFILE) + if not args.json: + puts("\nPredict float cycle position(s) from swarm simulation...", color=COLORS.white) + puts(str(SP), 
color=COLORS.magenta) + SP.fit_predict() + SP.add_metrics(VEL) + SP.plot_predictions(VEL, + CFG, + sim_suffix=get_sim_suffix(args, CFG), + save_figure=args.save_figure, + workdir=WORKDIR, + orient='portrait') + results = SP.predictions + + # Recovery, compute more swarm metrics: + for this_cyc in T.sim_cycles: + jsmetrics, fig, ax = T.analyse_pairwise_distances(cycle=this_cyc, + save_figure=True, + this_args=args, + this_cfg=CFG, + sim_suffix=get_sim_suffix(args, CFG), + workdir=WORKDIR, + ) + if 'metrics' in results['predictions'][this_cyc]: + for key in jsmetrics.keys(): + results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) + else: + results['predictions'][this_cyc].update({'metrics': jsmetrics}) + + # Recovery, finalize JSON output: + execution_end = time.time() + process_end = time.process_time() + computation = { + 'Date': pd.to_datetime('now', utc=True), + 'Wall-time': pd.Timedelta(execution_end - execution_start, 's'), + 'CPU-time': pd.Timedelta(process_end - process_start, 's'), + 'system': getSystemInfo() + } + results['meta'] = {'Velocity field': VEL_NAME, + 'Nfloats': args.nfloats, + 'Computation': computation, + 'VFloats_config': CFG.to_json(), + } + + if not args.json: + puts("\nPredictions:") + results_js = json.dumps(results, indent=4, sort_keys=True, default=str) + + with open(os.path.join(WORKDIR, 'prediction_%s.json' % get_sim_suffix(args, CFG)), 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=4, default=str, sort_keys=True) + + if not args.json: + puts(results_js, color=COLORS.green) + puts("\nCheck results at:") + puts("\t%s" % WORKDIR, color=COLORS.green) + + if args.save_figure: + plt.close('all') + # Restore Matplotlib backend + matplotlib.use(mplbackend) + + if not args.save_sim: + shutil.rmtree(output_path) + + return results_js + diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py new file mode 100644 index 0000000..2b58e95 --- /dev/null +++ b/vfrecovery/core/simulation_handler.py @@ -0,0 +1,595 @@ +import xarray as xr +import pandas as pd +import numpy as np +import json +import matplotlib +from sklearn.neighbors import KernelDensity +from scipy.signal import find_peaks +from sklearn.metrics import pairwise_distances +import matplotlib.pyplot as plt +import argopy.plot as argoplot +import cartopy.crs as ccrs + +from vfrecovery.utils.misc import get_cfg_str, get_ea_profile_page_url +from vfrecovery.plots.utils import save_figurefile, map_add_features +from vfrecovery.utils.geo import haversine, bearing + + +class SimPredictor_0: + """ + + Examples + -------- + T = Trajectories(traj_zarr_file) + df = T.get_index().add_distances() + + SP = SimPredictor(df) + SP.fit_predict() + SP.add_metrics(VFvelocity) + SP.bbox() + SP.plot_predictions(VFvelocity) + SP.plan + SP.n_cycles + SP.trajectory + SP.prediction + """ + + def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): + self.swarm = df_sim + self.obs = df_obs + # self.set_weights() + self.WMO = np.unique(df_obs['wmo'])[0] + self._json = None + + def __repr__(self): + summary = [""] + summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) + summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) + summary.append("Number of simulated cycles: %i profile(s) for cycle number(s): [%s]" % ( + self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) + summary.append("Observed reference: %i profile(s) for cycle number(s): [%s]" % ( + self.obs.shape[0], ",".join([str(c) for c in 
self.obs_cycles]))) + return "\n".join(summary) + + @property + def n_cycles(self): + """Number of simulated cycles""" + return len(np.unique(self.swarm['cyc'])) + # return len(self.sim_cycles) + + @property + def obs_cycles(self): + """Observed cycle numbers""" + return np.unique(self.obs['cyc']) + + @property + def sim_cycles(self): + """Simulated cycle numbers""" + return self.obs_cycles[0] + 1 + range(self.n_cycles) + + @property + def plan(self) -> pd.DataFrame: + if not hasattr(self, '_plan'): + df_plan = self.swarm[self.swarm['cyc'] == 1][['date', 'deploy_lon', 'deploy_lat']] + df_plan = df_plan.rename(columns={'deploy_lon': 'longitude', 'deploy_lat': 'latitude'}) + self._plan = df_plan + return self._plan + + @property + def trajectory(self): + """Return the predicted trajectory as a simple :class:`np.array` + + First row is longitude, 2nd is latitude and 3rd is date of simulated profiles + + Return + ------ + :class:`np.array` + + """ + if self._json is None: + raise ValueError("Please call `fit_predict` first") + + traj_prediction = np.array([self.obs['longitude'].values[0], + self.obs['latitude'].values[0], + self.obs['date'].values[0]])[ + np.newaxis] # Starting point where swarm was deployed + for cyc in self._json['predictions'].keys(): + xpred = self._json['predictions'][cyc]['location']['longitude'] + ypred = self._json['predictions'][cyc]['location']['latitude'] + tpred = pd.to_datetime(self._json['predictions'][cyc]['location']['time']) + traj_prediction = np.concatenate((traj_prediction, + np.array([xpred, ypred, tpred])[np.newaxis]), + axis=0) + return traj_prediction + + @property + def predictions(self): + if self._json is None: + raise ValueError("Please call `fit_predict` first") + return self._json + + def bbox(self, s: float = 1) -> list: + """Get a bounding box for maps + + Parameters + ---------- + s: float, default:1 + + Returns + ------- + list + """ + df_sim = self.swarm + df_obs = self.obs + + box = [np.min([df_sim['deploy_lon'].min(), + df_sim['longitude'].min(), + df_sim['rel_lon'].min(), + df_obs['longitude'].min()]), + np.max([df_sim['deploy_lon'].max(), + df_sim['longitude'].max(), + df_sim['rel_lon'].max(), + df_obs['longitude'].max()]), + np.min([df_sim['deploy_lat'].min(), + df_sim['latitude'].min(), + df_sim['rel_lat'].min(), + df_obs['latitude'].min()]), + np.max([df_sim['deploy_lat'].max(), + df_sim['latitude'].max(), + df_sim['rel_lat'].max(), + df_obs['latitude'].max()])] + rx, ry = box[1] - box[0], box[3] - box[2] + r = np.min([rx, ry]) + ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, box[3] + s * r] + + return ebox + + +class SimPredictor_1(SimPredictor_0): + + def set_weights(self, scale: float = 20): + """Compute weights for predictions + + Add weights column to swarm :class:`pandas.DataFrame` as a gaussian distance + with a std based on the size of the deployment domain + + Parameters + ---------- + scale: float (default=20.) + """ + rx, ry = self.plan['longitude'].max() - self.plan['longitude'].min(), \ + self.plan['latitude'].max() - self.plan['latitude'].min() + r = np.min([rx, ry]) # Minimal size of the deployment domain + weights = np.exp(-(self.swarm['distance_origin'] ** 2) / (r / scale)) + weights[np.isnan(weights)] = 0 + self.swarm['weights'] = weights + return self + + def fit_predict(self, weights_scale: float = 20.) 
-> dict: + """Predict profile positions from simulated float swarm + + Prediction is based on a :class:`klearn.neighbors._kde.KernelDensity` estimate of the N_FLOATS + simulated, weighted by their deployment distance to the observed previous cycle position. + + Parameters + ---------- + weights_scale: float (default=20) + Scale (in deg) to use to weight the deployment distance to the observed previous cycle position + + Returns + ------- + dict + """ + + def blank_prediction() -> dict: + return {'location': { + 'longitude': None, + 'latitude': None, + 'time': None}, + 'cycle_number': None, + 'wmo': int(self.WMO), + } + + # Compute weights of the swarm float profiles locations + self.set_weights(scale=weights_scale) + + self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}} + + cycles = np.unique(self.swarm['cyc']).astype(int) # 1, 2, ... + recovery_predictions = {} + for icyc, this_sim_cyc in enumerate(cycles): + this_cyc_df = self.swarm[self.swarm['cyc'] == this_sim_cyc] + weights = this_cyc_df['weights'] + x, y = this_cyc_df['rel_lon'], this_cyc_df['rel_lat'] + + w = weights / np.max(np.abs(weights), axis=0) + X = np.array([x, y]).T + kde = KernelDensity(kernel='gaussian', bandwidth=0.15).fit(X, sample_weight=w) + + xg, yg = (np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100), + np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)) + xg, yg = np.meshgrid(xg, yg) + Xg = np.array([xg.flatten(), yg.flatten(), ]).T + llh = kde.score_samples(Xg) + xpred = Xg[np.argmax(llh), 0] + ypred = Xg[np.argmax(llh), 1] + tpred = this_cyc_df['date'].mean() + + # Store results + recovery = blank_prediction() + recovery['location']['longitude'] = xpred + recovery['location']['latitude'] = ypred + recovery['location']['time'] = tpred.isoformat() + recovery['cycle_number'] = int(self.sim_cycles[icyc]) + recovery['virtual_cycle_number'] = int(self.sim_cycles[icyc]) + recovery_predictions.update({int(this_sim_cyc): recovery}) + + # + self._prediction_data['cyc'].update({this_sim_cyc: {'weights': this_cyc_df['weights']}}) + + # Store results internally + self._json = {'predictions': recovery_predictions} + + # Add more stuff to internal storage: + self._predict_errors() + self._add_ref() + self.add_metrics() + + # + return self + + +class SimPredictor_2(SimPredictor_1): + + def _predict_errors(self) -> dict: + """Compute error metrics for the predicted positions + + This is for past cycles, for which we have observed positions of the predicted profiles + + This adds more keys to self._json['predictions'] created by the fit_predict method + + Returns + ------- + dict + """ + + def blank_error(): + return {'distance': {'value': None, + 'unit': 'km'}, + 'bearing': {'value': None, + 'unit': 'degree'}, + 'time': {'value': None, + 'unit': 'hour'} + } + + cyc0 = self.obs_cycles[0] + if self._json is None: + raise ValueError("Please call `fit_predict` first") + recovery_predictions = self._json['predictions'] + + for sim_c in recovery_predictions.keys(): + this_prediction = recovery_predictions[sim_c] + if sim_c + cyc0 in self.obs_cycles: + error = blank_error() + + this_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0] + xobs = this_obs_profile['longitude'].iloc[0] + yobs = this_obs_profile['latitude'].iloc[0] + tobs = this_obs_profile['date'].iloc[0] + + prev_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0 - 1] + xobs0 = prev_obs_profile['longitude'].iloc[0] + yobs0 = prev_obs_profile['latitude'].iloc[0] + + xpred = this_prediction['location']['longitude'] + ypred = 
this_prediction['location']['latitude']
+                tpred = pd.to_datetime(this_prediction['location']['time'])
+
+                dd = haversine(xobs, yobs, xpred, ypred)
+                error['distance']['value'] = dd
+
+                observed_bearing = bearing(xobs0, yobs0, xobs, yobs)
+                sim_bearing = bearing(xobs0, yobs0, xpred, ypred)
+                error['bearing']['value'] = sim_bearing - observed_bearing
+
+                dt = pd.Timedelta(tpred - tobs) / np.timedelta64(1, 's')
+                # print(tpred, tobs, pd.Timedelta(tpred - tobs))
+                error['time']['value'] = dt / 3600  # From seconds to hours
+
+                this_prediction['location_error'] = error
+                recovery_predictions.update({sim_c: this_prediction})
+
+        self._json.update({'predictions': recovery_predictions})
+        return self
+
+    def _add_ref(self):
+        """Add observation data to the internal data structure
+
+        This adds more keys to self._json['predictions'] created by the fit_predict method
+
+        """
+        if self._json is None:
+            raise ValueError("Please call `fit_predict` first")
+
+        # Observed profiles that were simulated:
+        profiles_to_predict = []
+        for cyc in self.sim_cycles:
+            this = {'wmo': int(self.WMO),
+                    'cycle_number': int(cyc),
+                    'url_float': argoplot.dashboard(self.WMO, url_only=True),
+                    'url_profile': "",
+                    'location': {'longitude': None,
+                                 'latitude': None,
+                                 'time': None}
+                    }
+            if cyc in self.obs_cycles:
+                this['url_profile'] = get_ea_profile_page_url(self.WMO, cyc)
+                this_df = self.obs[self.obs['cyc'] == cyc]
+                this['location']['longitude'] = this_df['longitude'].iloc[0]
+                this['location']['latitude'] = this_df['latitude'].iloc[0]
+                this['location']['time'] = this_df['date'].iloc[0].isoformat()
+            profiles_to_predict.append(this)
+
+        self._json.update({'observations': profiles_to_predict})
+
+        # Observed profile used as initial condition for the simulation:
+        cyc = self.obs_cycles[0]
+        this_df = self.obs[self.obs['cyc'] == cyc]
+        self._json.update({'initial_profile': {'wmo': int(self.WMO),
+                                               'cycle_number': int(cyc),
+                                               'url_float': argoplot.dashboard(self.WMO, url_only=True),
+                                               'url_profile': get_ea_profile_page_url(self.WMO, cyc),
+                                               'location': {'longitude': this_df['longitude'].iloc[0],
+                                                            'latitude': this_df['latitude'].iloc[0],
+                                                            'time': this_df['date'].iloc[0].isoformat()
+                                                            }
+                                               }})
+
+        #
+        return self
+
+    def add_metrics(self, VFvel=None):
+        """Compute more metrics to understand the prediction error
+
+        1. Compute a transit time to cover the distance error
+        (assuming a 12 kt boat speed, with 1 kt = 1.852 km/h)
+
+        2. Compute the possible drift due to the time lag between the predicted profile timing and the expected one
+
+        This adds more keys to self._json['predictions'] created by the fit_predict method
+
+        """
+        cyc0 = self.obs_cycles[0]
+        if self._json is None:
+            raise ValueError("Please call `fit_predict` first")
+        recovery_predictions = self._json['predictions']
+
+        for sim_c in recovery_predictions.keys():
+            this_prediction = recovery_predictions[sim_c]
+            if sim_c + cyc0 in self.obs_cycles and 'location_error' in this_prediction.keys():
+
+                error = this_prediction['location_error']
+                metrics = {}
+
+                # Compute a transit time to cover the distance error:
+                metrics['transit'] = {'value': None,
+                                      'unit': 'hour',
+                                      'comment': 'Transit time to cover the distance error '
+                                                 '(assuming a 12 kt boat speed, with 1 kt = 1.852 km/h)'}
+
+                if error['distance']['value'] is not None:
+                    metrics['transit']['value'] = pd.Timedelta(error['distance']['value'] / (12 * 1.852),
+                                                               'h').total_seconds() / 3600.
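+                    # Worked example (illustrative numbers, not from a real run):
+                    # a 44.4 km distance error at 12 kt (12 * 1.852 = 22.224 km/h)
+                    # gives 44.4 / 22.224 ~ 2.0 hours.
+                    # total_seconds() is preferred over the Timedelta.seconds
+                    # attribute, which only returns the sub-day component and would
+                    # silently drop whole days for transit times longer than 24 h.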
+ + # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: + if VFvel is not None: + xpred = this_prediction['location']['longitude'] + ypred = this_prediction['location']['latitude'] + tpred = this_prediction['location']['time'] + dsc = VFvel.field.interp( + {VFvel.dim['lon']: xpred, + VFvel.dim['lat']: ypred, + VFvel.dim['time']: tpred, + VFvel.dim['depth']: + VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} + ) + velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] + metrics['surface_drift'] = {'value': None, + 'unit': 'km', + 'surface_currents_speed': None, + 'surface_currents_speed_unit': 'm/s', + 'comment': 'Drift by surface currents due to the float ascent time error ' + '(difference between simulated profile time and the observed one).'} + if error['time']['value'] is not None: + metrics['surface_drift']['value'] = (error['time']['value'] * 3600 * velc / 1e3) + metrics['surface_drift']['surface_currents_speed'] = velc + + # + this_prediction['metrics'] = metrics + recovery_predictions.update({sim_c: this_prediction}) + + self._json.update({"predictions": recovery_predictions}) + return self + + +class SimPredictor_3(SimPredictor_2): + + def plot_predictions(self, + VFvel, + cfg, + sim_suffix='', # get_sim_suffix(this_args, cfg) + s=0.2, + alpha=False, + save_figure=False, + workdir='.', + figsize=None, + dpi=120, + orient='portrait'): + ebox = self.bbox(s=s) + pred_traj = self.trajectory + + if orient == 'portrait': + if self.n_cycles == 1: + nrows, ncols = 2, 1 + if figsize is None: + figsize = (5, 5) + else: + nrows, ncols = self.n_cycles, 2 + if figsize is None: + figsize = (5, (self.n_cycles-1)*5) + else: + if self.n_cycles == 1: + nrows, ncols = 1, 2 + else: + nrows, ncols = 2, self.n_cycles + if figsize is None: + figsize = (ncols*5, 5) + + def plot_this(this_ax, i_cycle, ip): + df_sim = self.swarm[self.swarm['cyc'] == i_cycle + 1] + weights = self._prediction_data['cyc'][i_cycle + 1]['weights'].values + if self.sim_cycles[i_cycle] in self.obs_cycles: + this_profile = self.obs[self.obs['cyc'] == self.sim_cycles[i_cycle]] + else: + this_profile = None + + xpred = self.predictions['predictions'][i_cycle + 1]['location']['longitude'] + ypred = self.predictions['predictions'][i_cycle + 1]['location']['latitude'] + + this_ax.set_extent(ebox) + this_ax = map_add_features(ax[ix]) + + v = VFvel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']) + v.plot.quiver(x="longitude", + y="latitude", + u=VFvel.var['U'], + v=VFvel.var['V'], + ax=this_ax, + color='grey', + alpha=0.5, + scale=5, + add_guide=False) + + this_ax.plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', + markersize=3, + color='grey', + alpha=0.1, + markeredgecolor=None, + zorder=0) + + this_ax.plot(pred_traj[:, 0], pred_traj[:, 1], color='k', linewidth=1, marker='+') + this_ax.plot(xpred, ypred, color='g', marker='+') + + w = weights / np.max(np.abs(weights), axis=0) + ii = np.argsort(w) + cmap = plt.cm.cool + # cmap = plt.cm.Reds + + if ip == 0: + x, y = df_sim['deploy_lon'], df_sim['deploy_lat'] + title = 'Initial virtual float positions' + if not alpha: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + else: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + alpha=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + elif ip == 1: + x, y = df_sim['longitude'], df_sim['latitude'] + title = 'Final virtual 
float positions' + if not alpha: + this_ax.scatter(x, y, c=w, marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + else: + this_ax.scatter(x, y, c=w, marker='o', s=4, alpha=w, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + elif ip == 2: + x, y = df_sim['rel_lon'], df_sim['rel_lat'] + title = 'Final virtual floats positions relative to observed float' + if not alpha: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + else: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + marker='o', s=4, alpha=w[ii], edgecolor=None, vmin=0, vmax=1, cmap=cmap) + + # Display full trajectory prediction: + if ip != 0 and this_profile is not None: + this_ax.arrow(this_profile['longitude'].iloc[0], + this_profile['latitude'].iloc[0], + xpred - this_profile['longitude'].iloc[0], + ypred - this_profile['latitude'].iloc[0], + length_includes_head=True, fc='k', ec='c', head_width=0.025, zorder=10) + this_ax.plot(xpred, ypred, 'k+', zorder=10) + + this_ax.set_title("") + # this_ax.set_ylabel("Cycle %i predictions" % (i_cycle+1)) + this_ax.set_title("%s\nCycle %i predictions" % (title, self.sim_cycles[i_cycle]), fontsize=6) + + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi, + subplot_kw={'projection': ccrs.PlateCarree()}, + sharex=True, sharey=True) + ax, ix = ax.flatten(), -1 + + if orient == 'portrait': + rows = range(self.n_cycles) + cols = [1, 2] + else: + rows = [1, 2] + cols = range(self.n_cycles) + + if orient == 'portrait': + for i_cycle in rows: + for ip in cols: + ix += 1 + plot_this(ax[ix], i_cycle, ip) + else: + for ip in rows: + for i_cycle in cols: + ix += 1 + plot_this(ax[ix], i_cycle, ip) + + # log.debug("Start to write metrics string") + # + # xpred = SP.prediction[i_cycle + 1]['location']['longitude']['value'] + # + # err = recovery['prediction_location_error'] + # met = recovery['prediction_metrics'] + # if this_profile.shape[0] > 1: + # # err_str = "Prediction vs Truth: [%0.2fkm, $%0.2f^o$]" % (err['distance'], err['bearing']) + # err_str = "Prediction errors: [dist=%0.2f%s, bearing=$%0.2f^o$, time=%s]\n" \ + # "Distance error represents %s of transit at 12kt" % (err['distance']['value'], + # err['distance']['unit'], + # err['bearing']['value'], + # strfdelta(pd.Timedelta(err['time']['value'], 'h'), + # "{hours}H{minutes:02d}"), + # strfdelta(pd.Timedelta(met['transit']['value'], 'h'), + # "{hours}H{minutes:02d}")) + # else: + # err_str = "" + # + # fig.suptitle("VirtualFleet recovery prediction for WMO %i: \ + # starting from cycle %i, predicting cycle %i\n%s\n%s\n%s" % + # (wmo, cyc[0], cyc[1], get_cfg_str(cfg), err_str, "Prediction based on %s" % vel_name), fontsize=15) + + plt.tight_layout() + if save_figure: + save_figurefile(fig, 'vfrecov_predictions_%s' % sim_suffix, workdir) + + return fig, ax + + +class SimPredictor(SimPredictor_3): + + def to_json(self, fp=None): + kw = {'indent': 4, 'sort_keys': True, 'default': str} + if fp is not None: + if hasattr(fp, 'write'): + json.dump(self._json, fp, **kw) + else: + with open(fp, 'w') as f: + json.dump(self._json, f, **kw) + else: + results_js = json.dumps(self._json, **kw) + return results_js + + diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py new file mode 100644 index 0000000..afbbd19 --- /dev/null +++ b/vfrecovery/core/trajfile_handler.py @@ -0,0 +1,406 @@ +import xarray as xr +import pandas as pd +import numpy as np +import matplotlib +from scipy.signal import find_peaks +from sklearn.metrics import 
pairwise_distances +import matplotlib.pyplot as plt + +from vfrecovery.utils.misc import get_cfg_str +from vfrecovery.plots.utils import save_figurefile + + +class Trajectories: + """Trajectory file manager for VFrecovery + + Examples: + --------- + T = Trajectories(traj_zarr_file) + T.n_floats + T.sim_cycles + df = T.to_index() + df = T.get_index().add_distances() + jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, show_plot=True) + """ + + def __init__(self, zfile): + self.zarr_file = zfile + self.obj = xr.open_zarr(zfile) + self._index = None + + @property + def n_floats(self): + # len(self.obj['trajectory']) + return self.obj['trajectory'].shape[0] + + @property + def sim_cycles(self): + """Return list of cycles simulated""" + cycs = np.unique(self.obj['cycle_number']) + last_obs_phase = \ + self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ + np.newaxis][0] + if last_obs_phase < 3: + cycs = cycs[0:-1] + return cycs + + def __repr__(self): + summary = [""] + summary.append("Swarm size: %i floats" % self.n_floats) + start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) + end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) + summary.append("Simulation length: %s, from %s to %s" % ( + pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) + return "\n".join(summary) + + # def to_index_par(self) -> pd.DataFrame: + # # Deployment loc: + # deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values + # + # def worker(ds, cyc, x0, y0): + # mask = np.logical_and((ds['cycle_number'] == cyc).compute(), + # (ds['cycle_phase'] >= 3).compute()) + # this_cyc = ds.where(mask, drop=True) + # if len(this_cyc['time']) > 0: + # data = { + # 'date': this_cyc.isel(obs=-1)['time'].values, + # 'latitude': this_cyc.isel(obs=-1)['lat'].values, + # 'longitude': this_cyc.isel(obs=-1)['lon'].values, + # 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, + # 'cyc': cyc, + # # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, + # 'deploy_lon': x0, + # 'deploy_lat': y0, + # } + # return pd.DataFrame(data) + # else: + # return None + # + # cycles = np.unique(self.obj['cycle_number']) + # rows = [] + # with concurrent.futures.ThreadPoolExecutor() as executor: + # future_to_url = { + # executor.submit( + # worker, + # self.obj, + # cyc, + # deploy_lon, + # deploy_lat + # ): cyc + # for cyc in cycles + # } + # futures = concurrent.futures.as_completed(future_to_url) + # for future in futures: + # data = None + # try: + # data = future.result() + # except Exception: + # raise + # finally: + # rows.append(data) + # + # rows = [r for r in rows if r is not None] + # df = pd.concat(rows).reset_index() + # df['wmo'] = df['wmo'].astype(int) + # df['cyc'] = df['cyc'].astype(int) + # # df['cycle_phase'] = df['cycle_phase'].astype(int) + # self._index = df + # + # return self._index + + def to_index(self) -> pd.DataFrame: + """Compute and return index (profile dataframe from trajectory dataset) + + Create a Profile index :class:`pandas.dataframe` with columns: [data, latitude ,longitude, wmo, cyc, deploy_lon, deploy_lat] + from a trajectory :class:`xarray.dataset`. + + There is one dataframe row for each dataset trajectory cycle. + + We use the last trajectory point of given cycle number (with cycle phase >= 3) to identify a profile location. 
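+
+        A minimal sketch of the intended use (the zarr file name is illustrative):
+
+            T = Trajectories('trajectories_example.zarr')
+            df = T.to_index()  # one row per simulated profile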
+
+        If there are N trajectories simulating C cycles, there will be at most about N*C rows in the dataframe.
+
+        Returns
+        -------
+        :class:`pandas.DataFrame`
+        """
+        if self._index is None:
+
+            # Deployment loc:
+            deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values
+
+            def worker(ds, cyc, x0, y0):
+                mask = np.logical_and((ds['cycle_number'] == cyc).compute(),
+                                      (ds['cycle_phase'] >= 3).compute())
+                this_cyc = ds.where(mask, drop=True)
+                if len(this_cyc['time']) > 0:
+                    data = {
+                        'date': this_cyc.isel(obs=-1)['time'].values,
+                        'latitude': this_cyc.isel(obs=-1)['lat'].values,
+                        'longitude': this_cyc.isel(obs=-1)['lon'].values,
+                        'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values,
+                        'cyc': cyc,
+                        # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values,
+                        'deploy_lon': x0,
+                        'deploy_lat': y0,
+                    }
+                    return pd.DataFrame(data)
+                else:
+                    return None
+
+            cycles = np.unique(self.obj['cycle_number'])
+            rows = []
+            for cyc in cycles:
+                df = worker(self.obj, cyc, deploy_lon, deploy_lat)
+                rows.append(df)
+            rows = [r for r in rows if r is not None]
+            df = pd.concat(rows).reset_index()
+            df['wmo'] = df['wmo'].astype(int)
+            df['cyc'] = df['cyc'].astype(int)
+            # df['cycle_phase'] = df['cycle_phase'].astype(int)
+            self._index = df
+
+        return self._index
+
+    def get_index(self):
+        """Compute index and return self"""
+        self.to_index()
+        return self
+
+    def add_distances(self, origin: list) -> pd.DataFrame:
+        """Compute profiles distance to some origin
+
+        Returns
+        -------
+        :class:`pandas.DataFrame`
+        """
+
+        # Compute distance between the predicted profile and the initial profile location from the deployment plan
+        # We assume that virtual floats are sequentially taken from the deployment plan
+        # Since distances are very short, we compute a simple rectangular distance
+
+        # Observed cycles:
+        # obs_cyc = np.unique(this_profile['cyc'])
+
+        # Simulated cycles:
+        # sim_cyc = np.unique(this_df['cyc'])
+
+        df = self._index
+
+        x2, y2 = origin  # real float initial position
+        df['distance'] = np.nan
+        df['rel_lon'] = np.nan
+        df['rel_lat'] = np.nan
+        df['distance_origin'] = np.nan
+
+        def worker(row):
+            # Simulation profile coordinates:
+            x0, y0 = row['deploy_lon'], row['deploy_lat']  # virtual float initial position
+            x1, y1 = row['longitude'], row['latitude']  # virtual float position
+
+            # Distance between each pair of cycles of virtual floats:
+            dist = np.sqrt((y1 - y0) ** 2 + (x1 - x0) ** 2)
+            row['distance'] = dist
+
+            # Shift between each pair of cycles:
+            dx, dy = x1 - x0, y1 - y0
+            # Get a relative displacement from real float initial position:
+            row['rel_lon'] = x2 + dx
+            row['rel_lat'] = y2 + dy
+
+            # Distance between the predicted profile and the observed initial profile
+            dist = np.sqrt((y2 - y0) ** 2 + (x2 - x0) ** 2)
+            row['distance_origin'] = dist
+
+            return row
+
+        df = df.apply(worker, axis=1)
+        self._index = df
+
+        return self._index
+
+    def analyse_pairwise_distances(self,
+                                   cycle: int = 1,
+                                   show_plot: bool = True,
+                                   save_figure: bool = False,
+                                   workdir: str = '.',
+                                   sim_suffix=None,
+                                   this_cfg=None,
+                                   this_args: dict = None):
+
+        def get_hist_and_peaks(this_d):
+            x = this_d.flatten()
+            x = x[~np.isnan(x)]
+            x = x[:, np.newaxis]
+            hist, bin_edges = np.histogram(x, bins=100, density=1)
+            # dh = np.diff(bin_edges[0:2])
+            peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20)
+            return {'pdf': hist, 'bins': bin_edges[0:-1], 'Npeaks': len(peaks)}
+
+        # Squeeze traj file to the first predicted cycle (sim can have more than 
1 cycle) + ds = self.obj.where((self.obj['cycle_number'] == cycle).compute(), drop=True) + ds = ds.compute() + + # Compute trajectories relative to the single/only real float initial position: + lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] + lon, lat = ds['lon'].values, ds['lat'].values + ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, + dims=['trajectory', 'obs']) + ds['latc'] = xr.DataArray(lat - np.broadcast_to(lat[:, 0][:, np.newaxis], lat.shape) + lat0, + dims=['trajectory', 'obs']) + + # Compute trajectory lengths: + ds['length'] = np.sqrt(ds.diff(dim='obs')['lon'] ** 2 + ds.diff(dim='obs')['lat'] ** 2).sum(dim='obs') + ds['lengthc'] = np.sqrt(ds.diff(dim='obs')['lonc'] ** 2 + ds.diff(dim='obs')['latc'] ** 2).sum(dim='obs') + + # Compute initial points pairwise distances, PDF and nb of peaks: + X = ds.isel(obs=0) + X = X.isel(trajectory=~np.isnan(X['lon'])) + X0 = np.array((X['lon'].values, X['lat'].values)).T + d0 = pairwise_distances(X0, n_jobs=-1) + d0 = np.triu(d0) + d0[d0 == 0] = np.nan + + x0 = d0.flatten() + x0 = x0[~np.isnan(x0)] + x0 = x0[:, np.newaxis] + + hist0, bin_edges0 = np.histogram(x0, bins=100, density=1) + dh0 = np.diff(bin_edges0[0:2]) + peaks0, _ = find_peaks(hist0 / np.max(hist0), height=.4, distance=20) + + # Compute final points pairwise distances, PDF and nb of peaks: + X = ds.isel(obs=-1) + X = X.isel(trajectory=~np.isnan(X['lon'])) + dsf = X + X = np.array((X['lon'].values, X['lat'].values)).T + d = pairwise_distances(X, n_jobs=-1) + d = np.triu(d) + d[d == 0] = np.nan + + x = d.flatten() + x = x[~np.isnan(x)] + x = x[:, np.newaxis] + + hist, bin_edges = np.histogram(x, bins=100, density=1) + dh = np.diff(bin_edges[0:2]) + peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) + + # Compute final points pairwise distances (relative traj), PDF and nb of peaks: + X1 = ds.isel(obs=-1) + X1 = X1.isel(trajectory=~np.isnan(X1['lonc'])) + dsfc = X1 + X1 = np.array((X1['lonc'].values, X1['latc'].values)).T + d1 = pairwise_distances(X1, n_jobs=-1) + d1 = np.triu(d1) + d1[d1 == 0] = np.nan + + x1 = d1.flatten() + x1 = x1[~np.isnan(x1)] + x1 = x1[:, np.newaxis] + + hist1, bin_edges1 = np.histogram(x1, bins=100, density=1) + dh1 = np.diff(bin_edges1[0:2]) + peaks1, _ = find_peaks(hist1 / np.max(hist1), height=.4, distance=20) + + # Compute the overlapping between the initial and relative state PDFs: + bin_unif = np.arange(0, np.max([bin_edges0, bin_edges1]), np.min([dh0, dh1])) + dh_unif = np.diff(bin_unif[0:2]) + hist0_unif = np.interp(bin_unif, bin_edges0[0:-1], hist0) + hist_unif = np.interp(bin_unif, bin_edges[0:-1], hist) + hist1_unif = np.interp(bin_unif, bin_edges1[0:-1], hist1) + + # Area under hist1 AND hist0: + # overlapping = np.sum(hist1_unif[hist0_unif >= hist1_unif]*dh_unif) + overlapping = np.sum(hist_unif[hist0_unif >= hist_unif] * dh_unif) + + # Ratio of the max PDF ranges: + # staggering = np.max(bin_edges1)/np.max(bin_edges0) + staggering = np.max(bin_edges) / np.max(bin_edges0) + + # Store metrics in a dict: + prediction_metrics = {} + + prediction_metrics['trajectory_lengths'] = {'median': np.nanmedian(ds['length'].values), + 'std': np.nanstd(ds['length'].values)} + + prediction_metrics['pairwise_distances'] = { + 'initial_state': {'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0)}, + 'final_state': {'median': np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks)}, + 'relative_state': {'median': np.nanmedian(d1), 
'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1)}, + 'overlapping': {'value': overlapping, + 'comment': 'Overlapping area between PDF(initial_state) and PDF(final_state)'}, + 'staggering': {'value': staggering, 'comment': 'Ratio of PDF(initial_state) vs PDF(final_state) ranges'}, + 'score': {'value': overlapping / len(peaks), 'comment': 'overlapping/nPDFpeaks(final_state)'}} + + if np.isinf(overlapping / len(peaks)): + raise ValueError("Can't compute the prediction score, infinity !") + + ratio = prediction_metrics['pairwise_distances']['final_state']['std'] / \ + prediction_metrics['pairwise_distances']['initial_state']['std'] + prediction_metrics['pairwise_distances']['std_ratio'] = ratio + + # Figure: + if show_plot: + backend = matplotlib.get_backend() + if this_args is not None and this_args.json: + matplotlib.use('Agg') + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 10), dpi=90) + ax, ix = ax.flatten(), -1 + cmap = plt.cm.coolwarm + + ix += 1 + dd = dsf['length'].values + ax[ix].plot(X0[:, 0], X0[:, 1], '.', markersize=3, color='grey', alpha=0.5, markeredgecolor=None, zorder=0) + ax[ix].scatter(X[:, 0], X[:, 1], c=dd, zorder=10, s=3, cmap=cmap) + ax[ix].grid() + this_traj = int(dsf.isel(trajectory=np.argmax(dd))['trajectory'].values[np.newaxis][0]) + ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], + ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'r', + zorder=13, label='Longest traj.') + this_traj = int(dsf.isel(trajectory=np.argmin(dd))['trajectory'].values[np.newaxis][0]) + ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], + ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'b', + zorder=13, label='Shortest traj.') + ax[ix].legend() + ax[ix].set_title('Trajectory lengths') + + ix += 1 + ax[ix].plot(bin_edges0[0:-1], hist0, label='Initial (%i peak)' % len(peaks0), color='gray') + ax[ix].plot(bin_edges[0:-1], hist, label='Final (%i peak)' % len(peaks), color='lightblue') + ax[ix].plot(bin_edges[peaks], hist[peaks], "x", label='Peaks') + ax[ix].legend() + ax[ix].grid() + ax[ix].set_xlabel('Pairwise distance [degree]') + line1 = "Staggering: %0.4f" % staggering + line2 = "Overlapping: %0.4f" % overlapping + line3 = "Score: %0.4f" % (overlapping / len(peaks)) + ax[ix].set_title("Pairwise distances PDF: [%s / %s / %s]" % (line1, line2, line3)) + + if this_args is not None: + line0 = "VirtualFleet recovery swarm simulation for WMO %i, starting from cycle %i, predicting cycle %i\n%s" % \ + (this_args.wmo, this_args.cyc[0] - 1, this_args.cyc[0], get_cfg_str(this_cfg)) + line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats) + else: + line0 = "VirtualFleet recovery swarm simulation for cycle %i" % cycle + line1 = "Simulation made with %i virtual floats" % (self.n_floats) + + fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) + plt.tight_layout() + + if save_figure: + if sim_suffix is not None: + filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, cycle) + else: + filename = 'vfrecov_metrics01_cyc%i' % (cycle) + save_figurefile(fig, filename, workdir) + + if this_args is not None and this_args.json: + matplotlib.use(backend) + + if show_plot: + return prediction_metrics, fig, ax + else: + return prediction_metrics + diff --git a/vfrecovery/core_functions/predict.py b/vfrecovery/core_functions/predict.py deleted file mode 100644 index 7049385..0000000 --- a/vfrecovery/core_functions/predict.py +++ 
/dev/null
@@ -1,21 +0,0 @@
-
-def predict_function(
-    wmo: int,
-    cyc: int,
-    n_predictions: int = 1,
-):
-    """
-    Execute VirtualFleet-Recovery predictor and return results as a JSON string
-
-    Inputs
-    ------
-    wmo
-    cyc
-    n_predictions
-
-    Returns
-    -------
-    data
-
-    """  # noqa
-    return {'wmo': wmo, 'cyc': cyc}
diff --git a/vfrecovery/download_functions/__init__.py b/vfrecovery/downloaders/__init__.py
similarity index 60%
rename from vfrecovery/download_functions/__init__.py
rename to vfrecovery/downloaders/__init__.py
index 311103f..22b5eb5 100644
--- a/vfrecovery/download_functions/__init__.py
+++ b/vfrecovery/downloaders/__init__.py
@@ -1,2 +1,3 @@
 from armor3d import Armor3d
 from glorys import Glorys
+from .core import get_velocity_field
diff --git a/vfrecovery/download_functions/armor3d.py b/vfrecovery/downloaders/armor3d.py
similarity index 100%
rename from vfrecovery/download_functions/armor3d.py
rename to vfrecovery/downloaders/armor3d.py
diff --git a/vfrecovery/downloaders/core.py b/vfrecovery/downloaders/core.py
new file mode 100644
index 0000000..3241c86
--- /dev/null
+++ b/vfrecovery/downloaders/core.py
@@ -0,0 +1,43 @@
+import pandas as pd
+import os
+import xarray as xr
+
+from . import Glorys, Armor3d
+# import logging
+
+
+# log = logging.getLogger("vfrecovery.download.core")
+
+
+def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D'):
+    """Return the velocity field as an :class:`xr.Dataset`, download if needed
+
+    Parameters
+    ----------
+    a_box
+    a_date
+    n_days
+    output
+    dataset
+    """
+    def get_velocity_filename(dataset, n_days):
+        download_date = pd.to_datetime('now', utc=True).strftime("%Y%m%d")
+        fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, download_date))
+        return fname
+
+    velocity_file = get_velocity_filename(dataset, n_days)
+    if not os.path.exists(velocity_file):
+        # Define the data loader:
+        loader = Armor3d if dataset == 'ARMOR3D' else Glorys
+        loader = loader(a_box, a_date, n_days=n_days)
+        # puts(str(loader), color=COLORS.magenta)
+
+        # Load data from the Copernicus Marine Data store:
+        ds = loader.to_xarray()
+
+        # Save to file for later re-use:
+        ds.to_netcdf(velocity_file)
+    else:
+        ds = xr.open_dataset(velocity_file)
+
+    return ds, velocity_file
diff --git a/vfrecovery/download_functions/glorys.py b/vfrecovery/downloaders/glorys.py
similarity index 100%
rename from vfrecovery/download_functions/glorys.py
rename to vfrecovery/downloaders/glorys.py
diff --git a/vfrecovery/json_functions/VFRschema.py b/vfrecovery/json/VFRschema.py
similarity index 100%
rename from vfrecovery/json_functions/VFRschema.py
rename to vfrecovery/json/VFRschema.py
diff --git a/vfrecovery/json_functions/VFRschema_meta.py b/vfrecovery/json/VFRschema_meta.py
similarity index 100%
rename from vfrecovery/json_functions/VFRschema_meta.py
rename to vfrecovery/json/VFRschema_meta.py
diff --git a/vfrecovery/json_functions/VFRschema_metrics.py b/vfrecovery/json/VFRschema_metrics.py
similarity index 100%
rename from vfrecovery/json_functions/VFRschema_metrics.py
rename to vfrecovery/json/VFRschema_metrics.py
diff --git a/vfrecovery/json_functions/VFRschema_profile.py b/vfrecovery/json/VFRschema_profile.py
similarity index 100%
rename from vfrecovery/json_functions/VFRschema_profile.py
rename to vfrecovery/json/VFRschema_profile.py
diff --git a/vfrecovery/json_functions/VFRschema_simulation.py b/vfrecovery/json/VFRschema_simulation.py
similarity index 100%
rename from vfrecovery/json_functions/VFRschema_simulation.py
rename to 
vfrecovery/json/VFRschema_simulation.py diff --git a/vfrecovery/json_functions/__init__.py b/vfrecovery/json/__init__.py similarity index 100% rename from vfrecovery/json_functions/__init__.py rename to vfrecovery/json/__init__.py diff --git a/vfrecovery/core_functions/__init__.py b/vfrecovery/plots/__init__.py similarity index 100% rename from vfrecovery/core_functions/__init__.py rename to vfrecovery/plots/__init__.py diff --git a/vfrecovery/plots/plot_positions.py b/vfrecovery/plots/plot_positions.py new file mode 100644 index 0000000..311b6fc --- /dev/null +++ b/vfrecovery/plots/plot_positions.py @@ -0,0 +1,59 @@ +import matplotlib.pyplot as plt +import pandas as pd +import cartopy.crs as ccrs +import numpy as np + +from .utils import get_HBOX, map_add_features, map_add_profiles, save_figurefile + + +def figure_positions(this_args, vel, df_sim, df_plan, this_profile, cfg, wmo, cyc, vel_name, + dd=1, save_figure=False, workdir='.'): + # log.debug("Starts figure_positions") + ebox = get_HBOX(df_sim, dd=dd) + nfloats = df_plan.shape[0] + + fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(25, 7), dpi=120, + subplot_kw={'projection': ccrs.PlateCarree()}, + sharex=True, sharey=True) + ax = ax.flatten() + + for ix in [0, 1, 2]: + ax[ix].set_extent(ebox) + ax[ix] = map_add_features(ax[ix]) + + v = vel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']).plot.quiver(x="longitude", + y="latitude", + u=vel.var['U'], + v=vel.var['V'], + ax=ax[ix], + color='grey', + alpha=0.5, + add_guide=False) + + ax[ix].plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', + markersize=3, color='grey', alpha=0.1, markeredgecolor=None, zorder=0) + if ix == 0: + title = 'Velocity field at %0.2fm and deployment plan' % cfg.mission['parking_depth'] + v.set_alpha(1) + # v.set_color('black') + elif ix == 1: + x, y, c = df_sim['longitude'], df_sim['latitude'], df_sim['cyc'] + title = 'Final float positions' + # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) + sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) + elif ix == 2: + x, y, c = df_sim['rel_lon'], df_sim['rel_lat'], df_sim['cyc'] + title = 'Final floats position relative to last float position' + # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) + sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) + + ax[ix] = map_add_profiles(ax[ix], this_profile) + ax[ix].set_title(title) + + fig.suptitle("VirtualFleet recovery prediction for WMO %i: starting from cycle %i, predicting cycle %s\n%s" % + (wmo, cyc[0], cyc[1:], get_cfg_str(cfg)), fontsize=15) + plt.tight_layout() + if save_figure: + save_figurefile(fig, "vfrecov_positions_%s" % get_sim_suffix(this_args, cfg), workdir) + return fig, ax + diff --git a/vfrecovery/plots/plot_velocity.py b/vfrecovery/plots/plot_velocity.py new file mode 100644 index 0000000..2d59bae --- /dev/null +++ b/vfrecovery/plots/plot_velocity.py @@ -0,0 +1,42 @@ +import matplotlib.pyplot as plt +import pandas as pd +import cartopy.crs as ccrs +import numpy as np + +from .utils import map_add_profiles, map_add_features, save_figurefile + + +def figure_velocity(box, + vel, vel_name, this_profile, wmo, cyc, + save_figure=False, workdir='.'): + """ + + Parameters + ---------- + box + + Returns + ------- + None + """ + fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 20), dpi=100, subplot_kw={'projection': ccrs.PlateCarree()}) + ax.set_extent(box) + ax = map_add_features(ax) + ax = map_add_profiles(ax, this_profile) + + 
vel.field.isel(time=0, depth=0).plot.quiver(x="longitude", y="latitude", + u=vel.var['U'], v=vel.var['V'], ax=ax, color='grey', alpha=0.5, + add_guide=False) + + txt = "starting from cycle %i, predicting cycle %i" % (cyc[0], cyc[1]) + ax.set_title( + "VirtualFleet recovery system for WMO %i: %s\n" + "%s velocity snapshot to illustrate the simulation domain\n" + "Vectors: Velocity field at z=%0.2fm, t=%s" % + (wmo, txt, vel_name, vel.field['depth'][0].values[np.newaxis][0], + pd.to_datetime(vel.field['time'][0].values).strftime("%Y/%m/%d %H:%M")), fontsize=15) + + plt.tight_layout() + if save_figure: + save_figurefile(fig, 'vfrecov_velocity_%s' % vel_name, workdir) + return fig, ax diff --git a/vfrecovery/plots/utils.py b/vfrecovery/plots/utils.py new file mode 100644 index 0000000..a306697 --- /dev/null +++ b/vfrecovery/plots/utils.py @@ -0,0 +1,129 @@ +import os +import numpy as np +import argopy.plot as argoplot + + +def save_figurefile(this_fig, a_name, folder='.'): + """ + + Parameters + ---------- + this_fig + a_name + + Returns + ------- + path + """ + figname = os.path.join(folder, "%s.png" % a_name) + # log.debug("Saving %s ..." % figname) + this_fig.savefig(figname) + return figname + + +def map_add_profiles(this_ax, this_profile): + """ + + Parameters + ---------- + this_ax + + Returns + ------- + this_ax + """ + this_ax.plot(this_profile['longitude'][0], this_profile['latitude'][0], 'k.', markersize=10, markeredgecolor='w') + if this_profile.shape[0] > 1: + this_ax.plot(this_profile['longitude'][1], this_profile['latitude'][1], 'r.', markersize=10, markeredgecolor='w') + this_ax.arrow(this_profile['longitude'][0], + this_profile['latitude'][0], + this_profile['longitude'][1] - this_profile['longitude'][0], + this_profile['latitude'][1] - this_profile['latitude'][0], + length_includes_head=True, fc='k', ec='k', head_width=0.025, zorder=10) + + return this_ax + + +def map_add_features(this_ax): + """ + + Parameters + ---------- + this_ax + + Returns + ------- + this_ax + """ + argoplot.utils.latlongrid(this_ax) + this_ax.add_feature(argoplot.utils.land_feature, edgecolor="black") + return this_ax + + +def map_add_cyc_nb(this_ax, this_df, lon='lon', lat='lat', cyc='cyc', pos='bt', fs=6, color='black'): + """ Add cycle number labels next to axis + + Parameters + ---------- + ax + df + + Returns + ------- + list of text label + """ + t = [] + if pos == 'bt': + ha, va, label = 'center', 'top', "\n{}".format + if pos == 'tp': + ha, va, label = 'center', 'bottom', "{}\n".format + for irow, row in this_df.iterrows(): + this_t = this_ax.text(row[lon], row[lat], label(int(row[cyc])), ha=ha, va=va, fontsize=fs, color=color) + t.append(this_t) + return t + + +def get_HBOX(df_sim, dd=1): + """ + + Parameters + ---------- + dd: how much to extend maps outward the deployment 'box' + + Returns + ------- + list + """ + rx = df_sim['deploy_lon'].max() - df_sim['deploy_lon'].min() + ry = df_sim['deploy_lat'].max() - df_sim['deploy_lat'].min() + lonc, latc = df_sim['deploy_lon'].mean(), df_sim['deploy_lat'].mean() + box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] + ebox = [box[i] + [-dd, dd, -dd, dd][i] for i in range(0, 4)] # Extended 'box' + + return ebox + + +def get_EBOX(df_sim, df_plan, this_profile, s=1): + """Get a box for maps + + Use all data positions from DF_SIM to make sure all points are visible + Extend the domain by a 's' scaling factor of the deployment plan domain + + Parameters + ---------- + s: float, default:1 + + Returns + ------- + list + """ + box = 
[np.min([df_sim['deploy_lon'].min(), df_sim['longitude'].min(), df_sim['rel_lon'].min(), this_profile['longitude'].min()]), + np.max([df_sim['deploy_lon'].max(), df_sim['longitude'].max(), df_sim['rel_lon'].max(), this_profile['longitude'].max()]), + np.min([df_sim['deploy_lat'].min(), df_sim['latitude'].min(), df_sim['rel_lat'].min(), this_profile['latitude'].min()]), + np.max([df_sim['deploy_lat'].max(), df_sim['latitude'].max(), df_sim['rel_lat'].max(), this_profile['latitude'].max()])] + rx, ry = df_plan['longitude'].max() - df_plan['longitude'].min(), df_plan['latitude'].max() - df_plan['latitude'].min() + r = np.min([rx, ry]) + ebox = [box[0]-s*r, box[1]+s*r, box[2]-s*r, box[3]+s*r] + + return ebox + diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py index da128b9..dabde0c 100644 --- a/vfrecovery/python_interface/predict.py +++ b/vfrecovery/python_interface/predict.py @@ -1,5 +1,5 @@ import json -from vfrecovery.core_functions.predict import predict_function +from vfrecovery.core.predict import predict_function def predict( @@ -10,8 +10,8 @@ def predict( """ Execute VirtualFleet-Recovery predictor and return results as a JSON string - Inputs - ------ + Parameters + ---------- wmo cyc n_predictions diff --git a/vfrecovery/utilities/__init__.py b/vfrecovery/utils/__init__.py similarity index 100% rename from vfrecovery/utilities/__init__.py rename to vfrecovery/utils/__init__.py diff --git a/vfrecovery/utilities/formatters.py b/vfrecovery/utils/formatters.py similarity index 100% rename from vfrecovery/utilities/formatters.py rename to vfrecovery/utils/formatters.py diff --git a/vfrecovery/utilities/geo.py b/vfrecovery/utils/geo.py similarity index 98% rename from vfrecovery/utilities/geo.py rename to vfrecovery/utils/geo.py index 678e62f..67c9fca 100644 --- a/vfrecovery/utilities/geo.py +++ b/vfrecovery/utils/geo.py @@ -1,5 +1,6 @@ from math import radians, cos, sin, asin, sqrt import pyproj +import numpy as np def haversine(lon1, lat1, lon2, lat2): @@ -61,3 +62,4 @@ def fixLON(x): if x < 0: x = 360 + x return x + diff --git a/vfrecovery/utils/misc.py b/vfrecovery/utils/misc.py new file mode 100644 index 0000000..fe0b26d --- /dev/null +++ b/vfrecovery/utils/misc.py @@ -0,0 +1,37 @@ +import argopy.plot as argoplot +from pathlib import Path + + +def get_package_dir(): + fpath = Path(__file__) + return str(fpath.parent.parent) + + +def get_cfg_str(a_cfg): + txt = "VFloat configuration: (Parking depth: %i [db], Cycle duration: %i [hours], Profile depth: %i [db])" % ( + a_cfg.mission['parking_depth'], + a_cfg.mission['cycle_duration'], + a_cfg.mission['profile_depth'], + ) + return txt + + +def get_sim_suffix(this_args, this_cfg): + """Compose a string suffix for output files""" + # suf = '%s_%i' % (this_args.velocity, this_args.nfloats) + suf = 'VEL%s_NF%i_CYCDUR%i_PARKD%i_PROFD%i_SFD%i' % (this_args.velocity, + this_args.nfloats, + int(this_cfg.mission['cycle_duration']), + int(this_cfg.mission['parking_depth']), + int(this_cfg.mission['profile_depth']), + int(this_cfg.mission['reco_free_surface_drift'])) + return suf + + +def get_ea_profile_page_url(wmo, cyc): + try: + url = argoplot.dashboard(wmo, cyc, url_only=True) + except: + # log.info("EA dashboard page not available for this profile: %i/%i" % (wmo, cyc)) + url = "404" + return url From 7255bd117d44cc11e49c662f2a019256a939ba8e Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 15 Mar 2024 16:14:01 +0100 Subject: [PATCH 06/38] let's do this --- vfrecovery/__init__.py | 16 + 
.../command_line_interface/group_predict.py | 121 +++- vfrecovery/command_line_interface/utils.py | 10 - vfrecovery/core/predict.py | 596 +++++++++++------- vfrecovery/core/utils.py | 47 ++ vfrecovery/json/VFRschema.py | 2 +- vfrecovery/json/VFRschema_meta.py | 2 +- vfrecovery/json/VFRschema_metrics.py | 2 +- vfrecovery/json/VFRschema_profile.py | 45 +- vfrecovery/json/VFRschema_simulation.py | 6 +- vfrecovery/json/__init__.py | 5 +- vfrecovery/logging_conf.json | 60 ++ vfrecovery/python_interface/predict.py | 25 +- vfrecovery/utils/formatters.py | 10 + 14 files changed, 676 insertions(+), 271 deletions(-) create mode 100644 vfrecovery/core/utils.py create mode 100644 vfrecovery/logging_conf.json diff --git a/vfrecovery/__init__.py b/vfrecovery/__init__.py index f6db858..6ae85fb 100644 --- a/vfrecovery/__init__.py +++ b/vfrecovery/__init__.py @@ -1,4 +1,20 @@ +import json +import logging.config +import time +import pathlib from importlib.metadata import version + + __version__ = version("vfrecovery") +log_configuration_dict = json.load( + open( + pathlib.Path( + pathlib.Path(__file__).parent, "logging_conf.json" + ) + ) +) +logging.config.dictConfig(log_configuration_dict) +logging.Formatter.converter = time.gmtime + from vfrecovery.python_interface.predict import predict diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index e6ee196..50fdc6a 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -1,11 +1,17 @@ import click -from typing import Union, List +import logging + from vfrecovery.core.predict import predict_function +root_logger = logging.getLogger("vfrecovery_root_logger") +blank_logger = logging.getLogger("vfrecovery_blank_logger") + + @click.group() def cli_group_predict() -> None: pass + @cli_group_predict.command( "predict", short_help="Execute VirtualFleet-Recovery predictions", @@ -18,22 +24,111 @@ def cli_group_predict() -> None: \b vfrecovery predict 6903091 112 """, # noqa - ) +) +@click.option( + "--velocity", + type=str, + required=False, + default='GLORYS', + show_default=True, + help="Velocity field to use. 
Possible values are: 'GLORYS', 'ARMOR3D'", +) +@click.option( + "--output_path", + type=str, + required=False, + default=None, + help="Simulation output folder [default: './vfrecovery_data//']", +) +# @click.option( +# "-v", "--verbose", +# type=bool, +# required=False, +# is_flag=True, +# default=True, +# show_default=True, +# help="Display verbose information along the execution", +# ) +@click.option( + "--cfg-parking-depth", + type=float, + required=False, + default=None, + show_default=False, + help="Virtual floats parking depth in db [default: previous cycle value]", +) +@click.option( + "--cfg-cycle-duration", + type=float, + required=False, + default=None, + show_default=False, + help="Virtual floats cycle duration in hours [default: previous cycle value]", +) +@click.option( + "--cfg-profile-depth", + type=float, + required=False, + default=None, + show_default=False, + help="Virtual floats profile depth in db [default: previous cycle value]", +) +@click.option( + "--cfg-free-surface-drift", + type=int, + required=False, + default=9999, + show_default=True, + help="Virtual cycle number to start free surface drift, inclusive", +) @click.option( "-n", "--n_predictions", type=int, required=False, - default=1, + default=0, show_default=True, - is_flag=False, - help="Number of profiles to simulate", + help="Number of profiles to simulate after cycle specified with argument 'CYC'", ) -@click.argument('WMO') -@click.argument('CYC', nargs=-1) +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), + default="INFO", + show_default=True, + help=( + "Set the details printed to console by the command " + "(based on standard logging library)." + ), +) +@click.argument('WMO', nargs=1, type=int) +@click.argument('CYC', nargs=1, type=int) def predict( - wmo: int, - cyc: Union[int, List], - n_predictions) -> None: - # click.echo(f"Prediction for {wmo} {cyc}") - results = predict_function(wmo, cyc, n_predictions=n_predictions) - click.echo(results) + wmo, + cyc, + velocity, + output_path, + n_predictions, + cfg_parking_depth, + cfg_cycle_duration, + cfg_profile_depth, + cfg_free_surface_drift, + log_level, +) -> None: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if root_logger.isEnabledFor(logging.DEBUG): + root_logger.debug("DEBUG mode activated") + + # + json_dump = predict_function(wmo, cyc, + velocity=velocity, + output_path=output_path, + n_predictions=n_predictions, + cfg_parking_depth=cfg_parking_depth, + cfg_cycle_duration=cfg_cycle_duration, + cfg_profile_depth=cfg_profile_depth, + cfg_free_surface_drift=cfg_free_surface_drift, + log_level=log_level) + blank_logger.info(json_dump) diff --git a/vfrecovery/command_line_interface/utils.py b/vfrecovery/command_line_interface/utils.py index c348565..5d799aa 100644 --- a/vfrecovery/command_line_interface/utils.py +++ b/vfrecovery/command_line_interface/utils.py @@ -6,15 +6,6 @@ PREF = "\033[" RESET = f"{PREF}0m" -class COLORS: - black = "30m" - red = "31m" - green = "32m" - yellow = "33m" - blue = "34m" - magenta = "35m" - cyan = "36m" - white = "37m" def puts(text, color=None, bold=False, file=sys.stdout): @@ -34,4 +25,3 @@ def puts(text, color=None, bold=False, file=sys.stdout): txt = f'{PREF}{1 if bold else 0};{color}' + text + RESET print(txt, file=file) log.info(text) - diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index d38f11f..06f846b 100644 --- 
a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -1,278 +1,400 @@ +import time +from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo +import argopy.plot as argoplot +from virtualargofleet import Velocity, VirtualFleet, FloatConfiguration, ConfigParam +from pathlib import Path +from typing import Union +import pandas as pd +import os +import logging +import json + +from vfrecovery.json import Profile, MetaData +from vfrecovery.utils.formatters import COLORS +from .utils import df_obs2jsProfile, ArgoIndex2df, ArgoIndex2JsProfile + +root_logger = logging.getLogger("vfrecovery_root_logger") +sim_logger = logging.getLogger("vfrecovery_simulation") + +class log_this: + + def __init__(self, txt, log_level): + """Log text to simulation and possibly root logger(s)""" + getattr(root_logger, log_level.lower())(txt) + getattr(sim_logger, log_level.lower())(txt) + + @staticmethod + def info(txt) -> 'log_this': + return log_this(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'log_this': + return log_this(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'log_this': + return log_this(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'log_this': + return log_this(txt, 'ERROR') + def predict_function( wmo: int, cyc: int, - n_predictions: int = 1, -): + velocity: str, + n_predictions: int, + output_path: Union[str, Path], + cfg_parking_depth: float, + cfg_cycle_duration: float, + cfg_profile_depth: float, + cfg_free_surface_drift: int, + log_level: str, +) -> str: """ - Execute VirtualFleet-Recovery predictor and return results as a JSON string + Execute VirtualFleet-Recovery predictor and save results as a JSON string Parameters ---------- wmo cyc + velocity n_predictions + output_path + log_level Returns ------- - data + str: a JSON formatted str """ # noqa - return {'wmo': wmo, 'cyc': cyc} + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + # print('DEBUG', logging.DEBUG) + # print('INFO', logging.INFO) + # print('WARNING', logging.WARNING) + # print('ERROR', logging.ERROR) + # print('root_logger', root_logger.getEffectiveLevel()) + # print(root_logger.isEnabledFor(logging.INFO)) - -def predictor(args): - """Prediction manager""" execution_start = time.time() process_start = time.process_time() - if is_wmo(args.wmo): - WMO = args.wmo - if is_cyc(args.cyc): - CYC = [check_cyc(args.cyc)[0]-1] - [CYC.append(c) for c in check_cyc(args.cyc)] - if args.velocity not in ['ARMOR3D', 'GLORYS']: + # Validate arguments: + assert is_wmo(wmo) + assert is_cyc(cyc) + wmo = check_wmo(wmo)[0] + cyc = check_cyc(cyc)[0] + + if velocity.upper() not in ['ARMOR3D', 'GLORYS']: raise ValueError("Velocity field must be one in: ['ARMOR3D', 'GLORYS']") else: - VEL_NAME = args.velocity.upper() + velocity = velocity.upper() + + # Prepend previous cycle number that will be used as initial conditions for the prediction of `cyc`: + cyc = [cyc - 1, cyc] + + if output_path is None: + # output_path = "vfrecovery_sims" % pd.to_datetime('now', utc=True).strftime("%Y%m%d%H%M%S") + output_path = os.path.sep.join(["vfrecovery_data", str(wmo), str(cyc[1])]) + output_path = Path(output_path) + output_path.mkdir(parents=True, exist_ok=True) + + # Set-up simulation logger + simlogfile = logging.FileHandler(os.path.join(output_path, "vfpred.log"), mode='a') + simlogfile.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(name)s:%(filename)s:%(lineno)d | %(message)s", + datefmt='%Y/%m/%d %I:%M:%S')) + 
sim_logger.handlers = [] + sim_logger.addHandler(simlogfile) + # log_this.info("This is INFO") + # log_this.warning("This is WARN") + # log_this.debug("This is DEBUG") + # log_this.error("This is ERROR") + log_this.info("\n\nSTARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i\n" % (wmo, cyc[1])) - puts('CYC = %s' % CYC, color=COLORS.magenta) - # raise ValueError('stophere') - - if args.save_figure: - mplbackend = matplotlib.get_backend() - matplotlib.use('Agg') - - # Where do we find the VirtualFleet repository ? - if not args.vf: - if os.uname()[1] == 'data-app-virtualfleet-recovery': - euroargodev = os.path.expanduser('/home/ubuntu') - else: - euroargodev = os.path.expanduser('~/git/github/euroargodev') - else: - euroargodev = os.path.abspath(args.vf) - if not os.path.exists(os.path.join(euroargodev, "VirtualFleet")): - raise ValueError("VirtualFleet can't be found at '%s'" % euroargodev) - - # Import the VirtualFleet library - sys.path.insert(0, os.path.join(euroargodev, "VirtualFleet")) - from virtualargofleet import Velocity, VirtualFleet, FloatConfiguration, ConfigParam - # from virtualargofleet.app_parcels import ArgoParticle - - # Set up the working directory: - if not args.output: - WORKDIR = os.path.sep.join([get_package_dir(), "webapi", "myapp", "static", "data", str(WMO), str(CYC[1])]) + # + url = argoplot.dashboard(wmo, url_only=True) + txt = "You can check this float dashboard while we prepare the prediction: %s" % url + log_this.info(txt) + + # Load observed float profiles index + log_this.debug("Loading float profiles index ...") + df_obs = ArgoIndex2df(wmo, cyc) + P_obs = df_obs2jsProfile(df_obs) + # P_obs = ArgoIndex2JsProfile(wmo, cyc) + # THIS_DATE = P_obs[0].location.time + # CENTER = [P_obs[0].location.longitude, P_obs[0].location.latitude] + + log_this.debug("Profiles to work with:\n%s" % df_obs[['date', 'latitude', 'longitude', 'wmo', 'cyc', 'institution']].to_string(max_colwidth=35)) + if df_obs.shape[0] == 1: + log_this.info('Real-case scenario: True position unknown !') else: - WORKDIR = os.path.sep.join([args.output, str(WMO), str(CYC[1])]) - WORKDIR = os.path.abspath(WORKDIR) - if not os.path.exists(WORKDIR): - os.makedirs(WORKDIR) - args.output = WORKDIR - - if not args.json: - puts("\nData will be saved in:") - puts("\t%s" % WORKDIR, color=COLORS.green) - - # Set-up logger - logging.basicConfig( - level=logging.DEBUG, - format=DEBUGFORMATTER, - datefmt='%m/%d/%Y %I:%M:%S %p', - handlers=[logging.FileHandler(os.path.join(WORKDIR, "vfpred.log"), mode='a')] - ) - - # Load these profiles' information: - if not args.json: - puts("\nYou can check this float dashboard while we prepare the prediction:") - puts("\t%s" % argoplot.dashboard(WMO, url_only=True), color=COLORS.green) - puts("\nLoading float profiles index ...") - host = "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" - THIS_PROFILE = store(host=host).search_wmo_cyc(WMO, CYC).to_dataframe() - THIS_DATE = pd.to_datetime(THIS_PROFILE['date'].values[0], utc=True) - CENTER = [THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]] - if not args.json: - puts("\nProfiles to work with:") - puts(THIS_PROFILE.to_string(max_colwidth=15), color=COLORS.green) - if THIS_PROFILE.shape[0] == 1: - puts('\nReal-case scenario: True position unknown !', color=COLORS.yellow) - else: - puts('\nEvaluation scenario: historical position known', 
color=COLORS.yellow) + log_this.info('Evaluation scenario: historical position known') # Load real float configuration at the previous cycle: - if not args.json: - puts("\nLoading float configuration...") + log_this.debug("\nLoading float configuration...") try: - CFG = FloatConfiguration([WMO, CYC[0]]) + CFG = FloatConfiguration([wmo, cyc[0]]) except: - if not args.json: - puts("Can't load this profile config, falling back on default values", color=COLORS.red) + log_this.info("Can't load this profile config, falling back on default values") CFG = FloatConfiguration('default') - if args.cfg_parking_depth is not None: - puts("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], - float(args.cfg_parking_depth))) - CFG.update('parking_depth', float(args.cfg_parking_depth)) + if cfg_parking_depth is not None: + log_this.debug("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], + float(cfg_parking_depth))) + CFG.update('parking_depth', float(cfg_parking_depth)) - if args.cfg_cycle_duration is not None: - puts("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], - float(args.cfg_cycle_duration))) - CFG.update('cycle_duration', float(args.cfg_cycle_duration)) + if cfg_cycle_duration is not None: + log_this.debug("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], + float(cfg_cycle_duration))) + CFG.update('cycle_duration', float(cfg_cycle_duration)) - if args.cfg_profile_depth is not None: - puts("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], - float(args.cfg_profile_depth))) - CFG.update('profile_depth', float(args.cfg_profile_depth)) + if cfg_profile_depth is not None: + log_this.debug("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], + float(cfg_profile_depth))) + CFG.update('profile_depth', float(cfg_profile_depth)) CFG.params = ConfigParam(key='reco_free_surface_drift', - value=int(args.cfg_free_surface_drift), + value=int(cfg_free_surface_drift), unit='cycle', description='First cycle with free surface drift', dtype=int) # Save virtual float configuration on file: - CFG.to_json(os.path.join(WORKDIR, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) - - if not args.json: - puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) - - # Get the cycling frequency (in days, this is more a period then...): - CYCLING_FREQUENCY = int(np.round(CFG.mission['cycle_duration']/24)) - - # Define domain to load velocity for, and get it: - width = args.domain_size + np.abs(np.ceil(THIS_PROFILE['longitude'].values[-1] - CENTER[0])) - height = args.domain_size + np.abs(np.ceil(THIS_PROFILE['latitude'].values[-1] - CENTER[1])) - VBOX = [CENTER[0] - width / 2, CENTER[0] + width / 2, CENTER[1] - height / 2, CENTER[1] + height / 2] - N_DAYS = (len(CYC)-1)*CYCLING_FREQUENCY+1 - if not args.json: - puts("\nLoading %s velocity field to cover %i days..." 
% (VEL_NAME, N_DAYS)) - ds_vel, velocity_file = get_velocity_field(VBOX, THIS_DATE, - n_days=N_DAYS, - output=WORKDIR, - dataset=VEL_NAME) - VEL = Velocity(model='GLORYS12V1' if VEL_NAME == 'GLORYS' else VEL_NAME, src=ds_vel) - if not args.json: - puts("\n\t%s" % str(ds_vel), color=COLORS.green) - puts("\n\tLoaded velocity field from %s to %s" % - (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), - pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) - figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) - - # raise ValueError('stophere') - - # VirtualFleet, get a deployment plan: - if not args.json: - puts("\nVirtualFleet, get a deployment plan...") - DF_PLAN = setup_deployment_plan(CENTER, THIS_DATE, nfloats=args.nfloats) - PLAN = {'lon': DF_PLAN['longitude'], - 'lat': DF_PLAN['latitude'], - 'time': np.array([np.datetime64(t) for t in DF_PLAN['date'].dt.strftime('%Y-%m-%d %H:%M').array]), - } - if not args.json: - puts("\t%i virtual floats to deploy" % DF_PLAN.shape[0], color=COLORS.green) - - # Set up VirtualFleet: - if not args.json: - puts("\nVirtualFleet, set-up the fleet...") - VFleet = VirtualFleet(plan=PLAN, - fieldset=VEL, - mission=CFG) - - # VirtualFleet, execute the simulation: - if not args.json: - puts("\nVirtualFleet, execute the simulation...") - - # Remove traj file if exists: - output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) - # if os.path.exists(output_path): - # shutil.rmtree(output_path) + # CFG.to_json(os.path.join(output_path, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) + + # if not args.json: + # puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) # - # VFleet.simulate(duration=timedelta(hours=N_DAYS*24+1), - # step=timedelta(minutes=5), - # record=timedelta(minutes=30), - # output=True, - # output_folder=WORKDIR, - # output_file='trajectories_%s.zarr' % get_sim_suffix(args, CFG), - # verbose_progress=not args.json, - # ) - - # VirtualFleet, get simulated profiles index: - if not args.json: - puts("\nExtract swarm profiles index...") - - T = Trajectories(WORKDIR + "/" + 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) - DF_SIM = T.get_index().add_distances(origin=[THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]]) - if not args.json: - puts(str(T), color=COLORS.magenta) - puts(DF_SIM.head().to_string(), color=COLORS.green) - figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, - dd=1, save_figure=args.save_figure, workdir=WORKDIR) - - # Recovery, make predictions based on simulated profile density: - SP = SimPredictor(DF_SIM, THIS_PROFILE) - if not args.json: - puts("\nPredict float cycle position(s) from swarm simulation...", color=COLORS.white) - puts(str(SP), color=COLORS.magenta) - SP.fit_predict() - SP.add_metrics(VEL) - SP.plot_predictions(VEL, - CFG, - sim_suffix=get_sim_suffix(args, CFG), - save_figure=args.save_figure, - workdir=WORKDIR, - orient='portrait') - results = SP.predictions - - # Recovery, compute more swarm metrics: - for this_cyc in T.sim_cycles: - jsmetrics, fig, ax = T.analyse_pairwise_distances(cycle=this_cyc, - save_figure=True, - this_args=args, - this_cfg=CFG, - sim_suffix=get_sim_suffix(args, CFG), - workdir=WORKDIR, - ) - if 'metrics' in results['predictions'][this_cyc]: - for key in jsmetrics.keys(): - results['predictions'][this_cyc]['metrics'].update({key: 
jsmetrics[key]}) - else: - results['predictions'][this_cyc].update({'metrics': jsmetrics}) - - # Recovery, finalize JSON output: - execution_end = time.time() - process_end = time.process_time() - computation = { - 'Date': pd.to_datetime('now', utc=True), - 'Wall-time': pd.Timedelta(execution_end - execution_start, 's'), - 'CPU-time': pd.Timedelta(process_end - process_start, 's'), - 'system': getSystemInfo() - } - results['meta'] = {'Velocity field': VEL_NAME, - 'Nfloats': args.nfloats, - 'Computation': computation, - 'VFloats_config': CFG.to_json(), - } - - if not args.json: - puts("\nPredictions:") - results_js = json.dumps(results, indent=4, sort_keys=True, default=str) - - with open(os.path.join(WORKDIR, 'prediction_%s.json' % get_sim_suffix(args, CFG)), 'w', encoding='utf-8') as f: - json.dump(results, f, ensure_ascii=False, indent=4, default=str, sort_keys=True) - - if not args.json: - puts(results_js, color=COLORS.green) - puts("\nCheck results at:") - puts("\t%s" % WORKDIR, color=COLORS.green) - - if args.save_figure: - plt.close('all') - # Restore Matplotlib backend - matplotlib.use(mplbackend) - - if not args.save_sim: - shutil.rmtree(output_path) - - return results_js + + # + MD = MetaData.from_dict({ + 'nfloats': 0, + 'velocity_field': velocity, + 'vfconfig': CFG, + 'computation': None + }) + return MD.to_json() + + output = {'wmo': wmo, 'cyc': cyc, 'velocity': velocity, 'n_predictions': n_predictions, 'cfg': CFG.to_json(indent=0)} + json_dump = json.dumps( + output, sort_keys=False, indent=2 + ) + return json_dump + + + + +# def predictor(args): +# """Prediction manager""" + +# if is_cyc(args.cyc): +# CYC = [check_cyc(args.cyc)[0]-1] +# [CYC.append(c) for c in check_cyc(args.cyc)] +# +# puts('CYC = %s' % CYC, color=COLORS.magenta) +# # raise ValueError('stophere') +# +# if args.save_figure: +# mplbackend = matplotlib.get_backend() +# matplotlib.use('Agg') + +# # Load these profiles' information: +# if not args.json: +# puts("\nYou can check this float dashboard while we prepare the prediction:") +# puts("\t%s" % argoplot.dashboard(WMO, url_only=True), color=COLORS.green) +# puts("\nLoading float profiles index ...") +# host = "https://data-argo.ifremer.fr" +# # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" +# # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" +# THIS_PROFILE = store(host=host).search_wmo_cyc(WMO, CYC).to_dataframe() +# THIS_DATE = pd.to_datetime(THIS_PROFILE['date'].values[0], utc=True) +# CENTER = [THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]] +# if not args.json: +# puts("\nProfiles to work with:") +# puts(THIS_PROFILE.to_string(max_colwidth=15), color=COLORS.green) +# if THIS_PROFILE.shape[0] == 1: +# puts('\nReal-case scenario: True position unknown !', color=COLORS.yellow) +# else: +# puts('\nEvaluation scenario: historical position known', color=COLORS.yellow) +# +# # Load real float configuration at the previous cycle: +# if not args.json: +# puts("\nLoading float configuration...") +# try: +# CFG = FloatConfiguration([WMO, CYC[0]]) +# except: +# if not args.json: +# puts("Can't load this profile config, falling back on default values", color=COLORS.red) +# CFG = FloatConfiguration('default') +# +# if args.cfg_parking_depth is not None: +# puts("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], +# float(args.cfg_parking_depth))) +# CFG.update('parking_depth', float(args.cfg_parking_depth)) +# +# if args.cfg_cycle_duration 
is not None: +# puts("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], +# float(args.cfg_cycle_duration))) +# CFG.update('cycle_duration', float(args.cfg_cycle_duration)) +# +# if args.cfg_profile_depth is not None: +# puts("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], +# float(args.cfg_profile_depth))) +# CFG.update('profile_depth', float(args.cfg_profile_depth)) +# +# CFG.params = ConfigParam(key='reco_free_surface_drift', +# value=int(args.cfg_free_surface_drift), +# unit='cycle', +# description='First cycle with free surface drift', +# dtype=int) +# +# # Save virtual float configuration on file: +# CFG.to_json(os.path.join(WORKDIR, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) +# +# if not args.json: +# puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) +# +# # Get the cycling frequency (in days, this is more a period then...): +# CYCLING_FREQUENCY = int(np.round(CFG.mission['cycle_duration']/24)) +# +# # Define domain to load velocity for, and get it: +# width = args.domain_size + np.abs(np.ceil(THIS_PROFILE['longitude'].values[-1] - CENTER[0])) +# height = args.domain_size + np.abs(np.ceil(THIS_PROFILE['latitude'].values[-1] - CENTER[1])) +# VBOX = [CENTER[0] - width / 2, CENTER[0] + width / 2, CENTER[1] - height / 2, CENTER[1] + height / 2] +# N_DAYS = (len(CYC)-1)*CYCLING_FREQUENCY+1 +# if not args.json: +# puts("\nLoading %s velocity field to cover %i days..." % (VEL_NAME, N_DAYS)) +# ds_vel, velocity_file = get_velocity_field(VBOX, THIS_DATE, +# n_days=N_DAYS, +# output=WORKDIR, +# dataset=VEL_NAME) +# VEL = Velocity(model='GLORYS12V1' if VEL_NAME == 'GLORYS' else VEL_NAME, src=ds_vel) +# if not args.json: +# puts("\n\t%s" % str(ds_vel), color=COLORS.green) +# puts("\n\tLoaded velocity field from %s to %s" % +# (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), +# pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) +# figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) +# +# # raise ValueError('stophere') +# +# # VirtualFleet, get a deployment plan: +# if not args.json: +# puts("\nVirtualFleet, get a deployment plan...") +# DF_PLAN = setup_deployment_plan(CENTER, THIS_DATE, nfloats=args.nfloats) +# PLAN = {'lon': DF_PLAN['longitude'], +# 'lat': DF_PLAN['latitude'], +# 'time': np.array([np.datetime64(t) for t in DF_PLAN['date'].dt.strftime('%Y-%m-%d %H:%M').array]), +# } +# if not args.json: +# puts("\t%i virtual floats to deploy" % DF_PLAN.shape[0], color=COLORS.green) +# +# # Set up VirtualFleet: +# if not args.json: +# puts("\nVirtualFleet, set-up the fleet...") +# VFleet = VirtualFleet(plan=PLAN, +# fieldset=VEL, +# mission=CFG) +# +# # VirtualFleet, execute the simulation: +# if not args.json: +# puts("\nVirtualFleet, execute the simulation...") +# +# # Remove traj file if exists: +# output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) +# # if os.path.exists(output_path): +# # shutil.rmtree(output_path) +# # +# # VFleet.simulate(duration=timedelta(hours=N_DAYS*24+1), +# # step=timedelta(minutes=5), +# # record=timedelta(minutes=30), +# # output=True, +# # output_folder=WORKDIR, +# # output_file='trajectories_%s.zarr' % get_sim_suffix(args, CFG), +# # verbose_progress=not args.json, +# # ) +# +# # VirtualFleet, get simulated profiles index: +# if not args.json: +# puts("\nExtract swarm profiles index...") +# +# T = 
Trajectories(WORKDIR + "/" + 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) +# DF_SIM = T.get_index().add_distances(origin=[THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]]) +# if not args.json: +# puts(str(T), color=COLORS.magenta) +# puts(DF_SIM.head().to_string(), color=COLORS.green) +# figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, +# dd=1, save_figure=args.save_figure, workdir=WORKDIR) +# +# # Recovery, make predictions based on simulated profile density: +# SP = SimPredictor(DF_SIM, THIS_PROFILE) +# if not args.json: +# puts("\nPredict float cycle position(s) from swarm simulation...", color=COLORS.white) +# puts(str(SP), color=COLORS.magenta) +# SP.fit_predict() +# SP.add_metrics(VEL) +# SP.plot_predictions(VEL, +# CFG, +# sim_suffix=get_sim_suffix(args, CFG), +# save_figure=args.save_figure, +# workdir=WORKDIR, +# orient='portrait') +# results = SP.predictions +# +# # Recovery, compute more swarm metrics: +# for this_cyc in T.sim_cycles: +# jsmetrics, fig, ax = T.analyse_pairwise_distances(cycle=this_cyc, +# save_figure=True, +# this_args=args, +# this_cfg=CFG, +# sim_suffix=get_sim_suffix(args, CFG), +# workdir=WORKDIR, +# ) +# if 'metrics' in results['predictions'][this_cyc]: +# for key in jsmetrics.keys(): +# results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) +# else: +# results['predictions'][this_cyc].update({'metrics': jsmetrics}) +# +# # Recovery, finalize JSON output: +# execution_end = time.time() +# process_end = time.process_time() +# computation = { +# 'Date': pd.to_datetime('now', utc=True), +# 'Wall-time': pd.Timedelta(execution_end - execution_start, 's'), +# 'CPU-time': pd.Timedelta(process_end - process_start, 's'), +# 'system': getSystemInfo() +# } +# results['meta'] = {'Velocity field': VEL_NAME, +# 'Nfloats': args.nfloats, +# 'Computation': computation, +# 'VFloats_config': CFG.to_json(), +# } +# +# if not args.json: +# puts("\nPredictions:") +# results_js = json.dumps(results, indent=4, sort_keys=True, default=str) +# +# with open(os.path.join(WORKDIR, 'prediction_%s.json' % get_sim_suffix(args, CFG)), 'w', encoding='utf-8') as f: +# json.dump(results, f, ensure_ascii=False, indent=4, default=str, sort_keys=True) +# +# if not args.json: +# puts(results_js, color=COLORS.green) +# puts("\nCheck results at:") +# puts("\t%s" % WORKDIR, color=COLORS.green) +# +# if args.save_figure: +# plt.close('all') +# # Restore Matplotlib backend +# matplotlib.use(mplbackend) +# +# if not args.save_sim: +# shutil.rmtree(output_path) +# +# return results_js +# diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py new file mode 100644 index 0000000..c8b135c --- /dev/null +++ b/vfrecovery/core/utils.py @@ -0,0 +1,47 @@ +import pandas as pd +from typing import List +from argopy import ArgoIndex +import argopy.plot as argoplot +from vfrecovery.json import Profile + + +def ArgoIndex2df(a_wmo, a_cyc) -> pd.DataFrame: + """Retrieve WMO/CYC Argo index entries as :class:`pd.DataFrame` + + Parameters + ---------- + wmo: int + cyc: Union(int, List) + + Returns + ------- + :class:`pd.DataFrame` + """ + host = "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" + df = ArgoIndex(host=host).search_wmo_cyc(a_wmo, a_cyc).to_dataframe() + return df + + +def df_obs2jsProfile(df_obs) -> List[Profile]: + Plist = Profile.from_ArgoIndex(df_obs) 
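+    # Tag each profile with a human-readable description:
+    # these entries come from the observed float, not from the swarm simulation.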
+ for P in Plist: + P.description = "Observed Argo profile" + return Plist + + +def ArgoIndex2JsProfile(a_wmo, a_cyc) -> List[Profile]: + """Retrieve WMO/CYC Argo index entries as a list of :class:`vfrecovery.json.Profile` + + Parameters + ---------- + wmo: int + cyc: Union(int, List) + + Returns + ------- + :class:`vfrecovery.json.Profile` + """ + df_obs = ArgoIndex2df(a_wmo, a_cyc) + return df_obs2jsProfile(df_obs) diff --git a/vfrecovery/json/VFRschema.py b/vfrecovery/json/VFRschema.py index c0e53f0..09e225b 100644 --- a/vfrecovery/json/VFRschema.py +++ b/vfrecovery/json/VFRschema.py @@ -135,7 +135,7 @@ def _is_datetime(self, x, name='?'): name, type(x)) def _is_integer(self, x, name='?'): - assert isinstance(x, int), "'%s' must be an integer, got '%s'" % (name, type(x)) + assert isinstance(x, (int, np.integer)), "'%s' must be an integer, got '%s'" % (name, type(x)) def _is_timedelta(self, x, name='?'): assert isinstance(x, (pd.Timedelta)), "'%s' must be castable with pd.to_timedelta, got '%s'" % (name, type(x)) diff --git a/vfrecovery/json/VFRschema_meta.py b/vfrecovery/json/VFRschema_meta.py index b0c5cbb..5989ad4 100644 --- a/vfrecovery/json/VFRschema_meta.py +++ b/vfrecovery/json/VFRschema_meta.py @@ -5,7 +5,7 @@ import socket import psutil -from VFRschema import VFvalidators +from .VFRschema import VFvalidators from virtualargofleet.utilities import VFschema_configuration diff --git a/vfrecovery/json/VFRschema_metrics.py b/vfrecovery/json/VFRschema_metrics.py index 37c2e1c..802fda1 100644 --- a/vfrecovery/json/VFRschema_metrics.py +++ b/vfrecovery/json/VFRschema_metrics.py @@ -47,7 +47,7 @@ """ from typing import List, Dict -from VFRschema import VFvalidators +from .VFRschema import VFvalidators class ArrayMetric(VFvalidators): diff --git a/vfrecovery/json/VFRschema_profile.py b/vfrecovery/json/VFRschema_profile.py index 15d6515..73269ae 100644 --- a/vfrecovery/json/VFRschema_profile.py +++ b/vfrecovery/json/VFRschema_profile.py @@ -1,7 +1,9 @@ import pandas as pd from typing import List, Dict -from VFRschema import VFvalidators -from VFRschema_metrics import Metrics +import argopy.plot as argoplot + +from .VFRschema import VFvalidators +from .VFRschema_metrics import Metrics class Location(VFvalidators): @@ -15,6 +17,13 @@ class Location(VFvalidators): required: List = ["longitude", "latitude"] def __init__(self, **kwargs): + """ + Parameters + ---------- + longitude: float + latitude: float + time: pd.Timestamp + """ super().__init__(**kwargs) if 'time' not in kwargs: # setattr(self, 'time', pd.to_datetime('now', utc=True)) @@ -44,11 +53,43 @@ class Profile(VFvalidators): properties: List = ["location", "cycle_number", "wmo", "url_float", "url_profile", "virtual_cycle_number", "metrics", "description"] def __init__(self, **kwargs): + """ + + Parameters + ---------- + location: Location + wmo: int + cycle_number: int + url_float: str + url_profile: str + virtual_cycle_number: int + metrics: Metrics + + """ super().__init__(**kwargs) self._validate_wmo(self.wmo) self._validate_cycle_number(self.cycle_number) self._validate_cycle_number(self.virtual_cycle_number) + if isinstance(kwargs['location'], dict): + self.location = Location.from_dict(kwargs['location']) @staticmethod def from_dict(obj: Dict) -> 'Profile': return Profile(**obj) + + @staticmethod + def from_ArgoIndex(df: pd.DataFrame) -> List['Profile']: + Plist = [] + for irow, this_obs in df.iterrows(): + p = Profile.from_dict({ + 'location': Location.from_dict({'longitude': this_obs['longitude'], + 'latitude': 
this_obs['latitude'], + 'time': this_obs['date'] + }), + 'wmo': this_obs['wmo'], + 'cyc': this_obs['cyc'], + 'url_float': argoplot.dashboard(wmo=this_obs['wmo'], url_only=True), + 'url_profile': argoplot.dashboard(wmo=this_obs['wmo'], cyc=this_obs['cyc'], url_only=True), + }) + Plist.append(p) + return Plist diff --git a/vfrecovery/json/VFRschema_simulation.py b/vfrecovery/json/VFRschema_simulation.py index b459628..c85a823 100644 --- a/vfrecovery/json/VFRschema_simulation.py +++ b/vfrecovery/json/VFRschema_simulation.py @@ -1,7 +1,7 @@ from typing import List, Dict -from VFRschema import VFvalidators -from VFRschema_profile import Profile -from VFRschema_meta import MetaData +from .VFRschema import VFvalidators +from .VFRschema_profile import Profile +from .VFRschema_meta import MetaData class Simulation(VFvalidators): diff --git a/vfrecovery/json/__init__.py b/vfrecovery/json/__init__.py index e2df9c8..bd2e226 100644 --- a/vfrecovery/json/__init__.py +++ b/vfrecovery/json/__init__.py @@ -1,2 +1,3 @@ -from VFRschema_simulation import Simulation -from VFRschema_profile import Profile +from .VFRschema_profile import Profile +from .VFRschema_simulation import Simulation +from .VFRschema_meta import MetaData \ No newline at end of file diff --git a/vfrecovery/logging_conf.json b/vfrecovery/logging_conf.json new file mode 100644 index 0000000..b5ae834 --- /dev/null +++ b/vfrecovery/logging_conf.json @@ -0,0 +1,60 @@ +{ + "disable_existing_loggers": false, + "formatters": { + "blank": { + "format": "%(message)s" + }, + "simple": { + "datefmt": "%Y-%m-%dT%H:%M:%SZ", + "format": "%(asctime)s [%(levelname)s]: %(message)s" + }, + "detailed": { + "datefmt": "%Y/%m/%d %I:%M:%S", + "format": "%(asctime)s | %(levelname)s | %(name)s:%(filename)s:%(lineno)d | %(message)s" + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "simple", + "stream": "ext://sys.stdout" + }, + "console_blank": { + "class": "logging.StreamHandler", + "formatter": "blank", + "stream": "ext://sys.stdout" + }, + "file": { + "class": "logging.handlers.RotatingFileHandler", + "formatter": "detailed", + "filename": "/tmp/junk.log", + "mode": "a", + "maxBytes": 10485760, + "backupCount": 5 + } + }, + "loggers": { + "vfrecovery_blank_logger": { + "handlers": [ + "console_blank" + ], + "level": "INFO", + "propagate": false + }, + "vfrecovery_root_logger": { + "handlers": [ + "console" + ], + "level": "WARNING", + "propagate": false + }, + "vfrecovery_simulation": { + "handlers": [ + "file" + ], + "level": "DEBUG", + "propagate": false + } + }, + "version": 1 +} diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py index dabde0c..77f0c2b 100644 --- a/vfrecovery/python_interface/predict.py +++ b/vfrecovery/python_interface/predict.py @@ -1,11 +1,20 @@ import json from vfrecovery.core.predict import predict_function +from pathlib import Path +from typing import Union def predict( wmo: int, cyc: int, - n_predictions, + velocity: str = 'GLORYS', + output_path: Union[str, Path] = None, + n_predictions: int = 0, + cfg_parking_depth: float = None, + cfg_cycle_duration: float = None, + cfg_profile_depth: float = None, + cfg_free_surface_drift: int = 9999, + log_level: str = 'INFO', ): """ Execute VirtualFleet-Recovery predictor and return results as a JSON string @@ -14,7 +23,14 @@ def predict( ---------- wmo cyc + velocity + output_path n_predictions + cfg_parking_depth + cfg_cycle_duration + cfg_profile_depth + cfg_free_surface_drift + log_level Returns ------- @@ 
-23,7 +39,14 @@ def predict( """ # noqa results_json = predict_function( wmo, cyc, + velocity=velocity, + output_path=output_path, n_predictions=n_predictions, + cfg_parking_depth=cfg_parking_depth, + cfg_cycle_duration=cfg_cycle_duration, + cfg_profile_depth=cfg_profile_depth, + cfg_free_surface_drift=cfg_free_surface_drift, + log_level=log_level, ) results = json.loads(results_json) return results diff --git a/vfrecovery/utils/formatters.py b/vfrecovery/utils/formatters.py index 6e11468..98f90fb 100644 --- a/vfrecovery/utils/formatters.py +++ b/vfrecovery/utils/formatters.py @@ -16,3 +16,13 @@ def strfdelta(tdelta, fmt): d["minutes"], d["seconds"] = divmod(rem, 60) return fmt.format(**d) + +class COLORS: + black = "30m" + red = "31m" + green = "32m" + yellow = "33m" + blue = "34m" + magenta = "35m" + cyan = "36m" + white = "37m" From bf2946fd2a7dc6e3b22150229f2f3a7f8c7d570d Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 19 Mar 2024 15:07:20 +0100 Subject: [PATCH 07/38] Move up system meta-data in json schema, renamed nfloats to n_floats --- schemas/VFrecovery-schema-computation.json | 6 +----- schemas/VFrecovery-schema-metadata.json | 11 +++++++---- vfrecovery/json/VFRschema_meta.py | 13 ++++++------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/schemas/VFrecovery-schema-computation.json b/schemas/VFrecovery-schema-computation.json index 5405e00..d787152 100644 --- a/schemas/VFrecovery-schema-computation.json +++ b/schemas/VFrecovery-schema-computation.json @@ -13,11 +13,7 @@ "description": "UTC starting datetime of the computation", "type": ["string", "null"], "format": "date-time" - }, - "system": { - "description": "System the computation was executed on", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-system.json" - }, + } "cpu_time": { "description": "CPU time used by the computation", "type": ["string", "null"], diff --git a/schemas/VFrecovery-schema-metadata.json b/schemas/VFrecovery-schema-metadata.json index f7cd5f0..d40cc3d 100644 --- a/schemas/VFrecovery-schema-metadata.json +++ b/schemas/VFrecovery-schema-metadata.json @@ -6,10 +6,10 @@ "format_version": { "const": "0.1" }, - "required": ["nfloats", "velocity_field", "vfconfig"], + "required": ["n_floats", "velocity_field", "vfconfig"], "type": "object", "properties": { - "nfloats": { + "n_floats": { "description": "Number of virtual floats simulated", "type": "integer" }, @@ -19,12 +19,15 @@ "enum": ["ARMOR3D", "GLORYS"] }, "vfconfig": { - "description": "Configuration of the virtual floats", + "description": "Configuration of virtual floats", "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet/json-schemas-FloatConfiguration/schemas/VF-ArgoFloat-Configuration.json" }, "computation": { "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-computation.json" + }, + "system": { + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-system.json" } }, - "maxProperties": 4 + "maxProperties": 5 } diff --git a/vfrecovery/json/VFRschema_meta.py b/vfrecovery/json/VFRschema_meta.py index 5989ad4..46f9f88 100644 --- a/vfrecovery/json/VFRschema_meta.py +++ b/vfrecovery/json/VFRschema_meta.py @@ -59,7 +59,6 @@ def auto_load() -> 'MetaDataSystem': class MetaDataComputation(VFvalidators): - system: MetaDataSystem = None date: pd.Timestamp = None cpu_time: pd.Timedelta = None wall_time: pd.Timedelta = None 
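Patch 07 above moves `system` out of the computation record and up to the simulation metadata, renaming `nfloats` to `n_floats` along the way. A minimal sketch of the resulting metadata layout, using illustrative values only (the field names inside `system` are hypothetical, the rest follows the schemas above):

```python
# Hypothetical metadata payload matching the updated JSON schemas:
metadata = {
    "n_floats": 100,                         # renamed from "nfloats"
    "velocity_field": "GLORYS",              # one of: "ARMOR3D", "GLORYS"
    "vfconfig": None,                        # VirtualFleet floats configuration
    "computation": {                         # no longer carries "system"
        "date": "2024-03-19T15:07:20Z",
        "wall_time": None,
        "cpu_time": None,
    },
    "system": {"hostname": "example-host"},  # now a top-level sibling of "computation"
}
```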
@@ -68,7 +67,6 @@ class MetaDataComputation(VFvalidators): description: str = "A set of meta-data to describe one computation run" required: List = [] properties: List = ["description", - "system", "cpu_time", "wall_time", "date"] def __init__(self, **kwargs): @@ -88,21 +86,22 @@ def from_dict(obj: Dict) -> 'MetaDataComputation': class MetaData(VFvalidators): - nfloats: int = None + n_floats: int = None velocity_field: str = None vfconfig: VFschema_configuration = None computation: MetaDataComputation = None + system: MetaDataSystem = None schema: str = "VFrecovery-schema-metadata" description: str = "A set of meta-data to describe one simulation" - required: List = ["nfloats", "velocity_field", "vfconfig"] + required: List = ["n_floats", "velocity_field", "vfconfig"] properties: List = ["description", - "nfloats", "velocity_field", - "vfconfig", "computation"] + "n_floats", "velocity_field", + "vfconfig", "computation", "system"] def __init__(self, **kwargs): super().__init__(**kwargs) - self._is_integer(self.nfloats) + self._is_integer(self.n_floats) if 'vfconfig' not in kwargs: self.vfconfig = None From 9ac819ec7a365f30e460a66370b9249d71c265bd Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 19 Mar 2024 15:07:53 +0100 Subject: [PATCH 08/38] Implement more CLI options and workflow --- .../command_line_interface/group_predict.py | 14 +++++- vfrecovery/core/predict.py | 44 ++++++++++++------- vfrecovery/core/utils.py | 15 ++++++- vfrecovery/json/__init__.py | 2 +- vfrecovery/python_interface/predict.py | 3 ++ vfrecovery/utils/misc.py | 10 ----- 6 files changed, 58 insertions(+), 30 deletions(-) diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index 50fdc6a..c0de33f 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -38,7 +38,7 @@ def cli_group_predict() -> None: type=str, required=False, default=None, - help="Simulation output folder [default: './vfrecovery_data//']", + help="Simulation data output folder [default: './vfrecovery_simulations_data//']", ) # @click.option( # "-v", "--verbose", @@ -82,13 +82,21 @@ def cli_group_predict() -> None: help="Virtual cycle number to start free surface drift, inclusive", ) @click.option( - "-n", "--n_predictions", + "-np", "--n_predictions", type=int, required=False, default=0, show_default=True, help="Number of profiles to simulate after cycle specified with argument 'CYC'", ) +@click.option( + "-nf", "--n_floats", + type=int, + required=False, + default=100, + show_default=True, + help="Number of virtual floats simulated to make predictions", +) @click.option( "--log-level", type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), @@ -111,6 +119,7 @@ def predict( cfg_cycle_duration, cfg_profile_depth, cfg_free_surface_drift, + n_floats, log_level, ) -> None: if log_level == "QUIET": @@ -130,5 +139,6 @@ def predict( cfg_cycle_duration=cfg_cycle_duration, cfg_profile_depth=cfg_profile_depth, cfg_free_surface_drift=cfg_free_surface_drift, + n_floats=n_floats, log_level=log_level) blank_logger.info(json_dump) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 06f846b..873959e 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -9,9 +9,9 @@ import logging import json -from vfrecovery.json import Profile, MetaData +from vfrecovery.json import Profile, MetaData, MetaDataSystem from vfrecovery.utils.formatters import COLORS -from .utils import 
df_obs2jsProfile, ArgoIndex2df, ArgoIndex2JsProfile +from .utils import df_obs2jsProfile, ArgoIndex2df, ArgoIndex2JsProfile, get_simulation_suffix root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") @@ -50,6 +50,7 @@ def predict_function( cfg_cycle_duration: float, cfg_profile_depth: float, cfg_free_surface_drift: int, + n_floats: int, log_level: str, ) -> str: """ @@ -62,6 +63,11 @@ def predict_function( velocity n_predictions output_path + cfg_parking_depth + cfg_cycle_duration + cfg_profile_depth + cfg_free_surface_drift + n_floats log_level Returns @@ -82,6 +88,7 @@ def predict_function( execution_start = time.time() process_start = time.process_time() + # run_id = pd.to_datetime('now', utc=True).strftime('%Y%m%d%H%M%S') # Validate arguments: assert is_wmo(wmo) @@ -99,12 +106,12 @@ def predict_function( if output_path is None: # output_path = "vfrecovery_sims" % pd.to_datetime('now', utc=True).strftime("%Y%m%d%H%M%S") - output_path = os.path.sep.join(["vfrecovery_data", str(wmo), str(cyc[1])]) + output_path = os.path.sep.join(["vfrecovery_simulations_data", str(wmo), str(cyc[1])]) output_path = Path(output_path) output_path.mkdir(parents=True, exist_ok=True) # Set-up simulation logger - simlogfile = logging.FileHandler(os.path.join(output_path, "vfpred.log"), mode='a') + simlogfile = logging.FileHandler(os.path.join(output_path, "vfrecovery_simulations.log"), mode='a') simlogfile.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(name)s:%(filename)s:%(lineno)d | %(message)s", datefmt='%Y/%m/%d %I:%M:%S')) sim_logger.handlers = [] @@ -115,6 +122,15 @@ def predict_function( # log_this.error("This is ERROR") log_this.info("\n\nSTARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i\n" % (wmo, cyc[1])) + # Create Simulation Meta-data class holder + MD = MetaData.from_dict({ + 'n_floats': n_floats, + 'velocity_field': velocity, + 'system': MetaDataSystem.auto_load(), + 'vfconfig': None, # will be filled later + 'computation': None, # will be filled later + }) + # url = argoplot.dashboard(wmo, url_only=True) txt = "You can check this float dashboard while we prepare the prediction: %s" % url @@ -162,9 +178,11 @@ def predict_function( unit='cycle', description='First cycle with free surface drift', dtype=int) + MD.vfconfig = CFG # Register floats configuration to simulation meta-data class # Save virtual float configuration on file: - # CFG.to_json(os.path.join(output_path, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) + log_this.debug("Sim suffix: %s" % get_simulation_suffix(MD)) + # CFG.to_json(os.path.join(output_path, "floats_configuration_%s.json" % get_simulation_suffix(MD))) # if not args.json: # puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) @@ -172,19 +190,13 @@ def predict_function( # - MD = MetaData.from_dict({ - 'nfloats': 0, - 'velocity_field': velocity, - 'vfconfig': CFG, - 'computation': None - }) return MD.to_json() - output = {'wmo': wmo, 'cyc': cyc, 'velocity': velocity, 'n_predictions': n_predictions, 'cfg': CFG.to_json(indent=0)} - json_dump = json.dumps( - output, sort_keys=False, indent=2 - ) - return json_dump + # output = {'wmo': wmo, 'cyc': cyc, 'velocity': velocity, 'n_predictions': n_predictions, 'cfg': CFG.to_json(indent=0)} + # json_dump = json.dumps( + # output, sort_keys=False, indent=2 + # ) + # return json_dump diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py index c8b135c..2aa7c51 100644 --- 
a/vfrecovery/core/utils.py +++ b/vfrecovery/core/utils.py @@ -2,7 +2,8 @@ from typing import List from argopy import ArgoIndex import argopy.plot as argoplot -from vfrecovery.json import Profile + +from vfrecovery.json import Profile, MetaData def ArgoIndex2df(a_wmo, a_cyc) -> pd.DataFrame: @@ -45,3 +46,15 @@ def ArgoIndex2JsProfile(a_wmo, a_cyc) -> List[Profile]: """ df_obs = ArgoIndex2df(a_wmo, a_cyc) return df_obs2jsProfile(df_obs) + + +def get_simulation_suffix(md: MetaData) -> str: + """Compose a simulation unique ID for output files""" + # suf = '%s_%i' % (this_args.velocity, this_args.nfloats) + suf = 'VEL%s_NFL%i_CYT%i_PKD%i_PFD%i_FSD%i' % (md.velocity_field, + md.n_floats, + int(md.vfconfig.mission['cycle_duration']), + int(md.vfconfig.mission['parking_depth']), + int(md.vfconfig.mission['profile_depth']), + int(md.vfconfig.mission['reco_free_surface_drift'])) + return suf diff --git a/vfrecovery/json/__init__.py b/vfrecovery/json/__init__.py index bd2e226..5c5c2d1 100644 --- a/vfrecovery/json/__init__.py +++ b/vfrecovery/json/__init__.py @@ -1,3 +1,3 @@ from .VFRschema_profile import Profile from .VFRschema_simulation import Simulation -from .VFRschema_meta import MetaData \ No newline at end of file +from .VFRschema_meta import MetaData, MetaDataSystem \ No newline at end of file diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py index 77f0c2b..be1c01b 100644 --- a/vfrecovery/python_interface/predict.py +++ b/vfrecovery/python_interface/predict.py @@ -14,6 +14,7 @@ def predict( cfg_cycle_duration: float = None, cfg_profile_depth: float = None, cfg_free_surface_drift: int = 9999, + n_floats: int = 100, log_level: str = 'INFO', ): """ @@ -30,6 +31,7 @@ def predict( cfg_cycle_duration cfg_profile_depth cfg_free_surface_drift + n_floats log_level Returns @@ -46,6 +48,7 @@ def predict( cfg_cycle_duration=cfg_cycle_duration, cfg_profile_depth=cfg_profile_depth, cfg_free_surface_drift=cfg_free_surface_drift, + n_floats=n_floats, log_level=log_level, ) results = json.loads(results_json) diff --git a/vfrecovery/utils/misc.py b/vfrecovery/utils/misc.py index fe0b26d..4f23ebe 100644 --- a/vfrecovery/utils/misc.py +++ b/vfrecovery/utils/misc.py @@ -16,16 +16,6 @@ def get_cfg_str(a_cfg): return txt -def get_sim_suffix(this_args, this_cfg): - """Compose a string suffix for output files""" - # suf = '%s_%i' % (this_args.velocity, this_args.nfloats) - suf = 'VEL%s_NF%i_CYCDUR%i_PARKD%i_PROFD%i_SFD%i' % (this_args.velocity, - this_args.nfloats, - int(this_cfg.mission['cycle_duration']), - int(this_cfg.mission['parking_depth']), - int(this_cfg.mission['profile_depth']), - int(this_cfg.mission['reco_free_surface_drift'])) - return suf def get_ea_profile_page_url(wmo, cyc): From ba528203ceb4464bd47cf9a51094ed3da9ac6833 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 22 Mar 2024 12:18:13 +0100 Subject: [PATCH 09/38] More CLI work --- .gitignore | 3 +- .../command_line_interface/group_describe.py | 85 +++++++- .../command_line_interface/group_predict.py | 20 +- vfrecovery/core/describe.py | 55 +++++ vfrecovery/core/predict.py | 191 ++++++++++-------- vfrecovery/core/utils.py | 26 ++- vfrecovery/downloaders/__init__.py | 6 +- vfrecovery/downloaders/armor3d.py | 2 +- vfrecovery/downloaders/core.py | 5 +- vfrecovery/downloaders/glorys.py | 7 +- vfrecovery/json/VFRschema.py | 29 ++- vfrecovery/json/VFRschema_profile.py | 10 +- vfrecovery/python_interface/predict.py | 3 + 13 files changed, 319 insertions(+), 123 deletions(-) create mode 100644 
vfrecovery/core/describe.py diff --git a/.gitignore b/.gitignore index dbce53c..c61f7bc 100644 --- a/.gitignore +++ b/.gitignore @@ -142,4 +142,5 @@ cli/build-pypi *.nc *.npi* cli/vfrecov/ -webapi/myapp/static/data \ No newline at end of file +webapi/myapp/static/data +vfrecovery_simulations_data/ \ No newline at end of file diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py index 2b1069f..cca0ca4 100644 --- a/vfrecovery/command_line_interface/group_describe.py +++ b/vfrecovery/command_line_interface/group_describe.py @@ -1,4 +1,16 @@ import click +import logging +from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo +import argopy.plot as argoplot +from argopy.errors import DataNotFound +from argopy import ArgoIndex + + +from vfrecovery.core.describe import describe_function + +root_logger = logging.getLogger("vfrecovery_root_logger") +blank_logger = logging.getLogger("vfrecovery_blank_logger") + @click.group() @@ -16,13 +28,76 @@ def cli_group_describe() -> None: epilog=""" Examples: + \b + vfrecovery describe 6903091 + \b vfrecovery describe 6903091 112 """, # noqa ) -@click.argument('WMO') -@click.argument('CYC') +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), + default="INFO", + show_default=True, + help=( + "Set the details printed to console by the command " + "(based on standard logging library)." + ), +) +@click.argument('WMO', nargs=1, type=int) +@click.argument("CYC", nargs=-1, type=int) def describe( - wmo: int, - cyc: int): - click.echo(f"Return description for {wmo} {cyc}") \ No newline at end of file + wmo, + cyc, + log_level, +) -> None: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if root_logger.isEnabledFor(logging.DEBUG): + root_logger.debug("DEBUG mode activated") + + # Validate arguments: + assert is_wmo(wmo) + wmo = check_wmo(wmo)[0] + cyc = list(cyc) + if len(cyc) > 0: + assert is_cyc(cyc) + cyc = check_cyc(cyc) + + # + # json_dump = describe_function(wmo, + # cyc=cyc, + # log_level=log_level) + # blank_logger.info(json_dump) + + url = argoplot.dashboard(wmo, url_only=True) + # txt = "You can check this float dashboard while we search for float profiles in the index: %s" % url + click.secho("\nYou can check this float dashboard while we search for float profile(s) in the index:") + click.secho("\t%s" % url, fg='green') + + # Load observed float profiles index: + host = "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" + idx = ArgoIndex(host=host) + if len(cyc) > 0: + idx.search_wmo_cyc(wmo, cyc) + else: + idx.search_wmo(wmo) + + df = idx.to_dataframe() + df = df.sort_values(by='date').reset_index(drop=True) + if df.shape[0] == 1: + click.secho("\nFloat profile data from the index:") + # df_str = "\t%s" % (df.T).to_string() + df_str = "\n".join(["\t%s" % l for l in (df.T).to_string().split("\n")[1:]]) + click.secho(df_str, fg="green") + else: + click.secho("\nFloat profile(s):") + # click.secho(df.to_string(max_colwidth=15), fg="green") + click.secho(df.to_string(), fg="green") + # click.echo_via_pager("\n%s" % df.to_string(max_colwidth=15)) diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index 
c0de33f..7cf6d93 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -26,7 +26,7 @@ def cli_group_predict() -> None: """, # noqa ) @click.option( - "--velocity", + "-v", "--velocity", type=str, required=False, default='GLORYS', @@ -50,7 +50,7 @@ def cli_group_predict() -> None: # help="Display verbose information along the execution", # ) @click.option( - "--cfg-parking-depth", + "--cfg_parking_depth", type=float, required=False, default=None, @@ -58,7 +58,7 @@ def cli_group_predict() -> None: help="Virtual floats parking depth in db [default: previous cycle value]", ) @click.option( - "--cfg-cycle-duration", + "--cfg_cycle_duration", type=float, required=False, default=None, @@ -66,7 +66,7 @@ def cli_group_predict() -> None: help="Virtual floats cycle duration in hours [default: previous cycle value]", ) @click.option( - "--cfg-profile-depth", + "--cfg_profile_depth", type=float, required=False, default=None, @@ -74,7 +74,7 @@ def cli_group_predict() -> None: help="Virtual floats profile depth in db [default: previous cycle value]", ) @click.option( - "--cfg-free-surface-drift", + "--cfg_free_surface_drift", type=int, required=False, default=9999, @@ -97,6 +97,14 @@ def cli_group_predict() -> None: show_default=True, help="Number of virtual floats simulated to make predictions", ) +@click.option( + "-s", "--domain_min_size", + type=float, + required=False, + default=12, + show_default=True, + help="Minimal size (deg) of the simulation domain around the initial float position", +) @click.option( "--log-level", type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), @@ -120,6 +128,7 @@ def predict( cfg_profile_depth, cfg_free_surface_drift, n_floats, + domain_min_size, log_level, ) -> None: if log_level == "QUIET": @@ -140,5 +149,6 @@ def predict( cfg_profile_depth=cfg_profile_depth, cfg_free_surface_drift=cfg_free_surface_drift, n_floats=n_floats, + domain_min_size=domain_min_size, log_level=log_level) blank_logger.info(json_dump) diff --git a/vfrecovery/core/describe.py b/vfrecovery/core/describe.py new file mode 100644 index 0000000..57f9798 --- /dev/null +++ b/vfrecovery/core/describe.py @@ -0,0 +1,55 @@ +import logging +import json +from typing import Union +from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo +import argopy.plot as argoplot +from argopy.errors import DataNotFound +from argopy import ArgoIndex + +from .utils import ArgoIndex2df_obs + +root_logger = logging.getLogger("vfrecovery_root_logger") + + +def describe_function( + wmo: int, + cyc: Union[int, None], + log_level: str, +) -> str: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + # Validate arguments: + assert is_wmo(wmo) + wmo = check_wmo(wmo)[0] + if cyc is not None: + assert is_cyc(cyc) + cyc = check_cyc(cyc)[0] + + + # + url = argoplot.dashboard(wmo, url_only=True) + txt = "You can check this float dashboard while we search for float profiles in the index: %s" % url + root_logger.info(txt) + + # Load observed float profiles index: + host = "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" + idx = ArgoIndex(host=host) + if cyc is not None: + idx.search_wmo_cyc(wmo, cyc) + else: + idx.search_wmo(wmo) + + df = idx.to_dataframe() + df = 
df.sort_values(by='date') + root_logger.info("\n%s" % df.to_string(max_colwidth=15)) + + output = {'wmo': wmo, 'cyc': cyc} + json_dump = json.dumps( + output, sort_keys=False, indent=2 + ) + return json_dump diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 873959e..da4ad40 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -5,17 +5,21 @@ from pathlib import Path from typing import Union import pandas as pd +import numpy as np import os import logging import json +import pprint from vfrecovery.json import Profile, MetaData, MetaDataSystem from vfrecovery.utils.formatters import COLORS -from .utils import df_obs2jsProfile, ArgoIndex2df, ArgoIndex2JsProfile, get_simulation_suffix +from vfrecovery.downloaders import get_velocity_field +from .utils import df_obs2jsProfile, ArgoIndex2df_obs, ArgoIndex2jsProfile, get_simulation_suffix, get_domain root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") + class log_this: def __init__(self, txt, log_level): @@ -40,6 +44,46 @@ def error(txt) -> 'log_this': return log_this(txt, 'ERROR') +def setup_floats_config( + wmo: int, + cyc: int, + cfg_parking_depth: float, + cfg_cycle_duration: float, + cfg_profile_depth: float, + cfg_free_surface_drift: int, +) -> FloatConfiguration: + """Load float configuration at a given cycle number and possibly overwrite data with user parameters""" + log_this.debug("Loading float configuration...") + try: + CFG = FloatConfiguration([wmo, cyc]) + except: + log_this.debug("Can't load this profile configuration, fall back on default values") + CFG = FloatConfiguration('default') + + if cfg_parking_depth is not None: + log_this.info("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], + float(cfg_parking_depth))) + CFG.update('parking_depth', float(cfg_parking_depth)) + + if cfg_cycle_duration is not None: + log_this.info("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], + float(cfg_cycle_duration))) + CFG.update('cycle_duration', float(cfg_cycle_duration)) + + if cfg_profile_depth is not None: + log_this.info("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], + float(cfg_profile_depth))) + CFG.update('profile_depth', float(cfg_profile_depth)) + + CFG.params = ConfigParam(key='reco_free_surface_drift', + value=int(cfg_free_surface_drift), + unit='cycle', + description='First cycle with free surface drift', + dtype=int) + + return CFG + + def predict_function( wmo: int, cyc: int, @@ -51,6 +95,7 @@ def predict_function( cfg_profile_depth: float, cfg_free_surface_drift: int, n_floats: int, + domain_min_size: float, log_level: str, ) -> str: """ @@ -68,6 +113,7 @@ def predict_function( cfg_profile_depth cfg_free_surface_drift n_floats + domain_min_size log_level Returns @@ -101,8 +147,13 @@ def predict_function( else: velocity = velocity.upper() + # Build the list of cycle numbers to work with `cyc`: + # The `cyc` list follows this structure: + # [PREVIOUS_CYCLE_USED_AS_INITIAL_CONDITIONS, CYCLE_NUMBER_REQUESTED_BY_USER, ADDITIONAL_CYCLE_i, ADDITIONAL_CYCLE_i+1, ...] 
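+    #   e.g. with cyc=112 and n_predictions=2, the list becomes: [111, 112, 113, 114]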
# Prepend previous cycle number that will be used as initial conditions for the prediction of `cyc`: cyc = [cyc - 1, cyc] + # Append additional `n_predictions` cycle numbers: + [cyc.append(cyc[1] + n + 1) for n in range(n_predictions)] if output_path is None: # output_path = "vfrecovery_sims" % pd.to_datetime('now', utc=True).strftime("%Y%m%d%H%M%S") @@ -112,7 +163,7 @@ def predict_function( # Set-up simulation logger simlogfile = logging.FileHandler(os.path.join(output_path, "vfrecovery_simulations.log"), mode='a') - simlogfile.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(name)s:%(filename)s:%(lineno)d | %(message)s", + simlogfile.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(name)s:%(filename)s | %(message)s", datefmt='%Y/%m/%d %I:%M:%S')) sim_logger.handlers = [] sim_logger.addHandler(simlogfile) @@ -122,6 +173,9 @@ def predict_function( # log_this.error("This is ERROR") log_this.info("\n\nSTARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i\n" % (wmo, cyc[1])) + log_this.info("n_predictions: %i" % n_predictions) + log_this.info("Working with cycle numbers array: %s" % str(cyc)) + # Create Simulation Meta-data class holder MD = MetaData.from_dict({ 'n_floats': n_floats, @@ -136,58 +190,61 @@ def predict_function( txt = "You can check this float dashboard while we prepare the prediction: %s" % url log_this.info(txt) - # Load observed float profiles index + # Load observed float profiles index: log_this.debug("Loading float profiles index ...") - df_obs = ArgoIndex2df(wmo, cyc) - P_obs = df_obs2jsProfile(df_obs) - # P_obs = ArgoIndex2JsProfile(wmo, cyc) + # df_obs = ArgoIndex2df_obs(wmo, cyc) + # P_obs = df_obs2jsProfile(df_obs) + P_obs, df_obs = ArgoIndex2jsProfile(wmo, cyc) # THIS_DATE = P_obs[0].location.time - # CENTER = [P_obs[0].location.longitude, P_obs[0].location.latitude] - log_this.debug("Profiles to work with:\n%s" % df_obs[['date', 'latitude', 'longitude', 'wmo', 'cyc', 'institution']].to_string(max_colwidth=35)) - if df_obs.shape[0] == 1: + # log_this.debug( + # "Profiles to work with:\n%s" % df_obs[['date', 'latitude', 'longitude', 'wmo', 'cyc', 'institution']].to_string( + # max_colwidth=35)) + [log_this.debug("Observed %s" % p) for p in P_obs] + if len(P_obs) == 1: log_this.info('Real-case scenario: True position unknown !') else: - log_this.info('Evaluation scenario: historical position known') + log_this.info('Evaluation scenario: Historical position known') - # Load real float configuration at the previous cycle: - log_this.debug("\nLoading float configuration...") - try: - CFG = FloatConfiguration([wmo, cyc[0]]) - except: - log_this.info("Can't load this profile config, falling back on default values") - CFG = FloatConfiguration('default') - - if cfg_parking_depth is not None: - log_this.debug("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], - float(cfg_parking_depth))) - CFG.update('parking_depth', float(cfg_parking_depth)) - - if cfg_cycle_duration is not None: - log_this.debug("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], - float(cfg_cycle_duration))) - CFG.update('cycle_duration', float(cfg_cycle_duration)) - - if cfg_profile_depth is not None: - log_this.debug("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], - float(cfg_profile_depth))) - CFG.update('profile_depth', float(cfg_profile_depth)) - - CFG.params = ConfigParam(key='reco_free_surface_drift', - value=int(cfg_free_surface_drift), - unit='cycle', - description='First cycle with free surface 
drift', - dtype=int) - MD.vfconfig = CFG # Register floats configuration to simulation meta-data class + # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. + # (the loaded config is possibly overwritten with user defined cfg_* parameters) + CFG = setup_floats_config(wmo, cyc[0], + cfg_parking_depth, + cfg_cycle_duration, + cfg_profile_depth, + cfg_free_surface_drift) + MD.vfconfig = CFG # Register floats configuration to the simulation meta-data class - # Save virtual float configuration on file: - log_this.debug("Sim suffix: %s" % get_simulation_suffix(MD)) - # CFG.to_json(os.path.join(output_path, "floats_configuration_%s.json" % get_simulation_suffix(MD))) + # and save the final virtual float configuration on file: + CFG.to_json(Path(os.path.join(output_path, "floats_configuration_%s.json" % get_simulation_suffix(MD)))) + log_this.info("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")])) - # if not args.json: - # puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) # - + log_this.debug("Simulation data will be registered with file suffix: '%s'" % get_simulation_suffix(MD)) + + # Define domain to load velocity for: + # In space: + domain, domain_center = get_domain(P_obs, domain_min_size) + # and time: + CYCLING_PERIOD = int(np.round(CFG.mission['cycle_duration']/24)) # Get the float cycle period (in days) + N_DAYS = (len(cyc)-1)*CYCLING_PERIOD+1 + + # log_this.info((domain_min_size, N_DAYS)) + # log_this.info((domain_center, domain)) + log_this.info("Loading %s velocity field to cover %i days starting on %s ..." % (MD.velocity_field, N_DAYS, P_obs[0].location.time)) + + ds_vel, velocity_file = get_velocity_field(domain, P_obs[0].location.time, + n_days=N_DAYS, + output=output_path, + dataset=MD.velocity_field) + VEL = Velocity(model='GLORYS12V1' if MD.velocity_field == 'GLORYS' else MD.velocity_field, src=ds_vel) + # if not args.json: + # puts("\n\t%s" % str(ds_vel), color=COLORS.green) + # puts("\n\tLoaded velocity field from %s to %s" % + # (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), + # pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) + # figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) + # # return MD.to_json() @@ -198,9 +255,6 @@ def predict_function( # ) # return json_dump - - - # def predictor(args): # """Prediction manager""" @@ -234,51 +288,14 @@ def predict_function( # else: # puts('\nEvaluation scenario: historical position known', color=COLORS.yellow) # -# # Load real float configuration at the previous cycle: -# if not args.json: -# puts("\nLoading float configuration...") -# try: -# CFG = FloatConfiguration([WMO, CYC[0]]) -# except: -# if not args.json: -# puts("Can't load this profile config, falling back on default values", color=COLORS.red) -# CFG = FloatConfiguration('default') -# -# if args.cfg_parking_depth is not None: -# puts("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], -# float(args.cfg_parking_depth))) -# CFG.update('parking_depth', float(args.cfg_parking_depth)) -# -# if args.cfg_cycle_duration is not None: -# puts("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], -# float(args.cfg_cycle_duration))) -# CFG.update('cycle_duration', float(args.cfg_cycle_duration)) -# -# if args.cfg_profile_depth is not None: -# puts("profile_depth=%i is overwritten with %i" % 
(CFG.mission['profile_depth'], -# float(args.cfg_profile_depth))) -# CFG.update('profile_depth', float(args.cfg_profile_depth)) -# -# CFG.params = ConfigParam(key='reco_free_surface_drift', -# value=int(args.cfg_free_surface_drift), -# unit='cycle', -# description='First cycle with free surface drift', -# dtype=int) -# -# # Save virtual float configuration on file: -# CFG.to_json(os.path.join(WORKDIR, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) -# -# if not args.json: -# puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) -# # # Get the cycling frequency (in days, this is more a period then...): -# CYCLING_FREQUENCY = int(np.round(CFG.mission['cycle_duration']/24)) +# CYCLING_PERIOD = int(np.round(CFG.mission['cycle_duration']/24)) # # # Define domain to load velocity for, and get it: # width = args.domain_size + np.abs(np.ceil(THIS_PROFILE['longitude'].values[-1] - CENTER[0])) # height = args.domain_size + np.abs(np.ceil(THIS_PROFILE['latitude'].values[-1] - CENTER[1])) # VBOX = [CENTER[0] - width / 2, CENTER[0] + width / 2, CENTER[1] - height / 2, CENTER[1] + height / 2] -# N_DAYS = (len(CYC)-1)*CYCLING_FREQUENCY+1 +# N_DAYS = (len(CYC)-1)*CYCLING_PERIOD+1 # if not args.json: # puts("\nLoading %s velocity field to cover %i days..." % (VEL_NAME, N_DAYS)) # ds_vel, velocity_file = get_velocity_field(VBOX, THIS_DATE, diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py index 2aa7c51..c928297 100644 --- a/vfrecovery/core/utils.py +++ b/vfrecovery/core/utils.py @@ -1,12 +1,14 @@ import pandas as pd +import numpy as np from typing import List from argopy import ArgoIndex import argopy.plot as argoplot +from argopy.errors import DataNotFound from vfrecovery.json import Profile, MetaData -def ArgoIndex2df(a_wmo, a_cyc) -> pd.DataFrame: +def ArgoIndex2df_obs(a_wmo, a_cyc) -> pd.DataFrame: """Retrieve WMO/CYC Argo index entries as :class:`pd.DataFrame` Parameters @@ -21,7 +23,12 @@ def ArgoIndex2df(a_wmo, a_cyc) -> pd.DataFrame: host = "https://data-argo.ifremer.fr" # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" - df = ArgoIndex(host=host).search_wmo_cyc(a_wmo, a_cyc).to_dataframe() + idx = ArgoIndex(host=host).search_wmo_cyc(a_wmo, a_cyc) + if idx.N_MATCH == 0: + raise DataNotFound("This float has no cycle %i usable as initial conditions for a simulation of %i" % (a_cyc[0], a_cyc[1])) + else: + df = idx.to_dataframe() + df = df.sort_values(by='date') return df @@ -29,10 +36,11 @@ def df_obs2jsProfile(df_obs) -> List[Profile]: Plist = Profile.from_ArgoIndex(df_obs) for P in Plist: P.description = "Observed Argo profile" + P.location.description = None return Plist -def ArgoIndex2JsProfile(a_wmo, a_cyc) -> List[Profile]: +def ArgoIndex2jsProfile(a_wmo, a_cyc) -> List[Profile]: """Retrieve WMO/CYC Argo index entries as a list of :class:`vfrecovery.json.Profile` Parameters @@ -44,8 +52,8 @@ def ArgoIndex2JsProfile(a_wmo, a_cyc) -> List[Profile]: ------- :class:`vfrecovery.json.Profile` """ - df_obs = ArgoIndex2df(a_wmo, a_cyc) - return df_obs2jsProfile(df_obs) + df_obs = ArgoIndex2df_obs(a_wmo, a_cyc) + return df_obs2jsProfile(df_obs), df_obs def get_simulation_suffix(md: MetaData) -> str: @@ -58,3 +66,11 @@ def get_simulation_suffix(md: MetaData) -> str: int(md.vfconfig.mission['profile_depth']), int(md.vfconfig.mission['reco_free_surface_drift'])) return suf + + +def get_domain(Plist, size): + c = 
[np.mean([p.location.longitude for p in Plist]), np.mean([p.location.latitude for p in Plist])] + domain = [c[0] - size / 2, c[0] + size / 2, + c[1] - size / 2, c[1] + size / 2] + domain = [np.round(d, 3) for d in domain] + return domain, c \ No newline at end of file diff --git a/vfrecovery/downloaders/__init__.py b/vfrecovery/downloaders/__init__.py index 22b5eb5..1afd68c 100644 --- a/vfrecovery/downloaders/__init__.py +++ b/vfrecovery/downloaders/__init__.py @@ -1,3 +1,3 @@ -from armor3d import Armor3d -from glorys import Glorys -from core import get_velocity_field +from .armor3d import Armor3d +from .glorys import Glorys +from .core import get_velocity_field diff --git a/vfrecovery/downloaders/armor3d.py b/vfrecovery/downloaders/armor3d.py index 9d4d020..a238d2e 100644 --- a/vfrecovery/downloaders/armor3d.py +++ b/vfrecovery/downloaders/armor3d.py @@ -64,7 +64,7 @@ def _get_this(self, dataset_id): end_date = start_date else: end_date = \ - self.time_axis[self.time_axis <= self.start_date + (self.n_days + 1) * pd.Timedelta(1, 'D')].iloc[-1] + self.time_axis[self.time_axis <= self.start_date + (self.n_days + 7) * pd.Timedelta(1, 'D')].iloc[-1] ds = copernicusmarine.open_dataset( dataset_id=dataset_id, diff --git a/vfrecovery/downloaders/core.py b/vfrecovery/downloaders/core.py index 3241c86..0e86d9f 100644 --- a/vfrecovery/downloaders/core.py +++ b/vfrecovery/downloaders/core.py @@ -24,13 +24,12 @@ def get_velocity_filename(dataset, n_days): download_date = pd.to_datetime('now', utc='now').strftime("%Y%m%d") fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, download_date)) return fname - velocity_file = get_velocity_filename(dataset, n_days) + if not os.path.exists(velocity_file): # Define Data loader: loader = Armor3d if dataset == 'ARMOR3D' else Glorys - loader = loader(a_box, a_date, n_days=n_days) - # puts(str(loader), color=COLORS.magenta) + loader = loader(a_box, a_date - pd.Timedelta(1, 'D'), n_days=n_days) # Load data from Copernicus Marine Data store: ds = loader.to_xarray() diff --git a/vfrecovery/downloaders/glorys.py b/vfrecovery/downloaders/glorys.py index 5daec78..ff404c4 100644 --- a/vfrecovery/downloaders/glorys.py +++ b/vfrecovery/downloaders/glorys.py @@ -105,7 +105,8 @@ def _get_this(self, dataset_id, dates): maximum_depth=self.max_depth, start_datetime=dates[0].strftime("%Y-%m-%dT%H:%M:%S"), end_datetime=dates[1].strftime("%Y-%m-%dT%H:%M:%S"), - variables=['uo', 'vo'] + variables=['uo', 'vo'], + disable_progress_bar=True, ) return ds @@ -119,7 +120,7 @@ def _get_forecast(self): if self.n_days == 1: end_date = start_date else: - end_date = start_date + pd.Timedelta(self.n_days - 1, 'D') + end_date = start_date + pd.Timedelta(self.n_days + 1, 'D') return self._get_this(self.dataset_id, [start_date, end_date]) def _get_reanalysis(self): @@ -132,7 +133,7 @@ def _get_reanalysis(self): if self.n_days == 1: end_date = start_date else: - end_date = self.start_date + pd.Timedelta(self.n_days - 1, 'D') + end_date = self.start_date + pd.Timedelta(self.n_days + 1, 'D') return self._get_this(self.dataset_id, [start_date, end_date]) def to_xarray(self): diff --git a/vfrecovery/json/VFRschema.py b/vfrecovery/json/VFRschema.py index 09e225b..eeb389b 100644 --- a/vfrecovery/json/VFRschema.py +++ b/vfrecovery/json/VFRschema.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd import ipaddress -from typing import List, Dict, Union +from typing import List, Dict, Union, TextIO import jsonschema from jsonschema import Draft202012Validator from referencing 
import Registry, Resource @@ -35,10 +35,13 @@ def __repr__(self): summary = [] for p in self.properties: if p != 'description': - summary.append("%s=%s" % (p, getattr(self, p))) - if hasattr(self, 'description'): + v = getattr(self, p) + if isinstance(v, (int, float)): + summary.append("%s=%s" % (p, v)) + else: + summary.append("%s='%s'" % (p, v)) + if hasattr(self, 'description') and getattr(self, 'description') is not None: summary.append("%s='%s'" % ('description', getattr(self, 'description'))) - return "%s(%s)" % (name, ", ".join(summary)) def _repr_html_(self): @@ -83,17 +86,27 @@ def __dict__(self): if key != "description": value = getattr(self, key) d.update({key: value}) + if hasattr(self, 'schema'): + d.update({"$schema": "%s/%s.json" % (self.schema_root, getattr(self, 'schema'))}) return d - def to_json(self, fp=None, indent=4): + def to_json(self, fp: Union[str, Path, TextIO] = None, indent=4): """Save to JSON file or return a JSON string that can be loaded with json.loads()""" jsdata = self.__dict__ - if hasattr(self, 'schema'): - jsdata.update({"$schema": "%s/%s.json" % (self.schema_root, getattr(self, 'schema'))}) + # if hasattr(self, 'schema'): + # jsdata.update({"$schema": "%s/%s.json" % (self.schema_root, getattr(self, 'schema'))}) if fp is None: return json.dumps(jsdata, indent=indent, cls=self.JSONEncoder) else: - return json.dump(jsdata, fp, indent=indent, cls=self.JSONEncoder) + if hasattr(fp, 'write'): + return json.dump(jsdata, fp, indent=indent, cls=self.JSONEncoder) + else: + if isinstance(fp, str): + fp = Path(fp) + + with fp.open('w') as fpp: + o = json.dump(jsdata, fpp, indent=indent, cls=self.JSONEncoder) + return o class VFvalidators(VFschema): diff --git a/vfrecovery/json/VFRschema_profile.py b/vfrecovery/json/VFRschema_profile.py index 73269ae..5120f8e 100644 --- a/vfrecovery/json/VFRschema_profile.py +++ b/vfrecovery/json/VFRschema_profile.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np from typing import List, Dict import argopy.plot as argoplot @@ -33,6 +34,10 @@ def __init__(self, **kwargs): self._validate_latitude(self.latitude) self._validate_time(self.time) + self.longitude = np.round(self.longitude, 3) + self.latitude = np.round(self.latitude, 3) + + @staticmethod def from_dict(obj: Dict) -> 'Location': return Location(**obj) @@ -80,14 +85,15 @@ def from_dict(obj: Dict) -> 'Profile': @staticmethod def from_ArgoIndex(df: pd.DataFrame) -> List['Profile']: Plist = [] + df = df.sort_values(by='date') for irow, this_obs in df.iterrows(): p = Profile.from_dict({ 'location': Location.from_dict({'longitude': this_obs['longitude'], 'latitude': this_obs['latitude'], - 'time': this_obs['date'] + 'time': this_obs['date'].tz_localize('UTC') }), 'wmo': this_obs['wmo'], - 'cyc': this_obs['cyc'], + 'cycle_number': this_obs['cyc'], 'url_float': argoplot.dashboard(wmo=this_obs['wmo'], url_only=True), 'url_profile': argoplot.dashboard(wmo=this_obs['wmo'], cyc=this_obs['cyc'], url_only=True), }) diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py index be1c01b..60ac24d 100644 --- a/vfrecovery/python_interface/predict.py +++ b/vfrecovery/python_interface/predict.py @@ -15,6 +15,7 @@ def predict( cfg_profile_depth: float = None, cfg_free_surface_drift: int = 9999, n_floats: int = 100, + domain_min_size: float = 12., log_level: str = 'INFO', ): """ @@ -32,6 +33,7 @@ def predict( cfg_profile_depth cfg_free_surface_drift n_floats + domain_min_size log_level Returns @@ -49,6 +51,7 @@ def predict( 
cfg_profile_depth=cfg_profile_depth, cfg_free_surface_drift=cfg_free_surface_drift, n_floats=n_floats, + domain_min_size=domain_min_size, log_level=log_level, ) results = json.loads(results_json) From 522b3482d4ac4bd6740014c4d5d82cdc41b7ca85 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 22 Mar 2024 14:23:26 +0100 Subject: [PATCH 10/38] mv forward refactoring --- .../command_line_interface/group_predict.py | 2 +- vfrecovery/core/deployment_plan.py | 8 +- vfrecovery/core/predict.py | 434 ++++++++++-------- vfrecovery/core/trajfile_handler.py | 6 +- vfrecovery/json/__init__.py | 2 +- 5 files changed, 246 insertions(+), 206 deletions(-) diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index 7cf6d93..94402da 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -106,7 +106,7 @@ def cli_group_predict() -> None: help="Minimal size (deg) of the simulation domain around the initial float position", ) @click.option( - "--log-level", + "--log_level", type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), default="INFO", show_default=True, diff --git a/vfrecovery/core/deployment_plan.py b/vfrecovery/core/deployment_plan.py index 33fc1a9..3737e25 100644 --- a/vfrecovery/core/deployment_plan.py +++ b/vfrecovery/core/deployment_plan.py @@ -1,8 +1,9 @@ import numpy as np import pandas as pd +from vfrecovery.json import Profile -def setup_deployment_plan(a_profile, a_date, nfloats=15000): +def setup_deployment_plan(P: Profile, nfloats: int = 120): # We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time # Amplitude of the profile position perturbations in the zonal (deg), meridional (deg), and temporal (hours) directions: @@ -11,7 +12,7 @@ def setup_deployment_plan(a_profile, a_date, nfloats=15000): rt = 0 # - lonc, latc = a_profile + lonc, latc = P.location.longitude, P.location.latitude # box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] a, b = lonc - rx / 2, lonc + rx / 2 @@ -23,7 +24,7 @@ def setup_deployment_plan(a_profile, a_date, nfloats=15000): a, b = 0, rt dtim = (b - a) * np.random.random_sample((nfloats,)) + a dtim = np.round(dtim).astype(int) - tim = pd.to_datetime([a_date + np.timedelta64(dt, 'h') for dt in dtim]) + tim = pd.to_datetime([P.location.time + np.timedelta64(dt, 'h') for dt in dtim]) # dtim = (b-a) * np.random.random_sample((nfloats, )) + a # dtim = np.round(dtim).astype(int) # tim2 = pd.to_datetime([this_date - np.timedelta64(dt, 'h') for dt in dtim]) @@ -39,4 +40,3 @@ def setup_deployment_plan(a_profile, a_date, nfloats=15000): df['date'] = pd.to_datetime(df['date']) return df - diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index da4ad40..b6e26fd 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -10,15 +10,22 @@ import logging import json import pprint +from datetime import timedelta -from vfrecovery.json import Profile, MetaData, MetaDataSystem +from vfrecovery.json import Profile, MetaData, MetaDataSystem, MetaDataComputation from vfrecovery.utils.formatters import COLORS from vfrecovery.downloaders import get_velocity_field from .utils import df_obs2jsProfile, ArgoIndex2df_obs, ArgoIndex2jsProfile, get_simulation_suffix, get_domain +from .deployment_plan import setup_deployment_plan +from .trajfile_handler import Trajectories +from .simulation_handler import SimPredictor + 
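For reference, a standalone sketch of the position perturbation that `setup_deployment_plan` (imported just above, reworked earlier in this patch) applies around the last known profile location. The center position and perturbation amplitudes below are illustrative, not the values hard-coded in `deployment_plan.py`:

```python
import numpy as np

# Illustrative center position, perturbation amplitudes (deg) and swarm size:
lonc, latc = -45.2, 35.1
rx, ry = 0.5, 0.5
nfloats = 120

# Uniform draws in [lonc - rx/2, lonc + rx/2], as in setup_deployment_plan:
a, b = lonc - rx / 2, lonc + rx / 2
lon = (b - a) * np.random.random_sample((nfloats,)) + a

# Same for latitude, in [latc - ry/2, latc + ry/2]:
a, b = latc - ry / 2, latc + ry / 2
lat = (b - a) * np.random.random_sample((nfloats,)) + a
```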
root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") +pp_obj = lambda x: "\n%s" % "\n".join(["\t%s" % line for line in x.__repr__().split("\n")]) + class log_this: @@ -84,6 +91,211 @@ def setup_floats_config( return CFG +class Simulator: + """ + + >>> S = Simulator(wmo, cyc, n_floats=n_floats, velocity=velocity) + >>> S.setup() + >>> S.execute() + >>> S.predict() + >>> S.postprocess() + + """ + + def __init__(self, wmo, cyc, **kwargs): + self.wmo = wmo + self.cyc = cyc + self.output_path = kwargs['output_path'] + log_this.info("\n\nSTARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i\n" % (wmo, cyc[1])) + + # log_this.info("n_predictions: %i" % n_predictions) + log_this.info("Working with cycle numbers list: %s" % str(cyc)) + + # + url = argoplot.dashboard(wmo, url_only=True) + txt = "You can check this float dashboard while we prepare the prediction: %s" % url + log_this.info(txt) + + # Create Simulation Meta-data class holder + self.MD = MetaData.from_dict({ + 'n_floats': kwargs['n_floats'], + 'velocity_field': kwargs['velocity'], + 'system': MetaDataSystem.auto_load(), + 'vfconfig': None, # will be filled later + 'computation': None, # will be filled later + }) + + def setup_load_observed_profiles(self): + """Load observed float profiles index""" + + log_this.info("Loading float profiles index") + # df_obs = ArgoIndex2df_obs(wmo, cyc) + # P_obs = df_obs2jsProfile(df_obs) + self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc) + # THIS_DATE = P_obs[0].location.time + + [log_this.debug("Observed profiles list: %s" % pp_obj(p)) for p in self.P_obs] + if len(self.P_obs) == 1: + log_this.info('Real-case scenario: True position unknown !') + else: + log_this.info('Evaluation scenario: Historical position known') + + def setup_float_config(self, **kwargs): + """Load and setup float configuration""" + + # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. 
+ # (the loaded config is possibly overwritten with user defined cfg_* parameters) + self.CFG = setup_floats_config(self.wmo, self.cyc[0], + kwargs['cfg_parking_depth'], + kwargs['cfg_cycle_duration'], + kwargs['cfg_profile_depth'], + kwargs['cfg_free_surface_drift']) + self.MD.vfconfig = self.CFG # Register floats configuration to the simulation meta-data class + + # and save the final virtual float configuration on file: + self.CFG.to_json( + Path(os.path.join(self.output_path, "floats_configuration_%s.json" % get_simulation_suffix(self.MD)))) + log_this.debug(pp_obj(self.CFG)) + + def setup_load_velocity_data(self, **kwargs): + # Define domain to load velocity for: + # In space: + domain, domain_center = get_domain(self.P_obs, kwargs['domain_min_size']) + # and time: + cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) + self.n_days = (len(self.cyc) - 1) * cycle_period + 1 + + # log_this.info((domain_min_size, self.n_days)) + # log_this.info((domain_center, domain)) + log_this.info("Loading %s velocity field to cover %i days starting on %s" % ( + self.MD.velocity_field, self.n_days, self.P_obs[0].location.time)) + + self.ds_vel, velocity_file = get_velocity_field(domain, self.P_obs[0].location.time, + n_days=self.n_days, + output=self.output_path, + dataset=self.MD.velocity_field) + log_this.debug(pp_obj(self.ds_vel)) + log_this.info("Loaded %s field from %s to %s" % ( + self.MD.velocity_field, + pd.to_datetime(self.ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), + pd.to_datetime(self.ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")) + ) + + def setup(self, **kwargs): + """Fulfill all requirements for the simulation""" + self.setup_load_observed_profiles() + self.setup_float_config(**kwargs) + self.setup_load_velocity_data(**kwargs) + log_this.info("Simulation data will be registered with file suffix: '%s'" % get_simulation_suffix(self.MD)) + + def execute_get_plan(self): + # VirtualFleet, get a deployment plan: + log_this.info("Deployment plan setup") + df_plan = setup_deployment_plan(self.P_obs[0], nfloats=self.MD.n_floats) + log_this.info("Set %i virtual floats to deploy (i.e. 
swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) + + self.PLAN = {'lon': df_plan['longitude'], + 'lat': df_plan['latitude'], + 'time': np.array([np.datetime64(t) for t in df_plan['date'].dt.strftime('%Y-%m-%d %H:%M').array]), + } + + def execute_get_velocity(self): + self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, + src=self.ds_vel) + # figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) + + def execute(self): + """Setup a VirtualFleet and execute simulation""" + + self.execute_get_velocity() + self.execute_get_plan() + + # Set up VirtualFleet: + log_this.info("VirtualFleet instance setup") + self.VFleet = VirtualFleet(plan=self.PLAN, + fieldset=self.VEL, + mission=self.CFG) + + # Execute the simulation: + log_this.info("Starting simulation") + + # Remove traj file if exists: + # output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) + # if os.path.exists(output_path): + # shutil.rmtree(output_path) + + self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), + step=timedelta(minutes=5), + record=timedelta(minutes=30), + output=True, + output_folder=self.output_path, + output_file='trajectories_%s.zarr' % get_simulation_suffix(self.MD), + verbose_progress=True, + ) + log_this.info("Simulation ended with success") + + def predict_read_trajectories(self): + + # Get simulated profiles index: + log_this.info("Extracting swarm profiles index") + + self.T = Trajectories(self.VFleet.output) + self.T.get_index().add_distances(origin=self.P_obs[0]) + log_this.debug(pp_obj(self.T)) + + # jsdata, fig, ax = self.T.analyse_pairwise_distances(cycle=1, show_plot=True) + + # if not args.json: + # puts(str(T), color=COLORS.magenta) + # puts(DF_SIM.head().to_string(), color=COLORS.green) + # figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, + # dd=1, save_figure=args.save_figure, workdir=WORKDIR) + + def predict_positions(self): + """Make predictions based on simulated profile density""" + self.SP = SimPredictor(self.T.to_index(), self.df_obs) + log_this.info("Predicting float cycle position(s) from swarm simulation") + log_this.debug(pp_obj(self.SP)) + + self.SP.fit_predict() + # SP.plot_predictions(VEL, + # CFG, + # sim_suffix=get_sim_suffix(args, CFG), + # save_figure=args.save_figure, + # workdir=WORKDIR, + # orient='portrait') + # results = self.SP.predictions + + def predict(self): + """Make float profile predictions based on the swarm simulation""" + self.predict_read_trajectories() + self.predict_positions() + + def postprocess_metrics(self): + self.SP.add_metrics(self.VEL) + + def postprocess_swarm_metrics(self): + # Recovery, compute more swarm metrics: + for this_cyc in self.T.sim_cycles: + jsmetrics, fig, ax = self.T.analyse_pairwise_distances(cycle=this_cyc, + save_figure=True, + this_args=args, + this_cfg=self.CFG, + sim_suffix=get_simulation_suffix(self.MD), + workdir=self.output_path, + ) + if 'metrics' in results['predictions'][this_cyc]: + for key in jsmetrics.keys(): + results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) + else: + results['predictions'][this_cyc].update({'metrics': jsmetrics}) + + def postprocess(self): + self.postprocess_metrics() + self.postprocess_swarm_metrics() + + + def predict_function( wmo: int, cyc: int, @@ -171,83 +383,33 @@ def predict_function( # log_this.warning("This is WARN") # log_this.debug("This is DEBUG") # log_this.error("This is 
ERROR") - log_this.info("\n\nSTARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i\n" % (wmo, cyc[1])) - - log_this.info("n_predictions: %i" % n_predictions) - log_this.info("Working with cycle numbers array: %s" % str(cyc)) - - # Create Simulation Meta-data class holder - MD = MetaData.from_dict({ - 'n_floats': n_floats, - 'velocity_field': velocity, - 'system': MetaDataSystem.auto_load(), - 'vfconfig': None, # will be filled later - 'computation': None, # will be filled later - }) # - url = argoplot.dashboard(wmo, url_only=True) - txt = "You can check this float dashboard while we prepare the prediction: %s" % url - log_this.info(txt) - - # Load observed float profiles index: - log_this.debug("Loading float profiles index ...") - # df_obs = ArgoIndex2df_obs(wmo, cyc) - # P_obs = df_obs2jsProfile(df_obs) - P_obs, df_obs = ArgoIndex2jsProfile(wmo, cyc) - # THIS_DATE = P_obs[0].location.time - - # log_this.debug( - # "Profiles to work with:\n%s" % df_obs[['date', 'latitude', 'longitude', 'wmo', 'cyc', 'institution']].to_string( - # max_colwidth=35)) - [log_this.debug("Observed %s" % p) for p in P_obs] - if len(P_obs) == 1: - log_this.info('Real-case scenario: True position unknown !') - else: - log_this.info('Evaluation scenario: Historical position known') - - # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. - # (the loaded config is possibly overwritten with user defined cfg_* parameters) - CFG = setup_floats_config(wmo, cyc[0], - cfg_parking_depth, - cfg_cycle_duration, - cfg_profile_depth, - cfg_free_surface_drift) - MD.vfconfig = CFG # Register floats configuration to the simulation meta-data class - - # and save the final virtual float configuration on file: - CFG.to_json(Path(os.path.join(output_path, "floats_configuration_%s.json" % get_simulation_suffix(MD)))) - log_this.info("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")])) + S = Simulator(wmo, cyc, + n_floats=n_floats, + velocity=velocity, + output_path=output_path, + ) + S.setup(cfg_parking_depth=cfg_parking_depth, + cfg_cycle_duration=cfg_cycle_duration, + cfg_profile_depth=cfg_profile_depth, + cfg_free_surface_drift=cfg_free_surface_drift, + domain_min_size=domain_min_size, + ) + S.execute() + S.predict() + S.postprocess() # - log_this.debug("Simulation data will be registered with file suffix: '%s'" % get_simulation_suffix(MD)) - - # Define domain to load velocity for: - # In space: - domain, domain_center = get_domain(P_obs, domain_min_size) - # and time: - CYCLING_PERIOD = int(np.round(CFG.mission['cycle_duration']/24)) # Get the float cycle period (in days) - N_DAYS = (len(cyc)-1)*CYCLING_PERIOD+1 - - # log_this.info((domain_min_size, N_DAYS)) - # log_this.info((domain_center, domain)) - log_this.info("Loading %s velocity field to cover %i days starting on %s ..." 
% (MD.velocity_field, N_DAYS, P_obs[0].location.time)) - - ds_vel, velocity_file = get_velocity_field(domain, P_obs[0].location.time, - n_days=N_DAYS, - output=output_path, - dataset=MD.velocity_field) - VEL = Velocity(model='GLORYS12V1' if MD.velocity_field == 'GLORYS' else MD.velocity_field, src=ds_vel) - # if not args.json: - # puts("\n\t%s" % str(ds_vel), color=COLORS.green) - # puts("\n\tLoaded velocity field from %s to %s" % - # (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), - # pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) - # figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) - # + S.MD.computation = MetaDataComputation.from_dict({ + 'date': pd.to_datetime('now', utc=True), + 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), + 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), + }) + return S.MD.computation.to_json() + # return S.MD.to_json() - # - return MD.to_json() + # return MD.to_json() # output = {'wmo': wmo, 'cyc': cyc, 'velocity': velocity, 'n_predictions': n_predictions, 'cfg': CFG.to_json(indent=0)} # json_dump = json.dumps( @@ -257,139 +419,17 @@ def predict_function( # def predictor(args): # """Prediction manager""" - -# if is_cyc(args.cyc): -# CYC = [check_cyc(args.cyc)[0]-1] -# [CYC.append(c) for c in check_cyc(args.cyc)] -# -# puts('CYC = %s' % CYC, color=COLORS.magenta) -# # raise ValueError('stophere') # # if args.save_figure: # mplbackend = matplotlib.get_backend() # matplotlib.use('Agg') -# # Load these profiles' information: -# if not args.json: -# puts("\nYou can check this float dashboard while we prepare the prediction:") -# puts("\t%s" % argoplot.dashboard(WMO, url_only=True), color=COLORS.green) -# puts("\nLoading float profiles index ...") -# host = "https://data-argo.ifremer.fr" -# # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" -# # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" -# THIS_PROFILE = store(host=host).search_wmo_cyc(WMO, CYC).to_dataframe() -# THIS_DATE = pd.to_datetime(THIS_PROFILE['date'].values[0], utc=True) -# CENTER = [THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]] -# if not args.json: -# puts("\nProfiles to work with:") -# puts(THIS_PROFILE.to_string(max_colwidth=15), color=COLORS.green) -# if THIS_PROFILE.shape[0] == 1: -# puts('\nReal-case scenario: True position unknown !', color=COLORS.yellow) -# else: -# puts('\nEvaluation scenario: historical position known', color=COLORS.yellow) -# -# # Get the cycling frequency (in days, this is more a period then...): -# CYCLING_PERIOD = int(np.round(CFG.mission['cycle_duration']/24)) -# -# # Define domain to load velocity for, and get it: -# width = args.domain_size + np.abs(np.ceil(THIS_PROFILE['longitude'].values[-1] - CENTER[0])) -# height = args.domain_size + np.abs(np.ceil(THIS_PROFILE['latitude'].values[-1] - CENTER[1])) -# VBOX = [CENTER[0] - width / 2, CENTER[0] + width / 2, CENTER[1] - height / 2, CENTER[1] + height / 2] -# N_DAYS = (len(CYC)-1)*CYCLING_PERIOD+1 -# if not args.json: -# puts("\nLoading %s velocity field to cover %i days..." 
% (VEL_NAME, N_DAYS)) -# ds_vel, velocity_file = get_velocity_field(VBOX, THIS_DATE, -# n_days=N_DAYS, -# output=WORKDIR, -# dataset=VEL_NAME) -# VEL = Velocity(model='GLORYS12V1' if VEL_NAME == 'GLORYS' else VEL_NAME, src=ds_vel) -# if not args.json: -# puts("\n\t%s" % str(ds_vel), color=COLORS.green) -# puts("\n\tLoaded velocity field from %s to %s" % -# (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), -# pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) -# figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) -# -# # raise ValueError('stophere') -# -# # VirtualFleet, get a deployment plan: -# if not args.json: -# puts("\nVirtualFleet, get a deployment plan...") -# DF_PLAN = setup_deployment_plan(CENTER, THIS_DATE, nfloats=args.nfloats) -# PLAN = {'lon': DF_PLAN['longitude'], -# 'lat': DF_PLAN['latitude'], -# 'time': np.array([np.datetime64(t) for t in DF_PLAN['date'].dt.strftime('%Y-%m-%d %H:%M').array]), -# } -# if not args.json: -# puts("\t%i virtual floats to deploy" % DF_PLAN.shape[0], color=COLORS.green) -# -# # Set up VirtualFleet: -# if not args.json: -# puts("\nVirtualFleet, set-up the fleet...") -# VFleet = VirtualFleet(plan=PLAN, -# fieldset=VEL, -# mission=CFG) -# -# # VirtualFleet, execute the simulation: -# if not args.json: -# puts("\nVirtualFleet, execute the simulation...") -# -# # Remove traj file if exists: -# output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) -# # if os.path.exists(output_path): -# # shutil.rmtree(output_path) -# # -# # VFleet.simulate(duration=timedelta(hours=N_DAYS*24+1), -# # step=timedelta(minutes=5), -# # record=timedelta(minutes=30), -# # output=True, -# # output_folder=WORKDIR, -# # output_file='trajectories_%s.zarr' % get_sim_suffix(args, CFG), -# # verbose_progress=not args.json, -# # ) -# # # VirtualFleet, get simulated profiles index: # if not args.json: # puts("\nExtract swarm profiles index...") # -# T = Trajectories(WORKDIR + "/" + 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) -# DF_SIM = T.get_index().add_distances(origin=[THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]]) -# if not args.json: -# puts(str(T), color=COLORS.magenta) -# puts(DF_SIM.head().to_string(), color=COLORS.green) -# figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, -# dd=1, save_figure=args.save_figure, workdir=WORKDIR) -# -# # Recovery, make predictions based on simulated profile density: -# SP = SimPredictor(DF_SIM, THIS_PROFILE) -# if not args.json: -# puts("\nPredict float cycle position(s) from swarm simulation...", color=COLORS.white) -# puts(str(SP), color=COLORS.magenta) -# SP.fit_predict() -# SP.add_metrics(VEL) -# SP.plot_predictions(VEL, -# CFG, -# sim_suffix=get_sim_suffix(args, CFG), -# save_figure=args.save_figure, -# workdir=WORKDIR, -# orient='portrait') -# results = SP.predictions -# -# # Recovery, compute more swarm metrics: -# for this_cyc in T.sim_cycles: -# jsmetrics, fig, ax = T.analyse_pairwise_distances(cycle=this_cyc, -# save_figure=True, -# this_args=args, -# this_cfg=CFG, -# sim_suffix=get_sim_suffix(args, CFG), -# workdir=WORKDIR, -# ) -# if 'metrics' in results['predictions'][this_cyc]: -# for key in jsmetrics.keys(): -# results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) -# else: -# results['predictions'][this_cyc].update({'metrics': jsmetrics}) -# + + # # Recovery, finalize JSON output: # 
execution_end = time.time() # process_end = time.process_time() diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py index afbbd19..106eaa4 100644 --- a/vfrecovery/core/trajfile_handler.py +++ b/vfrecovery/core/trajfile_handler.py @@ -8,7 +8,7 @@ from vfrecovery.utils.misc import get_cfg_str from vfrecovery.plots.utils import save_figurefile - +from vfrecovery.json import Profile class Trajectories: """Trajectory file manager for VFrecovery @@ -167,7 +167,7 @@ def get_index(self): self.to_index() return self - def add_distances(self, origin: None) -> pd.DataFrame: + def add_distances(self, origin: Profile = None) -> pd.DataFrame: """Compute profiles distance to some origin Returns @@ -187,7 +187,7 @@ def add_distances(self, origin: None) -> pd.DataFrame: df = self._index - x2, y2 = origin # real float initial position + x2, y2 = origin.location.longitude, origin.location.latitude # real float initial position df['distance'] = np.nan df['rel_lon'] = np.nan df['rel_lat'] = np.nan diff --git a/vfrecovery/json/__init__.py b/vfrecovery/json/__init__.py index 5c5c2d1..a8dfb80 100644 --- a/vfrecovery/json/__init__.py +++ b/vfrecovery/json/__init__.py @@ -1,3 +1,3 @@ from .VFRschema_profile import Profile from .VFRschema_simulation import Simulation -from .VFRschema_meta import MetaData, MetaDataSystem \ No newline at end of file +from .VFRschema_meta import MetaData, MetaDataSystem, MetaDataComputation \ No newline at end of file From ada3ce87f75662cbe4f61c3529532634bbd7b524 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 22 Mar 2024 14:51:51 +0100 Subject: [PATCH 11/38] misc --- vfrecovery/core/predict.py | 22 +- vfrecovery/core/trajfile_handler.py | 62 ++-- vfrecovery/core/trajfile_handler_legacy.py | 407 +++++++++++++++++++++ vfrecovery/json/VFRschema.py | 2 +- vfrecovery/json/VFRschema_metrics.py | 2 +- vfrecovery/json/__init__.py | 3 +- 6 files changed, 455 insertions(+), 43 deletions(-) create mode 100644 vfrecovery/core/trajfile_handler_legacy.py diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index b6e26fd..9d49418 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -20,7 +20,6 @@ from .trajfile_handler import Trajectories from .simulation_handler import SimPredictor - root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") @@ -277,25 +276,20 @@ def postprocess_metrics(self): def postprocess_swarm_metrics(self): # Recovery, compute more swarm metrics: for this_cyc in self.T.sim_cycles: - jsmetrics, fig, ax = self.T.analyse_pairwise_distances(cycle=this_cyc, - save_figure=True, - this_args=args, - this_cfg=self.CFG, - sim_suffix=get_simulation_suffix(self.MD), - workdir=self.output_path, - ) - if 'metrics' in results['predictions'][this_cyc]: - for key in jsmetrics.keys(): - results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) - else: - results['predictions'][this_cyc].update({'metrics': jsmetrics}) + jsmetrics = self.T.analyse_pairwise_distances(cycle=this_cyc, show_plot=False) + # if 'metrics' in results['predictions'][this_cyc]: + # for key in jsmetrics.keys(): + # results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) + # else: + # results['predictions'][this_cyc].update({'metrics': jsmetrics}) + log_this.info(pp_obj(jsmetrics)) + return jsmetrics def postprocess(self): self.postprocess_metrics() self.postprocess_swarm_metrics() - def predict_function( wmo: int, cyc: int, diff --git 
a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py index 106eaa4..84887ce 100644 --- a/vfrecovery/core/trajfile_handler.py +++ b/vfrecovery/core/trajfile_handler.py @@ -9,6 +9,8 @@ from vfrecovery.utils.misc import get_cfg_str from vfrecovery.plots.utils import save_figurefile from vfrecovery.json import Profile +from vfrecovery.json import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState + class Trajectories: """Trajectory file manager for VFrecovery @@ -38,8 +40,8 @@ def sim_cycles(self): """Return list of cycles simulated""" cycs = np.unique(self.obj['cycle_number']) last_obs_phase = \ - self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ - np.newaxis][0] + self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ + np.newaxis][0] if last_obs_phase < 3: cycs = cycs[0:-1] return cycs @@ -50,7 +52,7 @@ def __repr__(self): start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) summary.append("Simulation length: %s, from %s to %s" % ( - pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) + pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) return "\n".join(summary) # def to_index_par(self) -> pd.DataFrame: @@ -224,8 +226,8 @@ def analyse_pairwise_distances(self, show_plot: bool = True, save_figure: bool = False, workdir: str = '.', - sim_suffix = None, - this_cfg = None, + sim_suffix=None, + this_cfg=None, this_args: dict = None): def get_hist_and_peaks(this_d): @@ -318,27 +320,36 @@ def get_hist_and_peaks(this_d): # staggering = np.max(bin_edges1)/np.max(bin_edges0) staggering = np.max(bin_edges) / np.max(bin_edges0) - # Store metrics in a dict: - prediction_metrics = {} - - prediction_metrics['trajectory_lengths'] = {'median': np.nanmedian(ds['length'].values), - 'std': np.nanstd(ds['length'].values)} - - prediction_metrics['pairwise_distances'] = { - 'initial_state': {'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0)}, - 'final_state': {'median': np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks)}, - 'relative_state': {'median': np.nanmedian(d1), 'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1)}, - 'overlapping': {'value': overlapping, - 'comment': 'Overlapping area between PDF(initial_state) and PDF(final_state)'}, - 'staggering': {'value': staggering, 'comment': 'Ratio of PDF(initial_state) vs PDF(final_state) ranges'}, - 'score': {'value': overlapping / len(peaks), 'comment': 'overlapping/nPDFpeaks(final_state)'}} - if np.isinf(overlapping / len(peaks)): raise ValueError("Can't compute the prediction score, infinity !") - ratio = prediction_metrics['pairwise_distances']['final_state']['std'] / \ - prediction_metrics['pairwise_distances']['initial_state']['std'] - prediction_metrics['pairwise_distances']['std_ratio'] = ratio + # Store metrics as VFRschema instance + PD = PairwiseDistances.from_dict({ + 'description': None, + 'initial_state': PairwiseDistancesState.from_dict({ + 'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0), 'description': None, + }), + 'final_state': PairwiseDistancesState.from_dict({ + 'median': np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks), 'description': None, + }), + 'relative_state': PairwiseDistancesState.from_dict({ + 
'median': np.nanmedian(d1), 'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1), 'description': None, + }), + 'overlapping': overlapping, + 'staggering': staggering, + 'score': overlapping / len(peaks), + }) + PD.std_ratio = PD.final_state.std / PD.initial_state.std + + M = Metrics.from_dict({ + "description": None, + "trajectory_lengths": TrajectoryLengths.from_dict({ + "median": np.nanmedian(ds['length'].values), + "std": np.nanstd(ds['length'].values), + "description": None, + }), + "pairwise_distances": PD, + }) # Figure: if show_plot: @@ -400,7 +411,6 @@ def get_hist_and_peaks(this_d): matplotlib.use(backend) if show_plot: - return prediction_metrics, fig, ax + return M, fig, ax else: - return prediction_metrics - + return M diff --git a/vfrecovery/core/trajfile_handler_legacy.py b/vfrecovery/core/trajfile_handler_legacy.py new file mode 100644 index 0000000..42943bc --- /dev/null +++ b/vfrecovery/core/trajfile_handler_legacy.py @@ -0,0 +1,407 @@ +import xarray as xr +import pandas as pd +import numpy as np +import matplotlib +from scipy.signal import find_peaks +from sklearn.metrics import pairwise_distances +import matplotlib.pyplot as plt + +from vfrecovery.utils.misc import get_cfg_str +from vfrecovery.plots.utils import save_figurefile +from vfrecovery.json import Profile + + +class Trajectories: + """Trajectory file manager for VFrecovery + + Examples: + --------- + T = Trajectories(traj_zarr_file) + T.n_floats + T.sim_cycles + df = T.to_index() + df = T.get_index().add_distances() + jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, show_plot=True) + """ + + def __init__(self, zfile): + self.zarr_file = zfile + self.obj = xr.open_zarr(zfile) + self._index = None + + @property + def n_floats(self): + # len(self.obj['trajectory']) + return self.obj['trajectory'].shape[0] + + @property + def sim_cycles(self): + """Return list of cycles simulated""" + cycs = np.unique(self.obj['cycle_number']) + last_obs_phase = \ + self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ + np.newaxis][0] + if last_obs_phase < 3: + cycs = cycs[0:-1] + return cycs + + def __repr__(self): + summary = [""] + summary.append("Swarm size: %i floats" % self.n_floats) + start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) + end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) + summary.append("Simulation length: %s, from %s to %s" % ( + pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) + return "\n".join(summary) + + # def to_index_par(self) -> pd.DataFrame: + # # Deployment loc: + # deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values + # + # def worker(ds, cyc, x0, y0): + # mask = np.logical_and((ds['cycle_number'] == cyc).compute(), + # (ds['cycle_phase'] >= 3).compute()) + # this_cyc = ds.where(mask, drop=True) + # if len(this_cyc['time']) > 0: + # data = { + # 'date': this_cyc.isel(obs=-1)['time'].values, + # 'latitude': this_cyc.isel(obs=-1)['lat'].values, + # 'longitude': this_cyc.isel(obs=-1)['lon'].values, + # 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, + # 'cyc': cyc, + # # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, + # 'deploy_lon': x0, + # 'deploy_lat': y0, + # } + # return pd.DataFrame(data) + # else: + # return None + # + # cycles = np.unique(self.obj['cycle_number']) + # rows = [] + # with concurrent.futures.ThreadPoolExecutor() as executor: + # future_to_url 
= { + # executor.submit( + # worker, + # self.obj, + # cyc, + # deploy_lon, + # deploy_lat + # ): cyc + # for cyc in cycles + # } + # futures = concurrent.futures.as_completed(future_to_url) + # for future in futures: + # data = None + # try: + # data = future.result() + # except Exception: + # raise + # finally: + # rows.append(data) + # + # rows = [r for r in rows if r is not None] + # df = pd.concat(rows).reset_index() + # df['wmo'] = df['wmo'].astype(int) + # df['cyc'] = df['cyc'].astype(int) + # # df['cycle_phase'] = df['cycle_phase'].astype(int) + # self._index = df + # + # return self._index + + def to_index(self) -> pd.DataFrame: + """Compute and return index (profile dataframe from trajectory dataset) + + Create a Profile index :class:`pandas.dataframe` with columns: [data, latitude ,longitude, wmo, cyc, deploy_lon, deploy_lat] + from a trajectory :class:`xarray.dataset`. + + There is one dataframe row for each dataset trajectory cycle. + + We use the last trajectory point of given cycle number (with cycle phase >= 3) to identify a profile location. + + If they are N trajectories simulating C cycles, there will be about a maximum of N*C rows in the dataframe. + + Returns + ------- + :class:`pandas.dataframe` + """ + if self._index is None: + + # Deployment loc: + deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values + + def worker(ds, cyc, x0, y0): + mask = np.logical_and((ds['cycle_number'] == cyc).compute(), + (ds['cycle_phase'] >= 3).compute()) + this_cyc = ds.where(mask, drop=True) + if len(this_cyc['time']) > 0: + data = { + 'date': this_cyc.isel(obs=-1)['time'].values, + 'latitude': this_cyc.isel(obs=-1)['lat'].values, + 'longitude': this_cyc.isel(obs=-1)['lon'].values, + 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, + 'cyc': cyc, + # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, + 'deploy_lon': x0, + 'deploy_lat': y0, + } + return pd.DataFrame(data) + else: + return None + + cycles = np.unique(self.obj['cycle_number']) + rows = [] + for cyc in cycles: + df = worker(self.obj, cyc, deploy_lon, deploy_lat) + rows.append(df) + rows = [r for r in rows if r is not None] + df = pd.concat(rows).reset_index() + df['wmo'] = df['wmo'].astype(int) + df['cyc'] = df['cyc'].astype(int) + # df['cycle_phase'] = df['cycle_phase'].astype(int) + self._index = df + + return self._index + + def get_index(self): + """Compute index and return self""" + self.to_index() + return self + + def add_distances(self, origin: Profile = None) -> pd.DataFrame: + """Compute profiles distance to some origin + + Returns + ------- + :class:`pandas.dataframe` + """ + + # Compute distance between the predicted profile and the initial profile location from the deployment plan + # We assume that virtual floats are sequentially taken from the deployment plan + # Since distances are very short, we compute a simple rectangular distance + + # Observed cycles: + # obs_cyc = np.unique(this_profile['cyc']) + + # Simulated cycles: + # sim_cyc = np.unique(this_df['cyc']) + + df = self._index + + x2, y2 = origin.location.longitude, origin.location.latitude # real float initial position + df['distance'] = np.nan + df['rel_lon'] = np.nan + df['rel_lat'] = np.nan + df['distance_origin'] = np.nan + + def worker(row): + # Simulation profile coordinates: + x0, y0 = row['deploy_lon'], row['deploy_lat'] # virtual float initial position + x1, y1 = row['longitude'], row['latitude'] # virtual float position + + # Distance between each pair of cycles of virtual 
floats: + dist = np.sqrt((y1 - y0) ** 2 + (x1 - x0) ** 2) + row['distance'] = dist + + # Shift between each pair of cycles: + dx, dy = x1 - x0, y1 - y0 + # Get a relative displacement from real float initial position: + row['rel_lon'] = x2 + dx + row['rel_lat'] = y2 + dy + + # Distance between the predicted profile and the observed initial profile + dist = np.sqrt((y2 - y0) ** 2 + (x2 - x0) ** 2) + row['distance_origin'] = dist + + return row + + df = df.apply(worker, axis=1) + self._index = df + + return self._index + + def analyse_pairwise_distances(self, + cycle: int = 1, + show_plot: bool = True, + save_figure: bool = False, + workdir: str = '.', + sim_suffix = None, + this_cfg = None, + this_args: dict = None): + + def get_hist_and_peaks(this_d): + x = this_d.flatten() + x = x[~np.isnan(x)] + x = x[:, np.newaxis] + hist, bin_edges = np.histogram(x, bins=100, density=1) + # dh = np.diff(bin_edges[0:2]) + peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) + return {'pdf': hist, 'bins': bin_edges[0:-1], 'Npeaks': len(peaks)} + + # Squeeze traj file to the first predicted cycle (sim can have more than 1 cycle) + ds = self.obj.where((self.obj['cycle_number'] == cycle).compute(), drop=True) + ds = ds.compute() + + # Compute trajectories relative to the single/only real float initial position: + lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] + lon, lat = ds['lon'].values, ds['lat'].values + ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, + dims=['trajectory', 'obs']) + ds['latc'] = xr.DataArray(lat - np.broadcast_to(lat[:, 0][:, np.newaxis], lat.shape) + lat0, + dims=['trajectory', 'obs']) + + # Compute trajectory lengths: + ds['length'] = np.sqrt(ds.diff(dim='obs')['lon'] ** 2 + ds.diff(dim='obs')['lat'] ** 2).sum(dim='obs') + ds['lengthc'] = np.sqrt(ds.diff(dim='obs')['lonc'] ** 2 + ds.diff(dim='obs')['latc'] ** 2).sum(dim='obs') + + # Compute initial points pairwise distances, PDF and nb of peaks: + X = ds.isel(obs=0) + X = X.isel(trajectory=~np.isnan(X['lon'])) + X0 = np.array((X['lon'].values, X['lat'].values)).T + d0 = pairwise_distances(X0, n_jobs=-1) + d0 = np.triu(d0) + d0[d0 == 0] = np.nan + + x0 = d0.flatten() + x0 = x0[~np.isnan(x0)] + x0 = x0[:, np.newaxis] + + hist0, bin_edges0 = np.histogram(x0, bins=100, density=1) + dh0 = np.diff(bin_edges0[0:2]) + peaks0, _ = find_peaks(hist0 / np.max(hist0), height=.4, distance=20) + + # Compute final points pairwise distances, PDF and nb of peaks: + X = ds.isel(obs=-1) + X = X.isel(trajectory=~np.isnan(X['lon'])) + dsf = X + X = np.array((X['lon'].values, X['lat'].values)).T + d = pairwise_distances(X, n_jobs=-1) + d = np.triu(d) + d[d == 0] = np.nan + + x = d.flatten() + x = x[~np.isnan(x)] + x = x[:, np.newaxis] + + hist, bin_edges = np.histogram(x, bins=100, density=1) + dh = np.diff(bin_edges[0:2]) + peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) + + # Compute final points pairwise distances (relative traj), PDF and nb of peaks: + X1 = ds.isel(obs=-1) + X1 = X1.isel(trajectory=~np.isnan(X1['lonc'])) + dsfc = X1 + X1 = np.array((X1['lonc'].values, X1['latc'].values)).T + d1 = pairwise_distances(X1, n_jobs=-1) + d1 = np.triu(d1) + d1[d1 == 0] = np.nan + + x1 = d1.flatten() + x1 = x1[~np.isnan(x1)] + x1 = x1[:, np.newaxis] + + hist1, bin_edges1 = np.histogram(x1, bins=100, density=1) + dh1 = np.diff(bin_edges1[0:2]) + peaks1, _ = find_peaks(hist1 / np.max(hist1), height=.4, distance=20) + + # Compute the overlapping 
between the initial and relative state PDFs: + bin_unif = np.arange(0, np.max([bin_edges0, bin_edges1]), np.min([dh0, dh1])) + dh_unif = np.diff(bin_unif[0:2]) + hist0_unif = np.interp(bin_unif, bin_edges0[0:-1], hist0) + hist_unif = np.interp(bin_unif, bin_edges[0:-1], hist) + hist1_unif = np.interp(bin_unif, bin_edges1[0:-1], hist1) + + # Area under hist1 AND hist0: + # overlapping = np.sum(hist1_unif[hist0_unif >= hist1_unif]*dh_unif) + overlapping = np.sum(hist_unif[hist0_unif >= hist_unif] * dh_unif) + + # Ratio of the max PDF ranges: + # staggering = np.max(bin_edges1)/np.max(bin_edges0) + staggering = np.max(bin_edges) / np.max(bin_edges0) + + # Store metrics in a dict: + prediction_metrics = {} + + prediction_metrics['trajectory_lengths'] = {'median': np.nanmedian(ds['length'].values), + 'std': np.nanstd(ds['length'].values)} + + prediction_metrics['pairwise_distances'] = { + 'initial_state': {'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0)}, + 'final_state': {'median': np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks)}, + 'relative_state': {'median': np.nanmedian(d1), 'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1)}, + 'overlapping': {'value': overlapping, + 'comment': 'Overlapping area between PDF(initial_state) and PDF(final_state)'}, + 'staggering': {'value': staggering, 'comment': 'Ratio of PDF(initial_state) vs PDF(final_state) ranges'}, + 'score': {'value': overlapping / len(peaks), 'comment': 'overlapping/nPDFpeaks(final_state)'}} + + if np.isinf(overlapping / len(peaks)): + raise ValueError("Can't compute the prediction score, infinity !") + + ratio = prediction_metrics['pairwise_distances']['final_state']['std'] / \ + prediction_metrics['pairwise_distances']['initial_state']['std'] + prediction_metrics['pairwise_distances']['std_ratio'] = ratio + + # Figure: + if show_plot: + backend = matplotlib.get_backend() + if this_args is not None and this_args.json: + matplotlib.use('Agg') + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 10), dpi=90) + ax, ix = ax.flatten(), -1 + cmap = plt.cm.coolwarm + + ix += 1 + dd = dsf['length'].values + ax[ix].plot(X0[:, 0], X0[:, 1], '.', markersize=3, color='grey', alpha=0.5, markeredgecolor=None, zorder=0) + ax[ix].scatter(X[:, 0], X[:, 1], c=dd, zorder=10, s=3, cmap=cmap) + ax[ix].grid() + this_traj = int(dsf.isel(trajectory=np.argmax(dd))['trajectory'].values[np.newaxis][0]) + ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], + ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'r', + zorder=13, label='Longest traj.') + this_traj = int(dsf.isel(trajectory=np.argmin(dd))['trajectory'].values[np.newaxis][0]) + ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], + ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'b', + zorder=13, label='Shortest traj.') + ax[ix].legend() + ax[ix].set_title('Trajectory lengths') + + ix += 1 + ax[ix].plot(bin_edges0[0:-1], hist0, label='Initial (%i peak)' % len(peaks0), color='gray') + ax[ix].plot(bin_edges[0:-1], hist, label='Final (%i peak)' % len(peaks), color='lightblue') + ax[ix].plot(bin_edges[peaks], hist[peaks], "x", label='Peaks') + ax[ix].legend() + ax[ix].grid() + ax[ix].set_xlabel('Pairwise distance [degree]') + line1 = "Staggering: %0.4f" % staggering + line2 = "Overlapping: %0.4f" % overlapping + line3 = "Score: %0.4f" % (overlapping / len(peaks)) + ax[ix].set_title("Pairwise distances PDF: [%s / %s / %s]" % (line1, 
line2, line3)) + + if this_args is not None: + line0 = "VirtualFleet recovery swarm simulation for WMO %i, starting from cycle %i, predicting cycle %i\n%s" % \ + (this_args.wmo, this_args.cyc[0] - 1, this_args.cyc[0], get_cfg_str(this_cfg)) + line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats) + else: + line0 = "VirtualFleet recovery swarm simulation for cycle %i" % cycle + line1 = "Simulation made with %i virtual floats" % (self.n_floats) + + fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) + plt.tight_layout() + + if save_figure: + if sim_suffix is not None: + filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, cycle) + else: + filename = 'vfrecov_metrics01_cyc%i' % (cycle) + save_figurefile(fig, filename, workdir) + + if this_args is not None and this_args.json: + matplotlib.use(backend) + + if show_plot: + return prediction_metrics, fig, ax + else: + return prediction_metrics + diff --git a/vfrecovery/json/VFRschema.py b/vfrecovery/json/VFRschema.py index eeb389b..b93732c 100644 --- a/vfrecovery/json/VFRschema.py +++ b/vfrecovery/json/VFRschema.py @@ -140,7 +140,7 @@ def validate(data, schema) -> Union[bool, List]: return True if len(errors) == 0 else errors def _is_numeric(self, x, name='?'): - assert isinstance(x, (int, float)), "'%s' must be a float, got '%s'" % (name, type(x)) + assert np.asarray(x).dtype.kind in set('buifc'), "'%s' must be numeric, got '%s'" % (name, type(x)) def _is_datetime(self, x, name='?'): assert isinstance(x, ( diff --git a/vfrecovery/json/VFRschema_metrics.py b/vfrecovery/json/VFRschema_metrics.py index 802fda1..4f9b77c 100644 --- a/vfrecovery/json/VFRschema_metrics.py +++ b/vfrecovery/json/VFRschema_metrics.py @@ -139,9 +139,9 @@ def from_dict(obj: Dict) -> 'Transit': class Metrics(VFvalidators): + trajectory_lengths: TrajectoryLengths = None pairwise_distances: PairwiseDistances = None surface_drift: SurfaceDrift = None - trajectory_lengths: TrajectoryLengths = None transit: Transit = None schema: str = "VFrecovery-schema-metrics" diff --git a/vfrecovery/json/__init__.py b/vfrecovery/json/__init__.py index a8dfb80..d74baa9 100644 --- a/vfrecovery/json/__init__.py +++ b/vfrecovery/json/__init__.py @@ -1,3 +1,4 @@ from .VFRschema_profile import Profile from .VFRschema_simulation import Simulation -from .VFRschema_meta import MetaData, MetaDataSystem, MetaDataComputation \ No newline at end of file +from .VFRschema_meta import MetaData, MetaDataSystem, MetaDataComputation +from .VFRschema_metrics import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState \ No newline at end of file From 645d5664bba9f6852965c5e78392f0036dcfc55e Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Mon, 25 Mar 2024 10:37:38 +0100 Subject: [PATCH 12/38] Update schemas and hamdlers --- schemas/VFrecovery-schema-computation.json | 2 +- schemas/VFrecovery-schema-metrics.json | 11 ++++- vfrecovery/json/VFRschema.py | 4 +- vfrecovery/json/VFRschema_metrics.py | 47 ++++++++++++++++------ vfrecovery/json/VFRschema_profile.py | 18 ++++----- vfrecovery/json/VFRschema_simulation.py | 2 +- vfrecovery/json/__init__.py | 4 +- 7 files changed, 60 insertions(+), 28 deletions(-) diff --git a/schemas/VFrecovery-schema-computation.json b/schemas/VFrecovery-schema-computation.json index d787152..1891e7a 100644 --- a/schemas/VFrecovery-schema-computation.json +++ b/schemas/VFrecovery-schema-computation.json @@ -13,7 +13,7 @@ "description": "UTC starting datetime of the computation", "type": ["string", "null"], "format": 
"date-time" - } + }, "cpu_time": { "description": "CPU time used by the computation", "type": ["string", "null"], diff --git a/schemas/VFrecovery-schema-metrics.json b/schemas/VFrecovery-schema-metrics.json index 16be612..c7a4e1a 100644 --- a/schemas/VFrecovery-schema-metrics.json +++ b/schemas/VFrecovery-schema-metrics.json @@ -86,7 +86,16 @@ "unit": {"type": "string"}, "value": {"type": "number"} } + }, + "error": { + "description": "Error amplitude in space/time", + "type": "object", + "properties": { + "distance": {"type": "number", "unit": "km"}, + "bearing": {"type": "number", "unit": "degree"}, + "time": {"type": ["string", "null"], "format": "time-delta"} + } } }, - "maxProperties": 4 + "maxProperties": 5 } \ No newline at end of file diff --git a/vfrecovery/json/VFRschema.py b/vfrecovery/json/VFRschema.py index b93732c..dedccd2 100644 --- a/vfrecovery/json/VFRschema.py +++ b/vfrecovery/json/VFRschema.py @@ -36,7 +36,7 @@ def __repr__(self): for p in self.properties: if p != 'description': v = getattr(self, p) - if isinstance(v, (int, float)): + if np.asarray(v).dtype.kind in set('buifc'): summary.append("%s=%s" % (p, v)) else: summary.append("%s='%s'" % (p, v)) @@ -69,7 +69,7 @@ def default(self, obj): return obj.isoformat() if getattr(type(obj), '__name__') in ['Location', 'Profile', 'Metrics', 'TrajectoryLengths', 'PairwiseDistances', 'PairwiseDistancesState', - 'SurfaceDrift', 'Transit', + 'SurfaceDrift', 'Transit', 'Location_error', 'MetaDataSystem', 'MetaDataComputation', 'MetaData']: # We use "getattr(type(obj), '__name__')" in order to avoid circular import return obj.__dict__ diff --git a/vfrecovery/json/VFRschema_metrics.py b/vfrecovery/json/VFRschema_metrics.py index 4f9b77c..89ffa2d 100644 --- a/vfrecovery/json/VFRschema_metrics.py +++ b/vfrecovery/json/VFRschema_metrics.py @@ -5,7 +5,6 @@ - trajectory_lengths: ArrayMetric - pairwise_distances: PairwiseDistances - surface_drift: SurfaceDrift - - trajectory_lengths: TrajectoryLengths - transit: Transit ``` @@ -20,14 +19,6 @@ - std_ratio ``` - ``` - SurfaceDrift(VFvalidators) - - surface_currents_speed - - surface_currents_speed_unit - - unit - - value - ``` - ``` TrajectoryLengths(ArrayMetric) - median @@ -39,6 +30,15 @@ - nPDFpeaks ``` + + ``` + SurfaceDrift(VFvalidators) + - surface_currents_speed + - surface_currents_speed_unit + - unit + - value + ``` + ``` Transit(VFvalidators) - value @@ -47,6 +47,7 @@ """ from typing import List, Dict +import pandas as pd from .VFRschema import VFvalidators @@ -138,15 +139,37 @@ def from_dict(obj: Dict) -> 'Transit': return Transit(**obj) +class Location_error(VFvalidators): + distance: float = None + bearing: float = None + time: pd.Timedelta = None + + description: str = "Location error" + properties: List = ["distance", "bearing", "time", "description"] + required: List = [] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + if 'time' not in kwargs: + setattr(self, 'time', pd.NaT) + else: + self._is_timedelta(kwargs['time'], 'time') + + @staticmethod + def from_dict(obj: Dict) -> 'Location_error': + return Location_error(**obj) + + class Metrics(VFvalidators): + error: Location_error = None + transit: Transit = None + surface_drift: SurfaceDrift = None trajectory_lengths: TrajectoryLengths = None pairwise_distances: PairwiseDistances = None - surface_drift: SurfaceDrift = None - transit: Transit = None schema: str = "VFrecovery-schema-metrics" description: str = "A set of metrics to describe/interpret one predicted VFrecovery profile location" - properties: List = 
["trajectory_lengths", "pairwise_distances", "surface_drift", "trajectory_lengths", "transit", "description"] + properties: List = ["trajectory_lengths", "pairwise_distances", "surface_drift", "trajectory_lengths", "transit", "error", "description"] required: List = [] @staticmethod diff --git a/vfrecovery/json/VFRschema_profile.py b/vfrecovery/json/VFRschema_profile.py index 5120f8e..262b3ca 100644 --- a/vfrecovery/json/VFRschema_profile.py +++ b/vfrecovery/json/VFRschema_profile.py @@ -58,6 +58,15 @@ class Profile(VFvalidators): properties: List = ["location", "cycle_number", "wmo", "url_float", "url_profile", "virtual_cycle_number", "metrics", "description"] def __init__(self, **kwargs): + super().__init__(**kwargs) + self._validate_wmo(self.wmo) + self._validate_cycle_number(self.cycle_number) + self._validate_cycle_number(self.virtual_cycle_number) + if isinstance(kwargs['location'], dict): + self.location = Location.from_dict(kwargs['location']) + + @staticmethod + def from_dict(obj: Dict) -> 'Profile': """ Parameters @@ -71,15 +80,6 @@ def __init__(self, **kwargs): metrics: Metrics """ - super().__init__(**kwargs) - self._validate_wmo(self.wmo) - self._validate_cycle_number(self.cycle_number) - self._validate_cycle_number(self.virtual_cycle_number) - if isinstance(kwargs['location'], dict): - self.location = Location.from_dict(kwargs['location']) - - @staticmethod - def from_dict(obj: Dict) -> 'Profile': return Profile(**obj) @staticmethod diff --git a/vfrecovery/json/VFRschema_simulation.py b/vfrecovery/json/VFRschema_simulation.py index c85a823..7bcd181 100644 --- a/vfrecovery/json/VFRschema_simulation.py +++ b/vfrecovery/json/VFRschema_simulation.py @@ -12,7 +12,7 @@ class Simulation(VFvalidators): schema: str = "VFrecovery-schema-simulation" description: str = "This document records the details of one VirtualFleet-Recovery simulation and Argo float profile predictions" - required: List = ["initial_profile", "observations", "predictions"] + required: List = ["initial_profile", "predictions"] properties: List = ["initial_profile", "observations", "predictions", "meta_data", "description"] @staticmethod diff --git a/vfrecovery/json/__init__.py b/vfrecovery/json/__init__.py index d74baa9..d7153c1 100644 --- a/vfrecovery/json/__init__.py +++ b/vfrecovery/json/__init__.py @@ -1,4 +1,4 @@ -from .VFRschema_profile import Profile +from .VFRschema_profile import Profile, Location from .VFRschema_simulation import Simulation from .VFRschema_meta import MetaData, MetaDataSystem, MetaDataComputation -from .VFRschema_metrics import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState \ No newline at end of file +from .VFRschema_metrics import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState, Transit, SurfaceDrift, Location_error \ No newline at end of file From e76fdb62951aa46eb815ed418215b313f88e4303 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Mon, 25 Mar 2024 10:38:23 +0100 Subject: [PATCH 13/38] Complete predict --- vfrecovery/core/predict.py | 70 ++-- .../{simulation_handler.py => run_handler.py} | 349 +++++++----------- 2 files changed, 180 insertions(+), 239 deletions(-) rename vfrecovery/core/{simulation_handler.py => run_handler.py} (59%) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 9d49418..6f1483e 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -18,7 +18,7 @@ from .utils import df_obs2jsProfile, ArgoIndex2df_obs, ArgoIndex2jsProfile, get_simulation_suffix, get_domain from 
.deployment_plan import setup_deployment_plan from .trajfile_handler import Trajectories -from .simulation_handler import SimPredictor +from .simulation_handler import RunAnalyser root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") @@ -90,14 +90,15 @@ def setup_floats_config( return CFG -class Simulator: +class Simulation: """ - >>> S = Simulator(wmo, cyc, n_floats=n_floats, velocity=velocity) + >>> S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity) >>> S.setup() >>> S.execute() >>> S.predict() >>> S.postprocess() + >>> S.to_json() """ @@ -105,7 +106,9 @@ def __init__(self, wmo, cyc, **kwargs): self.wmo = wmo self.cyc = cyc self.output_path = kwargs['output_path'] - log_this.info("\n\nSTARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i\n" % (wmo, cyc[1])) + log_this.info("=" * 55) + log_this.info("STARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (wmo, cyc[1])) + log_this.info("=" * 55) # log_this.info("n_predictions: %i" % n_predictions) log_this.info("Working with cycle numbers list: %s" % str(cyc)) @@ -223,26 +226,31 @@ def execute(self): # if os.path.exists(output_path): # shutil.rmtree(output_path) - self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), - step=timedelta(minutes=5), - record=timedelta(minutes=30), - output=True, - output_folder=self.output_path, - output_file='trajectories_%s.zarr' % get_simulation_suffix(self.MD), - verbose_progress=True, - ) - log_this.info("Simulation ended with success") + self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) + if os.path.exists(self.traj_file): + log_this.info("Using data from a previous similar run (no simulation executed)") + else: + self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), + step=timedelta(minutes=5), + record=timedelta(minutes=30), + output=True, + output_folder=self.output_path, + output_file='trajectories_%s.zarr' % get_simulation_suffix(self.MD), + verbose_progress=True, + ) + log_this.info("Simulation ended with success") def predict_read_trajectories(self): # Get simulated profiles index: log_this.info("Extracting swarm profiles index") - self.T = Trajectories(self.VFleet.output) - self.T.get_index().add_distances(origin=self.P_obs[0]) - log_this.debug(pp_obj(self.T)) + # self.traj = Trajectories(self.VFleet.output) + self.traj = Trajectories(self.traj_file) + self.traj.get_index().add_distances(origin=self.P_obs[0]) + log_this.debug(pp_obj(self.traj)) - # jsdata, fig, ax = self.T.analyse_pairwise_distances(cycle=1, show_plot=True) + # jsdata, fig, ax = self.traj.analyse_pairwise_distances(cycle=1, show_plot=True) # if not args.json: # puts(str(T), color=COLORS.magenta) @@ -252,18 +260,18 @@ def predict_read_trajectories(self): def predict_positions(self): """Make predictions based on simulated profile density""" - self.SP = SimPredictor(self.T.to_index(), self.df_obs) + self.run = RunAnalyser(self.traj.to_index(), self.df_obs) log_this.info("Predicting float cycle position(s) from swarm simulation") - log_this.debug(pp_obj(self.SP)) + log_this.debug(pp_obj(self.run)) - self.SP.fit_predict() + self.run.fit_predict() # SP.plot_predictions(VEL, # CFG, # sim_suffix=get_sim_suffix(args, CFG), # save_figure=args.save_figure, # workdir=WORKDIR, # orient='portrait') - # results = self.SP.predictions + # results = self.run.predictions def predict(self): """Make float profile predictions based on the swarm simulation""" @@ -271,12 +279,12 @@ def predict(self): 
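The reuse guard added to `execute()` above is worth a note: when a `trajectories_*.zarr` store matching the simulation suffix already exists on disk, the costly Parcels integration is skipped and the swarm trajectories are simply read back. A minimal sketch of that pattern, with a hypothetical `run_simulation` callable standing in for the `VirtualFleet.simulate()` call:

```python
from pathlib import Path


def simulate_or_reuse(output_path, suffix, run_simulation):
    """Return the trajectory store path, running the simulation only when needed.

    `run_simulation` is a hypothetical stand-in for VirtualFleet.simulate():
    any callable that writes the zarr store at the path it is given.
    """
    traj_file = Path(output_path) / ("trajectories_%s.zarr" % suffix)
    if traj_file.exists():
        print("Using data from a previous similar run (no simulation executed)")
    else:
        run_simulation(traj_file)
    return traj_file
```

The guard is only as safe as the suffix: `get_simulation_suffix(self.MD)` must encode every parameter that changes the outcome (WMO, cycle numbers, swarm size, velocity field), otherwise a stale store would be silently reused.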
self.predict_positions() def postprocess_metrics(self): - self.SP.add_metrics(self.VEL) + self.run.add_metrics(self.VEL) def postprocess_swarm_metrics(self): # Recovery, compute more swarm metrics: - for this_cyc in self.T.sim_cycles: - jsmetrics = self.T.analyse_pairwise_distances(cycle=this_cyc, show_plot=False) + for this_cyc in self.traj.sim_cycles: + jsmetrics = self.traj.analyse_pairwise_distances(cycle=this_cyc, show_plot=False) # if 'metrics' in results['predictions'][this_cyc]: # for key in jsmetrics.keys(): # results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) @@ -287,7 +295,12 @@ def postprocess_swarm_metrics(self): def postprocess(self): self.postprocess_metrics() - self.postprocess_swarm_metrics() + # self.postprocess_swarm_metrics() + + def to_json(self, fp=None): + y = self.run._jsdata # Simulation instance + y.meta_data = self.MD + return y.to_json(fp=fp) def predict_function( @@ -379,7 +392,7 @@ def predict_function( # log_this.error("This is ERROR") # - S = Simulator(wmo, cyc, + S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity, output_path=output_path, @@ -400,10 +413,9 @@ def predict_function( 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), }) - return S.MD.computation.to_json() - # return S.MD.to_json() - + # return S.MD.computation.to_json() # return MD.to_json() + return S.to_json() # output = {'wmo': wmo, 'cyc': cyc, 'velocity': velocity, 'n_predictions': n_predictions, 'cfg': CFG.to_json(indent=0)} # json_dump = json.dumps( diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/run_handler.py similarity index 59% rename from vfrecovery/core/simulation_handler.py rename to vfrecovery/core/run_handler.py index 2b58e95..d6b4e43 100644 --- a/vfrecovery/core/simulation_handler.py +++ b/vfrecovery/core/run_handler.py @@ -1,21 +1,25 @@ -import xarray as xr import pandas as pd import numpy as np -import json -import matplotlib +from typing import List + from sklearn.neighbors import KernelDensity from scipy.signal import find_peaks from sklearn.metrics import pairwise_distances + +import matplotlib import matplotlib.pyplot as plt import argopy.plot as argoplot import cartopy.crs as ccrs -from vfrecovery.utils.misc import get_cfg_str, get_ea_profile_page_url from vfrecovery.plots.utils import save_figurefile, map_add_features from vfrecovery.utils.geo import haversine, bearing +from vfrecovery.json import Simulation, Profile, Location, Metrics, Transit, SurfaceDrift, Location_error -class SimPredictor_0: +pp_obj = lambda x: "\n%s" % "\n".join(["\t%s" % line for line in x.__repr__().split("\n")]) + + +class RunAnalyser_core: """ Examples @@ -23,7 +27,7 @@ class SimPredictor_0: T = Trajectories(traj_zarr_file) df = T.get_index().add_distances() - SP = SimPredictor(df) + SP = RunAnalyser(df) SP.fit_predict() SP.add_metrics(VFvelocity) SP.bbox() @@ -31,7 +35,6 @@ class SimPredictor_0: SP.plan SP.n_cycles SP.trajectory - SP.prediction """ def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): @@ -39,16 +42,16 @@ def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): self.obs = df_obs # self.set_weights() self.WMO = np.unique(df_obs['wmo'])[0] - self._json = None + self._jsdata = [] def __repr__(self): summary = [""] summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) summary.append("Number of simulated cycles: %i profile(s) for 
cycle number(s): [%s]" % ( - self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) + self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) summary.append("Observed reference: %i profile(s) for cycle number(s): [%s]" % ( - self.obs.shape[0], ",".join([str(c) for c in self.obs_cycles]))) + self.obs.shape[0], ",".join([str(c) for c in self.obs_cycles]))) return "\n".join(summary) @property @@ -86,27 +89,20 @@ def trajectory(self): :class:`np.array` """ - if self._json is None: + if len(self._jsdata.predictions) == 0: raise ValueError("Please call `fit_predict` first") traj_prediction = np.array([self.obs['longitude'].values[0], self.obs['latitude'].values[0], self.obs['date'].values[0]])[ np.newaxis] # Starting point where swarm was deployed - for cyc in self._json['predictions'].keys(): - xpred = self._json['predictions'][cyc]['location']['longitude'] - ypred = self._json['predictions'][cyc]['location']['latitude'] - tpred = pd.to_datetime(self._json['predictions'][cyc]['location']['time']) + for p in self._jsdata.predictions: + xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time traj_prediction = np.concatenate((traj_prediction, np.array([xpred, ypred, tpred])[np.newaxis]), axis=0) return traj_prediction - @property - def predictions(self): - if self._json is None: - raise ValueError("Please call `fit_predict` first") - return self._json def bbox(self, s: float = 1) -> list: """Get a bounding box for maps @@ -145,7 +141,7 @@ def bbox(self, s: float = 1) -> list: return ebox -class SimPredictor_1(SimPredictor_0): +class RunAnalyser_predictor(RunAnalyser_core): def set_weights(self, scale: float = 20): """Compute weights for predictions @@ -165,7 +161,7 @@ def set_weights(self, scale: float = 20): self.swarm['weights'] = weights return self - def fit_predict(self, weights_scale: float = 20.) -> dict: + def fit_predict(self, weights_scale: float = 20.) -> List[Profile]: """Predict profile positions from simulated float swarm Prediction is based on a :class:`klearn.neighbors._kde.KernelDensity` estimate of the N_FLOATS @@ -178,25 +174,16 @@ def fit_predict(self, weights_scale: float = 20.) -> dict: Returns ------- - dict + List[Profile] """ - def blank_prediction() -> dict: - return {'location': { - 'longitude': None, - 'latitude': None, - 'time': None}, - 'cycle_number': None, - 'wmo': int(self.WMO), - } - # Compute weights of the swarm float profiles locations self.set_weights(scale=weights_scale) - self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}} + # self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}} cycles = np.unique(self.swarm['cyc']).astype(int) # 1, 2, ... 
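At the heart of `fit_predict`, whose updated body follows, the swarm of virtual-float profiles is reduced to one predicted position per cycle by fitting a weighted kernel density estimate to the profile longitudes and latitudes and keeping the densest point of a regular grid. A self-contained sketch on synthetic data (the bandwidth, grid resolution and weights below are illustrative only, not the values computed by `set_weights`):

```python
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(42)

# Synthetic swarm: 500 virtual-float profile positions scattered around 45W/35N:
lon = -45 + 0.2 * rng.standard_normal(500)
lat = 35 + 0.2 * rng.standard_normal(500)
weights = rng.uniform(0.1, 1.0, 500)  # stand-in for the set_weights() scores

# Fit a weighted KDE on the swarm positions:
X = np.column_stack([lon, lat])
kde = KernelDensity(kernel="gaussian", bandwidth=0.05).fit(X, sample_weight=weights)

# Evaluate the density on a regular grid and keep the most likely point:
g_lon, g_lat = np.meshgrid(np.linspace(lon.min(), lon.max(), 100),
                           np.linspace(lat.min(), lat.max(), 100))
Xg = np.column_stack([g_lon.ravel(), g_lat.ravel()])
llh = kde.score_samples(Xg)  # log-density at each grid point
xpred, ypred = Xg[np.argmax(llh)]
print("Predicted profile position: %0.3f / %0.3f" % (xpred, ypred))
```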
- recovery_predictions = {} + Plist = [] for icyc, this_sim_cyc in enumerate(cycles): this_cyc_df = self.swarm[self.swarm['cyc'] == this_sim_cyc] weights = this_cyc_df['weights'] @@ -215,177 +202,160 @@ def blank_prediction() -> dict: ypred = Xg[np.argmax(llh), 1] tpred = this_cyc_df['date'].mean() - # Store results - recovery = blank_prediction() - recovery['location']['longitude'] = xpred - recovery['location']['latitude'] = ypred - recovery['location']['time'] = tpred.isoformat() - recovery['cycle_number'] = int(self.sim_cycles[icyc]) - recovery['virtual_cycle_number'] = int(self.sim_cycles[icyc]) - recovery_predictions.update({int(this_sim_cyc): recovery}) - - # - self._prediction_data['cyc'].update({this_sim_cyc: {'weights': this_cyc_df['weights']}}) + # Store results in a Profile instance: + p = Profile.from_dict({ + 'location': Location.from_dict({ + 'longitude': xpred, + 'latitude': ypred, + 'time': tpred, + 'description': None, + }), + 'wmo': int(self.WMO), + 'cycle_number': int(self.sim_cycles[icyc]), + 'virtual_cycle_number': int(this_sim_cyc), + 'description': "Simulated profile #%i" % this_sim_cyc, + 'metrics': Metrics.from_dict({'description': None}), + 'url_float': argoplot.dashboard(self.WMO, url_only=True), + }) + Plist.append(p) # Store results internally - self._json = {'predictions': recovery_predictions} + obs_cyc = self.obs_cycles[0] + this_df = self.obs[self.obs['cyc'] == obs_cyc] + + self._jsdata = Simulation.from_dict({ + "initial_profile": Profile.from_dict({ + 'location': Location.from_dict({ + 'longitude': this_df['longitude'].iloc[0], + 'latitude': this_df['latitude'].iloc[0], + 'time': this_df['date'].iloc[0], + 'description': None, + }), + 'wmo': int(self.WMO), + 'cycle_number': int(obs_cyc), + 'description': "Initial profile (observed)", + 'url_float': argoplot.dashboard(self.WMO, url_only=True), + 'url_profile': argoplot.dashboard(self.WMO, obs_cyc, url_only=True), + }), + "predictions": Plist, + "observations": None, + "meta_data": None, + }) # Add more stuff to internal storage: - self._predict_errors() - self._add_ref() - self.add_metrics() - + self._add_ref() # Fill: self._jsdata.observations + self._predict_errors() # Fill: self._jsdata.predictions.Metrics.error + # self.add_metrics() # return self -class SimPredictor_2(SimPredictor_1): +class RunAnalyser_diagnostics(RunAnalyser_predictor): - def _predict_errors(self) -> dict: - """Compute error metrics for the predicted positions + def _add_ref(self): + """Possibly add observations data to internal data structure This is for past cycles, for which we have observed positions of the predicted profiles - This adds more keys to self._json['predictions'] created by the fit_predict method + This populates the ``self._jsdata.observations`` property (``self._jsdata`` was created by the ``fit_predict`` method) - Returns - ------- - dict """ - - def blank_error(): - return {'distance': {'value': None, - 'unit': 'km'}, - 'bearing': {'value': None, - 'unit': 'degree'}, - 'time': {'value': None, - 'unit': 'hour'} - } - - cyc0 = self.obs_cycles[0] - if self._json is None: + if len(self._jsdata.predictions) == 0: raise ValueError("Please call `fit_predict` first") - recovery_predictions = self._json['predictions'] - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in self.obs_cycles: - error = blank_error() + # Observed profiles that were simulated: + Plist = [] + for cyc in self.sim_cycles: + if cyc in self.obs_cycles: + this_df = self.obs[self.obs['cyc'] 
== cyc] + p = Profile.from_dict({ + 'wmo': int(self.WMO), + 'cycle_number': int(cyc), + 'url_float': argoplot.dashboard(self.WMO, url_only=True), + 'url_profile': argoplot.dashboard(self.WMO, cyc, url_only=True), + 'location': Location.from_dict({'longitude': this_df['longitude'].iloc[0], + 'latitude': this_df['latitude'].iloc[0], + 'time': this_df['date'].iloc[0]}) + }) + Plist.append(p) + + self._jsdata.observations = Plist + + return self + + def _predict_errors(self): + """Possibly compute error metrics for the predicted positions + + This is for past cycles, for which we have observed positions of the predicted profiles + + This populates the ``self._jsdata.predictions.Metrics.error`` property (``self._jsdata`` was created by the ``fit_predict`` method) + + """ + if len(self._jsdata.predictions) == 0: + raise ValueError("Please call `fit_predict` first") - this_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0] + Plist_updated = [] + for p in self._jsdata.predictions: + if p.cycle_number in self.obs_cycles: + this_obs_profile = self.obs[self.obs['cyc'] == p.cycle_number] xobs = this_obs_profile['longitude'].iloc[0] yobs = this_obs_profile['latitude'].iloc[0] tobs = this_obs_profile['date'].iloc[0] - prev_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0 - 1] + prev_obs_profile = self.obs[self.obs['cyc'] == p.cycle_number - 1] xobs0 = prev_obs_profile['longitude'].iloc[0] yobs0 = prev_obs_profile['latitude'].iloc[0] - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = pd.to_datetime(this_prediction['location']['time']) + xpred = p.location.longitude + ypred = p.location.latitude + tpred = p.location.time dd = haversine(xobs, yobs, xpred, ypred) - error['distance']['value'] = dd observed_bearing = bearing(xobs0, yobs0, xobs, yobs) sim_bearing = bearing(xobs0, yobs0, xpred, ypred) - error['bearing']['value'] = sim_bearing - observed_bearing dt = pd.Timedelta(tpred - tobs) / np.timedelta64(1, 's') - # print(tpred, tobs, pd.Timedelta(tpred - tobs)) - error['time']['value'] = dt / 3600 # From seconds to hours - this_prediction['location_error'] = error - recovery_predictions.update({sim_c: this_prediction}) + p.metrics.error = Location_error.from_dict({ + 'distance': np.round(dd, 3), + 'bearing': np.round(sim_bearing - observed_bearing, 3), + 'time': pd.Timedelta(dt / 3600, 'h') # From seconds to hours + }) + Plist_updated.append(p) - self._json.update({'predictions': recovery_predictions}) + self._jsdata.predictions = Plist_updated return self - def _add_ref(self): - """Add observations data to internal data structure - - This adds more keys to self._json['predictions'] created by the fit_predict method - - """ - if self._json is None: - raise ValueError("Please call `predict` first") - - # Observed profiles that were simulated: - profiles_to_predict = [] - for cyc in self.sim_cycles: - this = {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': "", - 'location': {'longitude': None, - 'latitude': None, - 'time': None} - } - if cyc in self.obs_cycles: - this['url_profile'] = get_ea_profile_page_url(self.WMO, cyc) - this_df = self.obs[self.obs['cyc'] == cyc] - this['location']['longitude'] = this_df['longitude'].iloc[0] - this['location']['latitude'] = this_df['latitude'].iloc[0] - this['location']['time'] = this_df['date'].iloc[0].isoformat() - profiles_to_predict.append(this) - - self._json.update({'observations': profiles_to_predict}) - - # 
Observed profile used as initial conditions to the simulation: - cyc = self.obs_cycles[0] - this_df = self.obs[self.obs['cyc'] == cyc] - self._json.update({'initial_profile': {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': get_ea_profile_page_url(self.WMO, cyc), - 'location': {'longitude': this_df['longitude'].iloc[0], - 'latitude': this_df['latitude'].iloc[0], - 'time': this_df['date'].iloc[0].isoformat() - } - }}) + def add_metrics(self, VFvel=None): + """Possibly compute more metrics to interpret the prediction error - # - return self + This is for past cycles, for which we have observed positions of the predicted profiles - def add_metrics(self, VFvel=None): - """Compute more metrics to understand the prediction error + This populates the ``self._jsdata.predictions.Metrics.transit`` and ``self._jsdata.predictions.Metrics.surface_drift`` properties (``self._jsdata`` was created by the ``fit_predict`` method) 1. Compute a transit time to cover the distance error (assume a 12 kts boat speed with 1 kt = 1.852 km/h) - 1. Compute the possible drift due to the time lag between the predicted profile timing and the expected one - - This adds more keys to self._json['predictions'] created by the fit_predict method + 1. Compute surface drift due to the time lag between the predicted profile timing and the expected one """ - cyc0 = self.obs_cycles[0] - if self._json is None: + # cyc0 = self.obs_cycles[0] + if len(self._jsdata.predictions) == 0: raise ValueError("Please call `predict` first") - recovery_predictions = self._json['predictions'] - - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in self.obs_cycles and 'location_error' in this_prediction.keys(): - - error = this_prediction['location_error'] - metrics = {} + Plist_updated = [] + for p in self._jsdata.predictions: + if p.cycle_number in self.obs_cycles and isinstance(p.metrics.error, Location_error): # Compute a transit time to cover the distance error: - metrics['transit'] = {'value': None, - 'unit': 'hour', - 'comment': 'Transit time to cover the distance error ' - '(assume a 12 kts boat speed with 1 kt = 1.852 km/h)'} - - if error['distance']['value'] is not None: - metrics['transit']['value'] = pd.Timedelta(error['distance']['value'] / (12 * 1.852), - 'h').seconds / 3600. + p.metrics.transit = Transit.from_dict({ + 'value': + pd.Timedelta(p.metrics.error.distance / (12 * 1.852), 'h').seconds / 3600. 
+ }) # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: if VFvel is not None: - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = this_prediction['location']['time'] + xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time dsc = VFvel.field.interp( {VFvel.dim['lon']: xpred, VFvel.dim['lat']: ypred, @@ -394,25 +364,18 @@ def add_metrics(self, VFvel=None): VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} ) velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] - metrics['surface_drift'] = {'value': None, - 'unit': 'km', - 'surface_currents_speed': None, - 'surface_currents_speed_unit': 'm/s', - 'comment': 'Drift by surface currents due to the float ascent time error ' - '(difference between simulated profile time and the observed one).'} - if error['time']['value'] is not None: - metrics['surface_drift']['value'] = (error['time']['value'] * 3600 * velc / 1e3) - metrics['surface_drift']['surface_currents_speed'] = velc - - # - this_prediction['metrics'] = metrics - recovery_predictions.update({sim_c: this_prediction}) - - self._json.update({"predictions": recovery_predictions}) + p.metrics.surface_drift = SurfaceDrift.from_dict({ + "surface_currents_speed": velc, # m/s by default + "value": (p.metrics.error.time * 3600 * velc / 1e3) # km by default + }) + + Plist_updated.append(p) + + self._jsdata.predictions = Plist_updated return self -class SimPredictor_3(SimPredictor_2): +class RunAnalyser_viz(RunAnalyser_diagnostics): def plot_predictions(self, VFvel, @@ -436,14 +399,14 @@ def plot_predictions(self, else: nrows, ncols = self.n_cycles, 2 if figsize is None: - figsize = (5, (self.n_cycles-1)*5) + figsize = (5, (self.n_cycles - 1) * 5) else: if self.n_cycles == 1: nrows, ncols = 1, 2 else: nrows, ncols = 2, self.n_cycles if figsize is None: - figsize = (ncols*5, 5) + figsize = (ncols * 5, 5) def plot_this(this_ax, i_cycle, ip): df_sim = self.swarm[self.swarm['cyc'] == i_cycle + 1] @@ -548,29 +511,6 @@ def plot_this(this_ax, i_cycle, ip): ix += 1 plot_this(ax[ix], i_cycle, ip) - # log.debug("Start to write metrics string") - # - # xpred = SP.prediction[i_cycle + 1]['location']['longitude']['value'] - # - # err = recovery['prediction_location_error'] - # met = recovery['prediction_metrics'] - # if this_profile.shape[0] > 1: - # # err_str = "Prediction vs Truth: [%0.2fkm, $%0.2f^o$]" % (err['distance'], err['bearing']) - # err_str = "Prediction errors: [dist=%0.2f%s, bearing=$%0.2f^o$, time=%s]\n" \ - # "Distance error represents %s of transit at 12kt" % (err['distance']['value'], - # err['distance']['unit'], - # err['bearing']['value'], - # strfdelta(pd.Timedelta(err['time']['value'], 'h'), - # "{hours}H{minutes:02d}"), - # strfdelta(pd.Timedelta(met['transit']['value'], 'h'), - # "{hours}H{minutes:02d}")) - # else: - # err_str = "" - # - # fig.suptitle("VirtualFleet recovery prediction for WMO %i: \ - # starting from cycle %i, predicting cycle %i\n%s\n%s\n%s" % - # (wmo, cyc[0], cyc[1], get_cfg_str(cfg), err_str, "Prediction based on %s" % vel_name), fontsize=15) - plt.tight_layout() if save_figure: save_figurefile(fig, 'vfrecov_predictions_%s' % sim_suffix, workdir) @@ -578,18 +518,7 @@ def plot_this(this_ax, i_cycle, ip): return fig, ax -class SimPredictor(SimPredictor_3): +class RunAnalyser(RunAnalyser_viz): def to_json(self, fp=None): - kw = {'indent': 4, 'sort_keys': True, 
'default': str} - if fp is not None: - if hasattr(fp, 'write'): - json.dump(self._json, fp, **kw) - else: - with open(fp, 'w') as f: - json.dump(self._json, f, **kw) - else: - results_js = json.dumps(self._json, **kw) - return results_js - - + return self._jsdata.to_json(fp=fp) \ No newline at end of file From 2ce8944cd99c480577f005c6dfca323b23e54c08 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Mon, 25 Mar 2024 15:18:03 +0100 Subject: [PATCH 14/38] Use correct branch for schemas --- schemas/VFrecovery-schema-computation.json | 2 +- schemas/VFrecovery-schema-location.json | 2 +- schemas/VFrecovery-schema-metadata.json | 6 +++--- schemas/VFrecovery-schema-metrics.json | 2 +- schemas/VFrecovery-schema-profile.json | 6 +++--- schemas/VFrecovery-schema-simulation.json | 12 ++++++------ schemas/VFrecovery-schema-system.json | 2 +- vfrecovery/json/VFRschema.py | 13 ++++++++----- 8 files changed, 24 insertions(+), 21 deletions(-) diff --git a/schemas/VFrecovery-schema-computation.json b/schemas/VFrecovery-schema-computation.json index 1891e7a..f225fd6 100644 --- a/schemas/VFrecovery-schema-computation.json +++ b/schemas/VFrecovery-schema-computation.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-computation.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-computation.json", "title": "VirtualFleet-Recovery Simulation Computation", "description": "A set of meta-data documenting one computation run", "format_version": { diff --git a/schemas/VFrecovery-schema-location.json b/schemas/VFrecovery-schema-location.json index c4a3e2c..a09438f 100644 --- a/schemas/VFrecovery-schema-location.json +++ b/schemas/VFrecovery-schema-location.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-location.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-location.json", "title": "VirtualFleet-Recovery location", "description": "A set of longitude/latitude/time coordinates on Earth", "format_version": { diff --git a/schemas/VFrecovery-schema-metadata.json b/schemas/VFrecovery-schema-metadata.json index d40cc3d..dc365a6 100644 --- a/schemas/VFrecovery-schema-metadata.json +++ b/schemas/VFrecovery-schema-metadata.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metadata.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metadata.json", "title": "VirtualFleet-Recovery Simulation Meta-data", "description": "A set of meta-data documenting one simulation", "format_version": { @@ -23,10 +23,10 @@ "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet/json-schemas-FloatConfiguration/schemas/VF-ArgoFloat-Configuration.json" }, "computation": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-computation.json" + "$ref": 
"https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-computation.json" }, "system": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-system.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-system.json" } }, "maxProperties": 5 diff --git a/schemas/VFrecovery-schema-metrics.json b/schemas/VFrecovery-schema-metrics.json index c7a4e1a..15ba42c 100644 --- a/schemas/VFrecovery-schema-metrics.json +++ b/schemas/VFrecovery-schema-metrics.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metrics.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metrics.json", "title": "VirtualFleet-Recovery Predicted profile metrics", "description": "A set of metrics to describe/interpret one VFrecovery predicted profile location", "format_version": { diff --git a/schemas/VFrecovery-schema-profile.json b/schemas/VFrecovery-schema-profile.json index 7afa9d9..ee55d75 100644 --- a/schemas/VFrecovery-schema-profile.json +++ b/schemas/VFrecovery-schema-profile.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json", "title": "VirtualFleet-Recovery Argo float profile location", "description": "A set of meta-data and longitude/latitude/time coordinates on Earth, for an Argo float vertical profile location", "format_version": { @@ -11,7 +11,7 @@ "properties": { "location": { "description": "Space/time coordinates of the profile", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-location.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-location.json" }, "cycle_number":{ "description": "Cycle number of the profile", @@ -36,7 +36,7 @@ "minimum": 0 }, "metrics": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metrics.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metrics.json" }, "dependencies": { "virtual_cycle_number": ["metrics"]} diff --git a/schemas/VFrecovery-schema-simulation.json b/schemas/VFrecovery-schema-simulation.json index 3f38a2b..7dc0be3 100644 --- a/schemas/VFrecovery-schema-simulation.json +++ b/schemas/VFrecovery-schema-simulation.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-simulation.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-simulation.json", "title": "VirtualFleet-Recovery Simulation", "description": "This document 
records details of one VirtualFleet-Recovery simulation and Argo float profile predictions", "format_version": { @@ -16,17 +16,17 @@ "properties": { "initial_profile": { "description": "Argo float profile used as initial conditions to the simulation", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" }, "meta_data": { "description": "Meta-data of the simulation", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metadata.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metadata.json" }, "observations": { "description": "Data from observed Argo float profiles relevant to the simulation predictions", "type": "array", "items": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" }, "minItems": 1, "uniqueItems": true @@ -35,11 +35,11 @@ "description": "Data from the simulated virtual float profiles", "type": "array", "items": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" }, "minItems": 1, "uniqueItems": true } }, - "maxProperties": 4 + "maxProperties": 5 } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-system.json b/schemas/VFrecovery-schema-system.json index d266254..9e6505e 100644 --- a/schemas/VFrecovery-schema-system.json +++ b/schemas/VFrecovery-schema-system.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-system.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-system.json", "title": "VirtualFleet-Recovery Simulation System", "description": "A set of meta-data documenting a system where a simulation is executed", "format_version": { diff --git a/vfrecovery/json/VFRschema.py b/vfrecovery/json/VFRschema.py index dedccd2..0a4b0ca 100644 --- a/vfrecovery/json/VFRschema.py +++ b/vfrecovery/json/VFRschema.py @@ -1,6 +1,6 @@ """ -Re-usable base class +Re-usable base class to handle JSON schema compliance """ @@ -8,9 +8,9 @@ import numpy as np import pandas as pd import ipaddress -from typing import List, Dict, Union, TextIO +from typing import List, Union, TextIO import jsonschema -from jsonschema import Draft202012Validator +# from jsonschema import Draft202012Validator from referencing import Registry, Resource from pathlib import Path import logging @@ -19,8 +19,9 @@ class VFschema: - """A base class to export json files following a schema""" - schema_root: str = "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas" + """A base class to export json files complying to a public schema""" + # schema_root: str = 
"https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/main/schemas" + schema_root: str = "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas" def __init__(self, **kwargs): for key in self.required: @@ -67,6 +68,8 @@ def default(self, obj): return obj.isoformat() if isinstance(obj, pd.Timedelta): return obj.isoformat() + if isinstance(obj, np.float32): + return float(obj) if getattr(type(obj), '__name__') in ['Location', 'Profile', 'Metrics', 'TrajectoryLengths', 'PairwiseDistances', 'PairwiseDistancesState', 'SurfaceDrift', 'Transit', 'Location_error', From 4e997f228c0b0703b55dd0c94f1016bf4c321824 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Mon, 25 Mar 2024 15:18:30 +0100 Subject: [PATCH 15/38] Rename stuff and support to the end of workflow --- .../command_line_interface/group_predict.py | 4 +- vfrecovery/core/__init__.py | 4 +- vfrecovery/core/predict.py | 128 ++-- vfrecovery/core/run_handler.py | 69 +- vfrecovery/core/simulation_handler_legacy.py | 595 ++++++++++++++++++ vfrecovery/core/trajfile_handler.py | 92 ++- 6 files changed, 727 insertions(+), 165 deletions(-) create mode 100644 vfrecovery/core/simulation_handler_legacy.py diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index 94402da..e4ba56b 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -16,7 +16,7 @@ def cli_group_predict() -> None: "predict", short_help="Execute VirtualFleet-Recovery predictions", help=""" - Execute VirtualFleet-Recovery predictor and return results as a JSON string + Execute VirtualFleet-Recovery predictor """, epilog=""" Examples: @@ -151,4 +151,4 @@ def predict( n_floats=n_floats, domain_min_size=domain_min_size, log_level=log_level) - blank_logger.info(json_dump) + # blank_logger.info(json_dump) diff --git a/vfrecovery/core/__init__.py b/vfrecovery/core/__init__.py index 2fbc35d..40b5947 100644 --- a/vfrecovery/core/__init__.py +++ b/vfrecovery/core/__init__.py @@ -1,4 +1,4 @@ # from deployment_plan import setup_deployment_plan -# from trajfile_handler import Trajectories -# from simulation_handler import SimPredictor +from .trajfile_handler import Trajectories +from .run_handler import Simulation # from predict import predict_function diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 6f1483e..1ba18c1 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -18,7 +18,7 @@ from .utils import df_obs2jsProfile, ArgoIndex2df_obs, ArgoIndex2jsProfile, get_simulation_suffix, get_domain from .deployment_plan import setup_deployment_plan from .trajfile_handler import Trajectories -from .simulation_handler import RunAnalyser +from .run_handler import RunAnalyser root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") @@ -106,9 +106,9 @@ def __init__(self, wmo, cyc, **kwargs): self.wmo = wmo self.cyc = cyc self.output_path = kwargs['output_path'] - log_this.info("=" * 55) - log_this.info("STARTING NEW SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (wmo, cyc[1])) - log_this.info("=" * 55) + log_this.info("=" * 50) + log_this.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (wmo, cyc[1])) + log_this.info("=" * 50) # log_this.info("n_predictions: %i" % n_predictions) log_this.info("Working with cycle numbers list: %s" % str(cyc)) @@ -127,22 +127,19 @@ def __init__(self, wmo, cyc, **kwargs): 
'computation': None, # will be filled later }) - def setup_load_observed_profiles(self): + def _setup_load_observed_profiles(self): """Load observed float profiles index""" log_this.info("Loading float profiles index") - # df_obs = ArgoIndex2df_obs(wmo, cyc) - # P_obs = df_obs2jsProfile(df_obs) self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc) - # THIS_DATE = P_obs[0].location.time - [log_this.debug("Observed profiles list: %s" % pp_obj(p)) for p in self.P_obs] + if len(self.P_obs) == 1: - log_this.info('Real-case scenario: True position unknown !') + log_this.info('Real-time scenario: True position unknown !') else: log_this.info('Evaluation scenario: Historical position known') - def setup_float_config(self, **kwargs): + def _setup_float_config(self, **kwargs): """Load and setup float configuration""" # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. @@ -159,7 +156,7 @@ def setup_float_config(self, **kwargs): Path(os.path.join(self.output_path, "floats_configuration_%s.json" % get_simulation_suffix(self.MD)))) log_this.debug(pp_obj(self.CFG)) - def setup_load_velocity_data(self, **kwargs): + def _setup_load_velocity_data(self, **kwargs): # Define domain to load velocity for: # In space: domain, domain_center = get_domain(self.P_obs, kwargs['domain_min_size']) @@ -185,12 +182,13 @@ def setup_load_velocity_data(self, **kwargs): def setup(self, **kwargs): """Fulfill all requirements for the simulation""" - self.setup_load_observed_profiles() - self.setup_float_config(**kwargs) - self.setup_load_velocity_data(**kwargs) + self._setup_load_observed_profiles() + self._setup_float_config(**kwargs) + self._setup_load_velocity_data(**kwargs) log_this.info("Simulation data will be registered with file suffix: '%s'" % get_simulation_suffix(self.MD)) + return self - def execute_get_plan(self): + def _execute_get_plan(self): # VirtualFleet, get a deployment plan: log_this.info("Deployment plan setup") df_plan = setup_deployment_plan(self.P_obs[0], nfloats=self.MD.n_floats) @@ -201,16 +199,16 @@ def execute_get_plan(self): 'time': np.array([np.datetime64(t) for t in df_plan['date'].dt.strftime('%Y-%m-%d %H:%M').array]), } - def execute_get_velocity(self): + def _execute_get_velocity(self): self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, src=self.ds_vel) # figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) def execute(self): - """Setup a VirtualFleet and execute simulation""" + """Execute a VirtualFleet simulation""" - self.execute_get_velocity() - self.execute_get_plan() + self._execute_get_velocity() + self._execute_get_plan() # Set up VirtualFleet: log_this.info("VirtualFleet instance setup") @@ -239,8 +237,9 @@ def execute(self): verbose_progress=True, ) log_this.info("Simulation ended with success") + return self - def predict_read_trajectories(self): + def _predict_read_trajectories(self): # Get simulated profiles index: log_this.info("Extracting swarm profiles index") @@ -252,13 +251,10 @@ def predict_read_trajectories(self): # jsdata, fig, ax = self.traj.analyse_pairwise_distances(cycle=1, show_plot=True) - # if not args.json: - # puts(str(T), color=COLORS.magenta) - # puts(DF_SIM.head().to_string(), color=COLORS.green) # figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, # dd=1, save_figure=args.save_figure, workdir=WORKDIR) - def predict_positions(self): + def 
_predict_positions(self): """Make predictions based on simulated profile density""" self.run = RunAnalyser(self.traj.to_index(), self.df_obs) log_this.info("Predicting float cycle position(s) from swarm simulation") @@ -275,30 +271,47 @@ def predict_positions(self): def predict(self): """Make float profile predictions based on the swarm simulation""" - self.predict_read_trajectories() - self.predict_positions() + self._predict_read_trajectories() + self._predict_positions() + return self - def postprocess_metrics(self): + def _postprocess_metrics(self): + log_this.info("Computing prediction metrics for past cycles with observed ground truth (possibly)") self.run.add_metrics(self.VEL) - def postprocess_swarm_metrics(self): - # Recovery, compute more swarm metrics: - for this_cyc in self.traj.sim_cycles: - jsmetrics = self.traj.analyse_pairwise_distances(cycle=this_cyc, show_plot=False) - # if 'metrics' in results['predictions'][this_cyc]: - # for key in jsmetrics.keys(): - # results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) - # else: - # results['predictions'][this_cyc].update({'metrics': jsmetrics}) - log_this.info(pp_obj(jsmetrics)) - return jsmetrics + def _postprocess_swarm_metrics(self): + log_this.info("Computing swarm metrics") + Plist_updated = [] + for p in self.run.jsobj.predictions: + this_cyc = p.virtual_cycle_number + swarm_metrics = self.traj.analyse_pairwise_distances(virtual_cycle_number=this_cyc, show_plot=False) + p.metrics.trajectory_lengths = swarm_metrics.trajectory_lengths + p.metrics.pairwise_distances = swarm_metrics.pairwise_distances + Plist_updated.append(p) + self.run.jsobj.predictions = Plist_updated def postprocess(self): - self.postprocess_metrics() - # self.postprocess_swarm_metrics() + self._postprocess_metrics() + self._postprocess_swarm_metrics() + return self + + def finish(self, execution_start: float, process_start: float): + """Click timers and save results to finish""" + self.MD.computation = MetaDataComputation.from_dict({ + 'date': pd.to_datetime('now', utc=True), + 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), + 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), + }) + + self.run_file = os.path.join(self.output_path, 'prediction_%s.json' % get_simulation_suffix(self.MD)) + self.to_json(fp=self.run_file) + log_this.info("Simulation results and analysis saved in: %s" % self.run_file) + + log_this.info("VirtualFleet-Recovery simulation finished") + return self def to_json(self, fp=None): - y = self.run._jsdata # Simulation instance + y = self.run.jsobj # :class:`Simulation` instance y.meta_data = self.MD return y.to_json(fp=fp) @@ -406,13 +419,9 @@ def predict_function( S.execute() S.predict() S.postprocess() + S.finish(execution_start, process_start) # - S.MD.computation = MetaDataComputation.from_dict({ - 'date': pd.to_datetime('now', utc=True), - 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), - 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), - }) # return S.MD.computation.to_json() # return MD.to_json() return S.to_json() @@ -430,31 +439,6 @@ def predict_function( # mplbackend = matplotlib.get_backend() # matplotlib.use('Agg') -# # VirtualFleet, get simulated profiles index: -# if not args.json: -# puts("\nExtract swarm profiles index...") -# - - -# # Recovery, finalize JSON output: -# execution_end = time.time() -# process_end = time.process_time() -# computation = { -# 'Date': pd.to_datetime('now', utc=True), -# 'Wall-time': pd.Timedelta(execution_end - 
execution_start, 's'), -# 'CPU-time': pd.Timedelta(process_end - process_start, 's'), -# 'system': getSystemInfo() -# } -# results['meta'] = {'Velocity field': VEL_NAME, -# 'Nfloats': args.nfloats, -# 'Computation': computation, -# 'VFloats_config': CFG.to_json(), -# } -# -# if not args.json: -# puts("\nPredictions:") -# results_js = json.dumps(results, indent=4, sort_keys=True, default=str) -# # with open(os.path.join(WORKDIR, 'prediction_%s.json' % get_sim_suffix(args, CFG)), 'w', encoding='utf-8') as f: # json.dump(results, f, ensure_ascii=False, indent=4, default=str, sort_keys=True) # diff --git a/vfrecovery/core/run_handler.py b/vfrecovery/core/run_handler.py index d6b4e43..60deb99 100644 --- a/vfrecovery/core/run_handler.py +++ b/vfrecovery/core/run_handler.py @@ -19,7 +19,7 @@ pp_obj = lambda x: "\n%s" % "\n".join(["\t%s" % line for line in x.__repr__().split("\n")]) -class RunAnalyser_core: +class RunAnalyserCore: """ Examples @@ -42,7 +42,7 @@ def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): self.obs = df_obs # self.set_weights() self.WMO = np.unique(df_obs['wmo'])[0] - self._jsdata = [] + self.jsobj = [] def __repr__(self): summary = [""] @@ -87,16 +87,15 @@ def trajectory(self): Return ------ :class:`np.array` - """ - if len(self._jsdata.predictions) == 0: + if len(self.jsobj.predictions) == 0: raise ValueError("Please call `fit_predict` first") traj_prediction = np.array([self.obs['longitude'].values[0], self.obs['latitude'].values[0], self.obs['date'].values[0]])[ np.newaxis] # Starting point where swarm was deployed - for p in self._jsdata.predictions: + for p in self.jsobj.predictions: xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time traj_prediction = np.concatenate((traj_prediction, np.array([xpred, ypred, tpred])[np.newaxis]), @@ -105,7 +104,7 @@ def trajectory(self): def bbox(self, s: float = 1) -> list: - """Get a bounding box for maps + """Get a simulation bounding box Parameters ---------- @@ -141,7 +140,7 @@ def bbox(self, s: float = 1) -> list: return ebox -class RunAnalyser_predictor(RunAnalyser_core): +class RunAnalyserPredictor(RunAnalyserCore): def set_weights(self, scale: float = 20): """Compute weights for predictions @@ -223,7 +222,7 @@ def fit_predict(self, weights_scale: float = 20.) -> List[Profile]: obs_cyc = self.obs_cycles[0] this_df = self.obs[self.obs['cyc'] == obs_cyc] - self._jsdata = Simulation.from_dict({ + self.jsobj = Simulation.from_dict({ "initial_profile": Profile.from_dict({ 'location': Location.from_dict({ 'longitude': this_df['longitude'].iloc[0], @@ -243,24 +242,23 @@ def fit_predict(self, weights_scale: float = 20.) 
-> List[Profile]: }) # Add more stuff to internal storage: - self._add_ref() # Fill: self._jsdata.observations - self._predict_errors() # Fill: self._jsdata.predictions.Metrics.error - # self.add_metrics() + self._add_ref() # Fill: self.jsobj.observations + self._predict_errors() # Fill: self.jsobj.predictions.Metrics.error and self.jsobj.predictions.Metrics.transit # return self -class RunAnalyser_diagnostics(RunAnalyser_predictor): +class RunAnalyserDiagnostics(RunAnalyserPredictor): def _add_ref(self): """Possibly add observations data to internal data structure This is for past cycles, for which we have observed positions of the predicted profiles - This populates the ``self._jsdata.observations`` property (``self._jsdata`` was created by the ``fit_predict`` method) + This populates the ``self.jsobj.observations`` property (``self.jsobj`` was created by the ``fit_predict`` method) """ - if len(self._jsdata.predictions) == 0: + if len(self.jsobj.predictions) == 0: raise ValueError("Please call `fit_predict` first") # Observed profiles that were simulated: @@ -279,7 +277,7 @@ def _add_ref(self): }) Plist.append(p) - self._jsdata.observations = Plist + self.jsobj.observations = Plist return self @@ -288,14 +286,17 @@ def _predict_errors(self): This is for past cycles, for which we have observed positions of the predicted profiles - This populates the ``self._jsdata.predictions.Metrics.error`` property (``self._jsdata`` was created by the ``fit_predict`` method) + This populates the ``self.jsobj.predictions.Metrics.error`` and ``self.jsobj.predictions.Metrics.transit`` properties (``self.jsobj`` was created by the ``fit_predict`` method) + + A transit time to cover the distance error is also calculated + (assume a 12 kts boat speed with 1 kt = 1.852 km/h) """ - if len(self._jsdata.predictions) == 0: + if len(self.jsobj.predictions) == 0: raise ValueError("Please call `fit_predict` first") Plist_updated = [] - for p in self._jsdata.predictions: + for p in self.jsobj.predictions: if p.cycle_number in self.obs_cycles: this_obs_profile = self.obs[self.obs['cyc'] == p.cycle_number] xobs = this_obs_profile['longitude'].iloc[0] @@ -322,9 +323,16 @@ def _predict_errors(self): 'bearing': np.round(sim_bearing - observed_bearing, 3), 'time': pd.Timedelta(dt / 3600, 'h') # From seconds to hours }) + + # also compute a transit time to cover the distance error: + p.metrics.transit = Transit.from_dict({ + 'value': + pd.Timedelta(p.metrics.error.distance / (12 * 1.852), 'h').seconds / 3600. + }) + Plist_updated.append(p) - self._jsdata.predictions = Plist_updated + self.jsobj.predictions = Plist_updated return self def add_metrics(self, VFvel=None): @@ -332,27 +340,18 @@ def add_metrics(self, VFvel=None): This is for past cycles, for which we have observed positions of the predicted profiles - This populates the ``self._jsdata.predictions.Metrics.transit`` and ``self._jsdata.predictions.Metrics.surface_drift`` properties (``self._jsdata`` was created by the ``fit_predict`` method) - - 1. Compute a transit time to cover the distance error - (assume a 12 kts boat speed with 1 kt = 1.852 km/h) + This populates the ``self.jsobj.predictions.Metrics.surface_drift`` property (``self.jsobj`` was created by the ``fit_predict`` method) 1. 
Compute surface drift due to the time lag between the predicted profile timing and the expected one """ # cyc0 = self.obs_cycles[0] - if len(self._jsdata.predictions) == 0: + if len(self.jsobj.predictions) == 0: raise ValueError("Please call `predict` first") Plist_updated = [] - for p in self._jsdata.predictions: + for p in self.jsobj.predictions: if p.cycle_number in self.obs_cycles and isinstance(p.metrics.error, Location_error): - # Compute a transit time to cover the distance error: - p.metrics.transit = Transit.from_dict({ - 'value': - pd.Timedelta(p.metrics.error.distance / (12 * 1.852), 'h').seconds / 3600. - }) - # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: if VFvel is not None: xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time @@ -371,11 +370,11 @@ def add_metrics(self, VFvel=None): Plist_updated.append(p) - self._jsdata.predictions = Plist_updated + self.jsobj.predictions = Plist_updated return self -class RunAnalyser_viz(RunAnalyser_diagnostics): +class RunAnalyserView(RunAnalyserDiagnostics): def plot_predictions(self, VFvel, @@ -518,7 +517,7 @@ def plot_this(this_ax, i_cycle, ip): return fig, ax -class RunAnalyser(RunAnalyser_viz): +class RunAnalyser(RunAnalyserView): def to_json(self, fp=None): - return self._jsdata.to_json(fp=fp) \ No newline at end of file + return self.jsobj.to_json(fp=fp) \ No newline at end of file diff --git a/vfrecovery/core/simulation_handler_legacy.py b/vfrecovery/core/simulation_handler_legacy.py new file mode 100644 index 0000000..2b58e95 --- /dev/null +++ b/vfrecovery/core/simulation_handler_legacy.py @@ -0,0 +1,595 @@ +import xarray as xr +import pandas as pd +import numpy as np +import json +import matplotlib +from sklearn.neighbors import KernelDensity +from scipy.signal import find_peaks +from sklearn.metrics import pairwise_distances +import matplotlib.pyplot as plt +import argopy.plot as argoplot +import cartopy.crs as ccrs + +from vfrecovery.utils.misc import get_cfg_str, get_ea_profile_page_url +from vfrecovery.plots.utils import save_figurefile, map_add_features +from vfrecovery.utils.geo import haversine, bearing + + +class SimPredictor_0: + """ + + Examples + -------- + T = Trajectories(traj_zarr_file) + df = T.get_index().add_distances() + + SP = SimPredictor(df) + SP.fit_predict() + SP.add_metrics(VFvelocity) + SP.bbox() + SP.plot_predictions(VFvelocity) + SP.plan + SP.n_cycles + SP.trajectory + SP.prediction + """ + + def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): + self.swarm = df_sim + self.obs = df_obs + # self.set_weights() + self.WMO = np.unique(df_obs['wmo'])[0] + self._json = None + + def __repr__(self): + summary = [""] + summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) + summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) + summary.append("Number of simulated cycles: %i profile(s) for cycle number(s): [%s]" % ( + self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) + summary.append("Observed reference: %i profile(s) for cycle number(s): [%s]" % ( + self.obs.shape[0], ",".join([str(c) for c in self.obs_cycles]))) + return "\n".join(summary) + + @property + def n_cycles(self): + """Number of simulated cycles""" + return len(np.unique(self.swarm['cyc'])) + # return len(self.sim_cycles) + + @property + def obs_cycles(self): + """Observed cycle numbers""" + return np.unique(self.obs['cyc']) + + @property + def sim_cycles(self): + """Simulated cycle numbers""" 
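        # (editor's note) self.obs_cycles[0] is a numpy scalar, so adding the
        # built-in range() broadcasts to an array [cyc0+1, cyc0+2, ...];
        # with a plain Python int this expression would raise a TypeError.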
+ return self.obs_cycles[0] + 1 + range(self.n_cycles) + + @property + def plan(self) -> pd.DataFrame: + if not hasattr(self, '_plan'): + df_plan = self.swarm[self.swarm['cyc'] == 1][['date', 'deploy_lon', 'deploy_lat']] + df_plan = df_plan.rename(columns={'deploy_lon': 'longitude', 'deploy_lat': 'latitude'}) + self._plan = df_plan + return self._plan + + @property + def trajectory(self): + """Return the predicted trajectory as a simple :class:`np.array` + + First row is longitude, 2nd is latitude and 3rd is date of simulated profiles + + Return + ------ + :class:`np.array` + + """ + if self._json is None: + raise ValueError("Please call `fit_predict` first") + + traj_prediction = np.array([self.obs['longitude'].values[0], + self.obs['latitude'].values[0], + self.obs['date'].values[0]])[ + np.newaxis] # Starting point where swarm was deployed + for cyc in self._json['predictions'].keys(): + xpred = self._json['predictions'][cyc]['location']['longitude'] + ypred = self._json['predictions'][cyc]['location']['latitude'] + tpred = pd.to_datetime(self._json['predictions'][cyc]['location']['time']) + traj_prediction = np.concatenate((traj_prediction, + np.array([xpred, ypred, tpred])[np.newaxis]), + axis=0) + return traj_prediction + + @property + def predictions(self): + if self._json is None: + raise ValueError("Please call `fit_predict` first") + return self._json + + def bbox(self, s: float = 1) -> list: + """Get a bounding box for maps + + Parameters + ---------- + s: float, default:1 + + Returns + ------- + list + """ + df_sim = self.swarm + df_obs = self.obs + + box = [np.min([df_sim['deploy_lon'].min(), + df_sim['longitude'].min(), + df_sim['rel_lon'].min(), + df_obs['longitude'].min()]), + np.max([df_sim['deploy_lon'].max(), + df_sim['longitude'].max(), + df_sim['rel_lon'].max(), + df_obs['longitude'].max()]), + np.min([df_sim['deploy_lat'].min(), + df_sim['latitude'].min(), + df_sim['rel_lat'].min(), + df_obs['latitude'].min()]), + np.max([df_sim['deploy_lat'].max(), + df_sim['latitude'].max(), + df_sim['rel_lat'].max(), + df_obs['latitude'].max()])] + rx, ry = box[1] - box[0], box[3] - box[2] + r = np.min([rx, ry]) + ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, box[3] + s * r] + + return ebox + + +class SimPredictor_1(SimPredictor_0): + + def set_weights(self, scale: float = 20): + """Compute weights for predictions + + Add weights column to swarm :class:`pandas.DataFrame` as a gaussian distance + with a std based on the size of the deployment domain + + Parameters + ---------- + scale: float (default=20.) + """ + rx, ry = self.plan['longitude'].max() - self.plan['longitude'].min(), \ + self.plan['latitude'].max() - self.plan['latitude'].min() + r = np.min([rx, ry]) # Minimal size of the deployment domain + weights = np.exp(-(self.swarm['distance_origin'] ** 2) / (r / scale)) + weights[np.isnan(weights)] = 0 + self.swarm['weights'] = weights + return self + + def fit_predict(self, weights_scale: float = 20.) -> dict: + """Predict profile positions from simulated float swarm + + Prediction is based on a :class:`klearn.neighbors._kde.KernelDensity` estimate of the N_FLOATS + simulated, weighted by their deployment distance to the observed previous cycle position. 
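        (Editor's illustration with assumed numbers, not package defaults:
        with ``set_weights`` using a deployment-domain size r = 2 deg and
        scale = 20, a float deployed 0.3 deg from the observed position gets
        weight exp(-0.3**2 / (2/20)) = exp(-0.9) ~ 0.41, while one deployed
        0.6 deg away gets exp(-3.6) ~ 0.03, so nearby deployments dominate
        the density estimate.)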
+ + Parameters + ---------- + weights_scale: float (default=20) + Scale (in deg) to use to weight the deployment distance to the observed previous cycle position + + Returns + ------- + dict + """ + + def blank_prediction() -> dict: + return {'location': { + 'longitude': None, + 'latitude': None, + 'time': None}, + 'cycle_number': None, + 'wmo': int(self.WMO), + } + + # Compute weights of the swarm float profiles locations + self.set_weights(scale=weights_scale) + + self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}} + + cycles = np.unique(self.swarm['cyc']).astype(int) # 1, 2, ... + recovery_predictions = {} + for icyc, this_sim_cyc in enumerate(cycles): + this_cyc_df = self.swarm[self.swarm['cyc'] == this_sim_cyc] + weights = this_cyc_df['weights'] + x, y = this_cyc_df['rel_lon'], this_cyc_df['rel_lat'] + + w = weights / np.max(np.abs(weights), axis=0) + X = np.array([x, y]).T + kde = KernelDensity(kernel='gaussian', bandwidth=0.15).fit(X, sample_weight=w) + + xg, yg = (np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100), + np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)) + xg, yg = np.meshgrid(xg, yg) + Xg = np.array([xg.flatten(), yg.flatten(), ]).T + llh = kde.score_samples(Xg) + xpred = Xg[np.argmax(llh), 0] + ypred = Xg[np.argmax(llh), 1] + tpred = this_cyc_df['date'].mean() + + # Store results + recovery = blank_prediction() + recovery['location']['longitude'] = xpred + recovery['location']['latitude'] = ypred + recovery['location']['time'] = tpred.isoformat() + recovery['cycle_number'] = int(self.sim_cycles[icyc]) + recovery['virtual_cycle_number'] = int(self.sim_cycles[icyc]) + recovery_predictions.update({int(this_sim_cyc): recovery}) + + # + self._prediction_data['cyc'].update({this_sim_cyc: {'weights': this_cyc_df['weights']}}) + + # Store results internally + self._json = {'predictions': recovery_predictions} + + # Add more stuff to internal storage: + self._predict_errors() + self._add_ref() + self.add_metrics() + + # + return self + + +class SimPredictor_2(SimPredictor_1): + + def _predict_errors(self) -> dict: + """Compute error metrics for the predicted positions + + This is for past cycles, for which we have observed positions of the predicted profiles + + This adds more keys to self._json['predictions'] created by the fit_predict method + + Returns + ------- + dict + """ + + def blank_error(): + return {'distance': {'value': None, + 'unit': 'km'}, + 'bearing': {'value': None, + 'unit': 'degree'}, + 'time': {'value': None, + 'unit': 'hour'} + } + + cyc0 = self.obs_cycles[0] + if self._json is None: + raise ValueError("Please call `fit_predict` first") + recovery_predictions = self._json['predictions'] + + for sim_c in recovery_predictions.keys(): + this_prediction = recovery_predictions[sim_c] + if sim_c + cyc0 in self.obs_cycles: + error = blank_error() + + this_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0] + xobs = this_obs_profile['longitude'].iloc[0] + yobs = this_obs_profile['latitude'].iloc[0] + tobs = this_obs_profile['date'].iloc[0] + + prev_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0 - 1] + xobs0 = prev_obs_profile['longitude'].iloc[0] + yobs0 = prev_obs_profile['latitude'].iloc[0] + + xpred = this_prediction['location']['longitude'] + ypred = this_prediction['location']['latitude'] + tpred = pd.to_datetime(this_prediction['location']['time']) + + dd = haversine(xobs, yobs, xpred, ypred) + error['distance']['value'] = dd + + observed_bearing = bearing(xobs0, yobs0, xobs, yobs) + sim_bearing = bearing(xobs0, yobs0, xpred, 
ypred) + error['bearing']['value'] = sim_bearing - observed_bearing + + dt = pd.Timedelta(tpred - tobs) / np.timedelta64(1, 's') + # print(tpred, tobs, pd.Timedelta(tpred - tobs)) + error['time']['value'] = dt / 3600 # From seconds to hours + + this_prediction['location_error'] = error + recovery_predictions.update({sim_c: this_prediction}) + + self._json.update({'predictions': recovery_predictions}) + return self + + def _add_ref(self): + """Add observations data to internal data structure + + This adds more keys to self._json['predictions'] created by the fit_predict method + + """ + if self._json is None: + raise ValueError("Please call `predict` first") + + # Observed profiles that were simulated: + profiles_to_predict = [] + for cyc in self.sim_cycles: + this = {'wmo': int(self.WMO), + 'cycle_number': int(cyc), + 'url_float': argoplot.dashboard(self.WMO, url_only=True), + 'url_profile': "", + 'location': {'longitude': None, + 'latitude': None, + 'time': None} + } + if cyc in self.obs_cycles: + this['url_profile'] = get_ea_profile_page_url(self.WMO, cyc) + this_df = self.obs[self.obs['cyc'] == cyc] + this['location']['longitude'] = this_df['longitude'].iloc[0] + this['location']['latitude'] = this_df['latitude'].iloc[0] + this['location']['time'] = this_df['date'].iloc[0].isoformat() + profiles_to_predict.append(this) + + self._json.update({'observations': profiles_to_predict}) + + # Observed profile used as initial conditions to the simulation: + cyc = self.obs_cycles[0] + this_df = self.obs[self.obs['cyc'] == cyc] + self._json.update({'initial_profile': {'wmo': int(self.WMO), + 'cycle_number': int(cyc), + 'url_float': argoplot.dashboard(self.WMO, url_only=True), + 'url_profile': get_ea_profile_page_url(self.WMO, cyc), + 'location': {'longitude': this_df['longitude'].iloc[0], + 'latitude': this_df['latitude'].iloc[0], + 'time': this_df['date'].iloc[0].isoformat() + } + }}) + + # + return self + + def add_metrics(self, VFvel=None): + """Compute more metrics to understand the prediction error + + 1. Compute a transit time to cover the distance error + (assume a 12 kts boat speed with 1 kt = 1.852 km/h) + + 1. Compute the possible drift due to the time lag between the predicted profile timing and the expected one + + This adds more keys to self._json['predictions'] created by the fit_predict method + + """ + cyc0 = self.obs_cycles[0] + if self._json is None: + raise ValueError("Please call `predict` first") + recovery_predictions = self._json['predictions'] + + for sim_c in recovery_predictions.keys(): + this_prediction = recovery_predictions[sim_c] + if sim_c + cyc0 in self.obs_cycles and 'location_error' in this_prediction.keys(): + + error = this_prediction['location_error'] + metrics = {} + + # Compute a transit time to cover the distance error: + metrics['transit'] = {'value': None, + 'unit': 'hour', + 'comment': 'Transit time to cover the distance error ' + '(assume a 12 kts boat speed with 1 kt = 1.852 km/h)'} + + if error['distance']['value'] is not None: + metrics['transit']['value'] = pd.Timedelta(error['distance']['value'] / (12 * 1.852), + 'h').seconds / 3600. 
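# --- Editor's note: a hedged, standalone sketch of the two interpretation
# --- metrics computed in this method: (1) boat transit time to cover the
# --- distance error (12 kts, 1 kt = 1.852 km/h) and (2) surface drift
# --- accumulated over the profile timing error. Function names are
# --- illustrative, not the package API; this is a plain-float equivalent of
# --- the pd.Timedelta arithmetic used here, valid for errors under one day
# --- (Timedelta.seconds is the seconds *component*, so it wraps at 24 h).
def transit_time_hours(distance_km, boat_speed_kt=12.0):
    # 12 kts * 1.852 km/h per kt = 22.224 km/h cruise speed
    return distance_km / (boat_speed_kt * 1.852)

def surface_drift_km(time_error_hours, surface_speed_ms):
    # drift = current speed (m/s) * time lag (s), converted to km
    return time_error_hours * 3600.0 * surface_speed_ms / 1e3

# e.g. a 40 km position error takes transit_time_hours(40.) ~ 1.8 h of
# steaming, and a 6 h timing error in a 0.3 m/s surface current gives
# surface_drift_km(6., 0.3) = 6.48 km of drift.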
+ + # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: + if VFvel is not None: + xpred = this_prediction['location']['longitude'] + ypred = this_prediction['location']['latitude'] + tpred = this_prediction['location']['time'] + dsc = VFvel.field.interp( + {VFvel.dim['lon']: xpred, + VFvel.dim['lat']: ypred, + VFvel.dim['time']: tpred, + VFvel.dim['depth']: + VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} + ) + velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] + metrics['surface_drift'] = {'value': None, + 'unit': 'km', + 'surface_currents_speed': None, + 'surface_currents_speed_unit': 'm/s', + 'comment': 'Drift by surface currents due to the float ascent time error ' + '(difference between simulated profile time and the observed one).'} + if error['time']['value'] is not None: + metrics['surface_drift']['value'] = (error['time']['value'] * 3600 * velc / 1e3) + metrics['surface_drift']['surface_currents_speed'] = velc + + # + this_prediction['metrics'] = metrics + recovery_predictions.update({sim_c: this_prediction}) + + self._json.update({"predictions": recovery_predictions}) + return self + + +class SimPredictor_3(SimPredictor_2): + + def plot_predictions(self, + VFvel, + cfg, + sim_suffix='', # get_sim_suffix(this_args, cfg) + s=0.2, + alpha=False, + save_figure=False, + workdir='.', + figsize=None, + dpi=120, + orient='portrait'): + ebox = self.bbox(s=s) + pred_traj = self.trajectory + + if orient == 'portrait': + if self.n_cycles == 1: + nrows, ncols = 2, 1 + if figsize is None: + figsize = (5, 5) + else: + nrows, ncols = self.n_cycles, 2 + if figsize is None: + figsize = (5, (self.n_cycles-1)*5) + else: + if self.n_cycles == 1: + nrows, ncols = 1, 2 + else: + nrows, ncols = 2, self.n_cycles + if figsize is None: + figsize = (ncols*5, 5) + + def plot_this(this_ax, i_cycle, ip): + df_sim = self.swarm[self.swarm['cyc'] == i_cycle + 1] + weights = self._prediction_data['cyc'][i_cycle + 1]['weights'].values + if self.sim_cycles[i_cycle] in self.obs_cycles: + this_profile = self.obs[self.obs['cyc'] == self.sim_cycles[i_cycle]] + else: + this_profile = None + + xpred = self.predictions['predictions'][i_cycle + 1]['location']['longitude'] + ypred = self.predictions['predictions'][i_cycle + 1]['location']['latitude'] + + this_ax.set_extent(ebox) + this_ax = map_add_features(ax[ix]) + + v = VFvel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']) + v.plot.quiver(x="longitude", + y="latitude", + u=VFvel.var['U'], + v=VFvel.var['V'], + ax=this_ax, + color='grey', + alpha=0.5, + scale=5, + add_guide=False) + + this_ax.plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', + markersize=3, + color='grey', + alpha=0.1, + markeredgecolor=None, + zorder=0) + + this_ax.plot(pred_traj[:, 0], pred_traj[:, 1], color='k', linewidth=1, marker='+') + this_ax.plot(xpred, ypred, color='g', marker='+') + + w = weights / np.max(np.abs(weights), axis=0) + ii = np.argsort(w) + cmap = plt.cm.cool + # cmap = plt.cm.Reds + + if ip == 0: + x, y = df_sim['deploy_lon'], df_sim['deploy_lat'] + title = 'Initial virtual float positions' + if not alpha: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + else: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + alpha=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + elif ip == 1: + x, y = df_sim['longitude'], df_sim['latitude'] + title = 'Final virtual 
float positions' + if not alpha: + this_ax.scatter(x, y, c=w, marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + else: + this_ax.scatter(x, y, c=w, marker='o', s=4, alpha=w, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + elif ip == 2: + x, y = df_sim['rel_lon'], df_sim['rel_lat'] + title = 'Final virtual floats positions relative to observed float' + if not alpha: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + else: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + marker='o', s=4, alpha=w[ii], edgecolor=None, vmin=0, vmax=1, cmap=cmap) + + # Display full trajectory prediction: + if ip != 0 and this_profile is not None: + this_ax.arrow(this_profile['longitude'].iloc[0], + this_profile['latitude'].iloc[0], + xpred - this_profile['longitude'].iloc[0], + ypred - this_profile['latitude'].iloc[0], + length_includes_head=True, fc='k', ec='c', head_width=0.025, zorder=10) + this_ax.plot(xpred, ypred, 'k+', zorder=10) + + this_ax.set_title("") + # this_ax.set_ylabel("Cycle %i predictions" % (i_cycle+1)) + this_ax.set_title("%s\nCycle %i predictions" % (title, self.sim_cycles[i_cycle]), fontsize=6) + + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi, + subplot_kw={'projection': ccrs.PlateCarree()}, + sharex=True, sharey=True) + ax, ix = ax.flatten(), -1 + + if orient == 'portrait': + rows = range(self.n_cycles) + cols = [1, 2] + else: + rows = [1, 2] + cols = range(self.n_cycles) + + if orient == 'portrait': + for i_cycle in rows: + for ip in cols: + ix += 1 + plot_this(ax[ix], i_cycle, ip) + else: + for ip in rows: + for i_cycle in cols: + ix += 1 + plot_this(ax[ix], i_cycle, ip) + + # log.debug("Start to write metrics string") + # + # xpred = SP.prediction[i_cycle + 1]['location']['longitude']['value'] + # + # err = recovery['prediction_location_error'] + # met = recovery['prediction_metrics'] + # if this_profile.shape[0] > 1: + # # err_str = "Prediction vs Truth: [%0.2fkm, $%0.2f^o$]" % (err['distance'], err['bearing']) + # err_str = "Prediction errors: [dist=%0.2f%s, bearing=$%0.2f^o$, time=%s]\n" \ + # "Distance error represents %s of transit at 12kt" % (err['distance']['value'], + # err['distance']['unit'], + # err['bearing']['value'], + # strfdelta(pd.Timedelta(err['time']['value'], 'h'), + # "{hours}H{minutes:02d}"), + # strfdelta(pd.Timedelta(met['transit']['value'], 'h'), + # "{hours}H{minutes:02d}")) + # else: + # err_str = "" + # + # fig.suptitle("VirtualFleet recovery prediction for WMO %i: \ + # starting from cycle %i, predicting cycle %i\n%s\n%s\n%s" % + # (wmo, cyc[0], cyc[1], get_cfg_str(cfg), err_str, "Prediction based on %s" % vel_name), fontsize=15) + + plt.tight_layout() + if save_figure: + save_figurefile(fig, 'vfrecov_predictions_%s' % sim_suffix, workdir) + + return fig, ax + + +class SimPredictor(SimPredictor_3): + + def to_json(self, fp=None): + kw = {'indent': 4, 'sort_keys': True, 'default': str} + if fp is not None: + if hasattr(fp, 'write'): + json.dump(self._json, fp, **kw) + else: + with open(fp, 'w') as f: + json.dump(self._json, f, **kw) + else: + results_js = json.dumps(self._json, **kw) + return results_js + + diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py index 84887ce..1512c07 100644 --- a/vfrecovery/core/trajfile_handler.py +++ b/vfrecovery/core/trajfile_handler.py @@ -222,29 +222,46 @@ def worker(row): return self._index def analyse_pairwise_distances(self, - cycle: int = 1, - show_plot: bool = True, + 
virtual_cycle_number: int = 1, + show_plot: bool = False, save_figure: bool = False, workdir: str = '.', sim_suffix=None, this_cfg=None, this_args: dict = None): - def get_hist_and_peaks(this_d): - x = this_d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - hist, bin_edges = np.histogram(x, bins=100, density=1) - # dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) - return {'pdf': hist, 'bins': bin_edges[0:-1], 'Npeaks': len(peaks)} - - # Squeeze traj file to the first predicted cycle (sim can have more than 1 cycle) - ds = self.obj.where((self.obj['cycle_number'] == cycle).compute(), drop=True) + def pairs_pdf(longitude, latitude): + Xi = np.array((longitude, latitude)).T + di = pairwise_distances(Xi, n_jobs=-1) + di = np.triu(di) + di[di == 0] = np.nan + + xi = di.flatten() + xi = xi[~np.isnan(xi)] + xi = xi[:, np.newaxis] + + histi, bin_edgesi = np.histogram(xi, bins=100, density=1) + dhi = np.diff(bin_edgesi[0:2]) + peaksi, _ = find_peaks(histi / np.max(histi), height=.4, distance=20) + + return histi, bin_edgesi, peaksi, dhi, di + + # def get_hist_and_peaks(this_d): + # x = this_d.flatten() + # x = x[~np.isnan(x)] + # x = x[:, np.newaxis] + # hist, bin_edges = np.histogram(x, bins=100, density=1) + # # dh = np.diff(bin_edges[0:2]) + # peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) + # return {'pdf': hist, 'bins': bin_edges[0:-1], 'Npeaks': len(peaks)} + + # Squeeze traj file to virtual_cycle_number (sim can have more than 1 cycle): + ds = self.obj.where((self.obj['cycle_number'] == virtual_cycle_number).compute(), drop=True) ds = ds.compute() - # Compute trajectories relative to the single/only real float initial position: - lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] + # Compute swarm trajectories relative to the single/only real float initial position: + # (Make all swarm trajectories to start at the same first position) + lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] # deployment locations lon, lat = ds['lon'].values, ds['lat'].values ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, dims=['trajectory', 'obs']) @@ -258,52 +275,19 @@ def get_hist_and_peaks(this_d): # Compute initial points pairwise distances, PDF and nb of peaks: X = ds.isel(obs=0) X = X.isel(trajectory=~np.isnan(X['lon'])) - X0 = np.array((X['lon'].values, X['lat'].values)).T - d0 = pairwise_distances(X0, n_jobs=-1) - d0 = np.triu(d0) - d0[d0 == 0] = np.nan - - x0 = d0.flatten() - x0 = x0[~np.isnan(x0)] - x0 = x0[:, np.newaxis] - - hist0, bin_edges0 = np.histogram(x0, bins=100, density=1) - dh0 = np.diff(bin_edges0[0:2]) - peaks0, _ = find_peaks(hist0 / np.max(hist0), height=.4, distance=20) + hist0, bin_edges0, peaks0, dh0, d0 = pairs_pdf(X['lon'].values, X['lat'].values) # Compute final points pairwise distances, PDF and nb of peaks: X = ds.isel(obs=-1) X = X.isel(trajectory=~np.isnan(X['lon'])) dsf = X - X = np.array((X['lon'].values, X['lat'].values)).T - d = pairwise_distances(X, n_jobs=-1) - d = np.triu(d) - d[d == 0] = np.nan - - x = d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - - hist, bin_edges = np.histogram(x, bins=100, density=1) - dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) + hist, bin_edges, peaks, dh, d = pairs_pdf(X['lon'].values, X['lat'].values) # Compute final points pairwise distances (relative traj), PDF and nb of peaks: X1 
= ds.isel(obs=-1) X1 = X1.isel(trajectory=~np.isnan(X1['lonc'])) dsfc = X1 - X1 = np.array((X1['lonc'].values, X1['latc'].values)).T - d1 = pairwise_distances(X1, n_jobs=-1) - d1 = np.triu(d1) - d1[d1 == 0] = np.nan - - x1 = d1.flatten() - x1 = x1[~np.isnan(x1)] - x1 = x1[:, np.newaxis] - - hist1, bin_edges1 = np.histogram(x1, bins=100, density=1) - dh1 = np.diff(bin_edges1[0:2]) - peaks1, _ = find_peaks(hist1 / np.max(hist1), height=.4, distance=20) + hist1, bin_edges1, peaks1, dh1, d1 = pairs_pdf(X1['lonc'].values, X1['latc'].values) # Compute the overlapping between the initial and relative state PDFs: bin_unif = np.arange(0, np.max([bin_edges0, bin_edges1]), np.min([dh0, dh1])) @@ -394,7 +378,7 @@ def get_hist_and_peaks(this_d): (this_args.wmo, this_args.cyc[0] - 1, this_args.cyc[0], get_cfg_str(this_cfg)) line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats) else: - line0 = "VirtualFleet recovery swarm simulation for cycle %i" % cycle + line0 = "VirtualFleet recovery swarm simulation for cycle %i" % virtual_cycle_number line1 = "Simulation made with %i virtual floats" % (self.n_floats) fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) @@ -402,9 +386,9 @@ def get_hist_and_peaks(this_d): if save_figure: if sim_suffix is not None: - filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, cycle) + filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, virtual_cycle_number) else: - filename = 'vfrecov_metrics01_cyc%i' % (cycle) + filename = 'vfrecov_metrics01_cyc%i' % (virtual_cycle_number) save_figurefile(fig, filename, workdir) if this_args is not None and this_args.json: From e441d2c50c50fde855f9554927dd706e87c3f6bc Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Mon, 25 Mar 2024 15:25:44 +0100 Subject: [PATCH 16/38] Removed maxProperties from schemas --- schemas/VFrecovery-schema-computation.json | 3 +-- schemas/VFrecovery-schema-location.json | 3 +-- schemas/VFrecovery-schema-metadata.json | 3 +-- schemas/VFrecovery-schema-metrics.json | 3 +-- schemas/VFrecovery-schema-profile.json | 3 +-- schemas/VFrecovery-schema-simulation.json | 3 +-- schemas/VFrecovery-schema-system.json | 3 +-- vfrecovery/core/predict.py | 6 +++--- 8 files changed, 10 insertions(+), 17 deletions(-) diff --git a/schemas/VFrecovery-schema-computation.json b/schemas/VFrecovery-schema-computation.json index f225fd6..714d083 100644 --- a/schemas/VFrecovery-schema-computation.json +++ b/schemas/VFrecovery-schema-computation.json @@ -24,6 +24,5 @@ "type": ["string", "null"], "format": "time-delta" } - }, - "maxProperties": 4 + } } diff --git a/schemas/VFrecovery-schema-location.json b/schemas/VFrecovery-schema-location.json index a09438f..a6d6663 100644 --- a/schemas/VFrecovery-schema-location.json +++ b/schemas/VFrecovery-schema-location.json @@ -26,6 +26,5 @@ "format": "date-time", "description": "Date and time of the geo-location" } - }, - "maxProperties": 3 + } } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-metadata.json b/schemas/VFrecovery-schema-metadata.json index dc365a6..9b47094 100644 --- a/schemas/VFrecovery-schema-metadata.json +++ b/schemas/VFrecovery-schema-metadata.json @@ -28,6 +28,5 @@ "system": { "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-system.json" } - }, - "maxProperties": 5 + } } diff --git a/schemas/VFrecovery-schema-metrics.json b/schemas/VFrecovery-schema-metrics.json index 15ba42c..99e3e6a 100644 --- 
a/schemas/VFrecovery-schema-metrics.json +++ b/schemas/VFrecovery-schema-metrics.json @@ -96,6 +96,5 @@ "time": {"type": ["string", "null"], "format": "time-delta"} } } - }, - "maxProperties": 5 + } } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-profile.json b/schemas/VFrecovery-schema-profile.json index ee55d75..072eba4 100644 --- a/schemas/VFrecovery-schema-profile.json +++ b/schemas/VFrecovery-schema-profile.json @@ -40,6 +40,5 @@ }, "dependencies": { "virtual_cycle_number": ["metrics"]} - }, - "maxProperties": 8 + } } diff --git a/schemas/VFrecovery-schema-simulation.json b/schemas/VFrecovery-schema-simulation.json index 7dc0be3..869b0c5 100644 --- a/schemas/VFrecovery-schema-simulation.json +++ b/schemas/VFrecovery-schema-simulation.json @@ -40,6 +40,5 @@ "minItems": 1, "uniqueItems": true } - }, - "maxProperties": 5 + } } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-system.json b/schemas/VFrecovery-schema-system.json index 9e6505e..e1b4b7c 100644 --- a/schemas/VFrecovery-schema-system.json +++ b/schemas/VFrecovery-schema-system.json @@ -41,6 +41,5 @@ "type":["string", "null"], "description": "" } - }, - "maxProperties": 8 + } } \ No newline at end of file diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 1ba18c1..b41c9c0 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -106,9 +106,9 @@ def __init__(self, wmo, cyc, **kwargs): self.wmo = wmo self.cyc = cyc self.output_path = kwargs['output_path'] - log_this.info("=" * 50) + log_this.info("=" * 55) log_this.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (wmo, cyc[1])) - log_this.info("=" * 50) + log_this.info("=" * 55) # log_this.info("n_predictions: %i" % n_predictions) log_this.info("Working with cycle numbers list: %s" % str(cyc)) @@ -307,7 +307,7 @@ def finish(self, execution_start: float, process_start: float): self.to_json(fp=self.run_file) log_this.info("Simulation results and analysis saved in: %s" % self.run_file) - log_this.info("VirtualFleet-Recovery simulation finished") + log_this.info("VirtualFleet-Recovery prediction finished") return self def to_json(self, fp=None): From f0abd8fc67d1502bc2fbaa8f7c5d606b8e77a90d Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Mon, 25 Mar 2024 15:51:41 +0100 Subject: [PATCH 17/38] fix schema --- schemas/VFrecovery-schema-metrics.json | 6 +++--- schemas/VFrecovery-schema-simulation.json | 8 ++++---- vfrecovery/core/predict.py | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/schemas/VFrecovery-schema-metrics.json b/schemas/VFrecovery-schema-metrics.json index 99e3e6a..05d9aa2 100644 --- a/schemas/VFrecovery-schema-metrics.json +++ b/schemas/VFrecovery-schema-metrics.json @@ -71,7 +71,7 @@ }, "surface_drift": { "description": "Drift by surface currents due to the float ascent time error (difference between simulated profile time and the observed one)", - "type": "object", + "type": ["object", "null"], "properties": { "surface_currents_speed": {"type": "number"}, "surface_currents_speed_unit": {"type": "string"}, @@ -81,7 +81,7 @@ }, "transit": { "description": "Transit time to cover the distance error (assume a 12 kts boat speed with 1 kt = 1.852 km/h)", - "type": "object", + "type": ["object", "null"], "properties": { "unit": {"type": "string"}, "value": {"type": "number"} @@ -89,7 +89,7 @@ }, "error": { "description": "Error amplitude in space/time", - "type": "object", + "type": ["object", "null"], "properties": { "distance": {"type": "number", "unit": "km"}, 
"bearing": {"type": "number", "unit": "degree"}, diff --git a/schemas/VFrecovery-schema-simulation.json b/schemas/VFrecovery-schema-simulation.json index 869b0c5..7e12f0f 100644 --- a/schemas/VFrecovery-schema-simulation.json +++ b/schemas/VFrecovery-schema-simulation.json @@ -16,7 +16,9 @@ "properties": { "initial_profile": { "description": "Argo float profile used as initial conditions to the simulation", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json", + "minItems": 1, + "uniqueItems": true }, "meta_data": { "description": "Meta-data of the simulation", @@ -27,9 +29,7 @@ "type": "array", "items": { "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" - }, - "minItems": 1, - "uniqueItems": true + } }, "predictions": { "description": "Data from the simulated virtual float profiles", diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index b41c9c0..4dce2c1 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -214,7 +214,8 @@ def execute(self): log_this.info("VirtualFleet instance setup") self.VFleet = VirtualFleet(plan=self.PLAN, fieldset=self.VEL, - mission=self.CFG) + mission=self.CFG, + verbose_events=True) # Execute the simulation: log_this.info("Starting simulation") From 05397bb330ce504ffd1db6429110cba4bb18f426 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Mon, 25 Mar 2024 16:01:28 +0100 Subject: [PATCH 18/38] Update VFrecovery-schema-simulation.json --- schemas/VFrecovery-schema-simulation.json | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/schemas/VFrecovery-schema-simulation.json b/schemas/VFrecovery-schema-simulation.json index 7e12f0f..a51ea80 100644 --- a/schemas/VFrecovery-schema-simulation.json +++ b/schemas/VFrecovery-schema-simulation.json @@ -14,22 +14,23 @@ ], "type": "object", "properties": { + "meta_data": { + "description": "Meta-data of the simulation", + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metadata.json" + }, "initial_profile": { "description": "Argo float profile used as initial conditions to the simulation", "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json", "minItems": 1, "uniqueItems": true }, - "meta_data": { - "description": "Meta-data of the simulation", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metadata.json" - }, "observations": { "description": "Data from observed Argo float profiles relevant to the simulation predictions", "type": "array", "items": { "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" - } + }, + "uniqueItems": true }, "predictions": { "description": "Data from the simulated virtual float profiles", From f7bae597370428bdb851a0183847188e128d517e Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 26 Mar 2024 13:35:11 +0100 Subject: [PATCH 19/38] misc --- vfrecovery/core/predict.py | 49 
++++++++++++++++++---------------- vfrecovery/core/run_handler.py | 12 ++++++--- vfrecovery/core/utils.py | 8 +++--- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 4dce2c1..2d26b76 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -59,25 +59,24 @@ def setup_floats_config( cfg_free_surface_drift: int, ) -> FloatConfiguration: """Load float configuration at a given cycle number and possibly overwrite data with user parameters""" - log_this.debug("Loading float configuration...") try: CFG = FloatConfiguration([wmo, cyc]) except: - log_this.debug("Can't load this profile configuration, fall back on default values") + log_this.error("Can't load this profile configuration, fall back on default values") CFG = FloatConfiguration('default') if cfg_parking_depth is not None: - log_this.info("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], + log_this.debug("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], float(cfg_parking_depth))) CFG.update('parking_depth', float(cfg_parking_depth)) if cfg_cycle_duration is not None: - log_this.info("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], + log_this.debug("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], float(cfg_cycle_duration))) CFG.update('cycle_duration', float(cfg_cycle_duration)) if cfg_profile_depth is not None: - log_this.info("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], + log_this.debug("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], float(cfg_profile_depth))) CFG.update('profile_depth', float(cfg_profile_depth)) @@ -106,9 +105,8 @@ def __init__(self, wmo, cyc, **kwargs): self.wmo = wmo self.cyc = cyc self.output_path = kwargs['output_path'] - log_this.info("=" * 55) - log_this.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (wmo, cyc[1])) - log_this.info("=" * 55) + log_this.info("%s \\" % ("=" * 55)) + log_this.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) # log_this.info("n_predictions: %i" % n_predictions) log_this.info("Working with cycle numbers list: %s" % str(cyc)) @@ -131,7 +129,7 @@ def _setup_load_observed_profiles(self): """Load observed float profiles index""" log_this.info("Loading float profiles index") - self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc) + self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc, cache=False, cachedir=str(self.output_path)) [log_this.debug("Observed profiles list: %s" % pp_obj(p)) for p in self.P_obs] if len(self.P_obs) == 1: @@ -141,6 +139,7 @@ def _setup_load_observed_profiles(self): def _setup_float_config(self, **kwargs): """Load and setup float configuration""" + log_this.info("Loading float configuration") # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. 
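        # A minimal sketch of this override behaviour (assuming the
        # virtualargofleet.FloatConfiguration API as imported in this module;
        # the WMO/cycle values below are hypothetical):
        #
        #     CFG = FloatConfiguration([6903091, 111])  # mission config at the previous cycle
        #     CFG.update('parking_depth', float(500))   # user-defined override, in db
        #     CFG.mission['parking_depth']              # -> 500.0
        #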
# (the loaded config is possibly overwritten with user defined cfg_* parameters) @@ -185,12 +184,20 @@ def setup(self, **kwargs): self._setup_load_observed_profiles() self._setup_float_config(**kwargs) self._setup_load_velocity_data(**kwargs) - log_this.info("Simulation data will be registered with file suffix: '%s'" % get_simulation_suffix(self.MD)) + log_this.info("Simulation data will be registered under: %s%s*%s*" % (self.output_path, + os.path.sep, + get_simulation_suffix(self.MD))) + log_this.debug("Setup terminated") return self + def _execute_get_velocity(self): + self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, + src=self.ds_vel) + # figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) + def _execute_get_plan(self): # VirtualFleet, get a deployment plan: - log_this.info("Deployment plan setup") + log_this.info("Create a deployment plan") df_plan = setup_deployment_plan(self.P_obs[0], nfloats=self.MD.n_floats) log_this.info("Set %i virtual floats to deploy (i.e. swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) @@ -198,12 +205,6 @@ def _execute_get_plan(self): 'lat': df_plan['latitude'], 'time': np.array([np.datetime64(t) for t in df_plan['date'].dt.strftime('%Y-%m-%d %H:%M').array]), } - - def _execute_get_velocity(self): - self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, - src=self.ds_vel) - # figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) - def execute(self): """Execute a VirtualFleet simulation""" @@ -211,11 +212,11 @@ def execute(self): self._execute_get_plan() # Set up VirtualFleet: - log_this.info("VirtualFleet instance setup") + log_this.info("Create a VirtualFleet instance") self.VFleet = VirtualFleet(plan=self.PLAN, fieldset=self.VEL, mission=self.CFG, - verbose_events=True) + verbose_events=False) # Execute the simulation: log_this.info("Starting simulation") @@ -227,7 +228,7 @@ def execute(self): self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) if os.path.exists(self.traj_file): - log_this.info("Using data from a previous similar run (no simulation executed)") + log_this.warning("Using data from a previous similar run (no simulation executed)") else: self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), step=timedelta(minutes=5), @@ -277,7 +278,8 @@ def predict(self): return self def _postprocess_metrics(self): - log_this.info("Computing prediction metrics for past cycles with observed ground truth (possibly)") + if self.run.has_ref: + log_this.info("Computing prediction metrics for past cycles with observed ground truth") self.run.add_metrics(self.VEL) def _postprocess_swarm_metrics(self): @@ -304,11 +306,12 @@ def finish(self, execution_start: float, process_start: float): 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), }) - self.run_file = os.path.join(self.output_path, 'prediction_%s.json' % get_simulation_suffix(self.MD)) + self.run_file = os.path.join(self.output_path, 'results_%s.json' % get_simulation_suffix(self.MD)) self.to_json(fp=self.run_file) log_this.info("Simulation results and analysis saved in: %s" % self.run_file) - log_this.info("VirtualFleet-Recovery prediction finished") + log_this.info("END OF SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) + log_this.info("%s /" % ("=" * 55)) return self def to_json(self, 
fp=None): diff --git a/vfrecovery/core/run_handler.py b/vfrecovery/core/run_handler.py index 60deb99..58d72c0 100644 --- a/vfrecovery/core/run_handler.py +++ b/vfrecovery/core/run_handler.py @@ -65,6 +65,10 @@ def obs_cycles(self): """Observed cycle numbers""" return np.unique(self.obs['cyc']) + @property + def has_ref(self): + return len(self.obs_cycles) > 1 + @property def sim_cycles(self): """Simulated cycle numbers""" @@ -316,12 +320,12 @@ def _predict_errors(self): observed_bearing = bearing(xobs0, yobs0, xobs, yobs) sim_bearing = bearing(xobs0, yobs0, xpred, ypred) - dt = pd.Timedelta(tpred - tobs) / np.timedelta64(1, 's') + dt = pd.Timedelta(tpred - tobs)# / np.timedelta64(1, 's') p.metrics.error = Location_error.from_dict({ 'distance': np.round(dd, 3), 'bearing': np.round(sim_bearing - observed_bearing, 3), - 'time': pd.Timedelta(dt / 3600, 'h') # From seconds to hours + 'time': pd.Timedelta(dt, 'h') }) # also compute a transit time to cover the distance error: @@ -362,10 +366,10 @@ def add_metrics(self, VFvel=None): VFvel.dim['depth']: VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} ) - velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] + velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] # m/s p.metrics.surface_drift = SurfaceDrift.from_dict({ "surface_currents_speed": velc, # m/s by default - "value": (p.metrics.error.time * 3600 * velc / 1e3) # km by default + "value": (np.abs(p.metrics.error.time.total_seconds()) * velc / 1e3) # km }) Plist_updated.append(p) diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py index c928297..a908248 100644 --- a/vfrecovery/core/utils.py +++ b/vfrecovery/core/utils.py @@ -8,7 +8,7 @@ from vfrecovery.json import Profile, MetaData -def ArgoIndex2df_obs(a_wmo, a_cyc) -> pd.DataFrame: +def ArgoIndex2df_obs(a_wmo, a_cyc, cache:bool=False, cachedir:str='.') -> pd.DataFrame: """Retrieve WMO/CYC Argo index entries as :class:`pd.DataFrame` Parameters @@ -23,7 +23,7 @@ def ArgoIndex2df_obs(a_wmo, a_cyc) -> pd.DataFrame: host = "https://data-argo.ifremer.fr" # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" - idx = ArgoIndex(host=host).search_wmo_cyc(a_wmo, a_cyc) + idx = ArgoIndex(host=host, cache=cache, cachedir=cachedir).search_wmo_cyc(a_wmo, a_cyc) if idx.N_MATCH == 0: raise DataNotFound("This float has no cycle %i usable as initial conditions for a simulation of %i" % (a_cyc[0], a_cyc[1])) else: @@ -40,7 +40,7 @@ def df_obs2jsProfile(df_obs) -> List[Profile]: return Plist -def ArgoIndex2jsProfile(a_wmo, a_cyc) -> List[Profile]: +def ArgoIndex2jsProfile(a_wmo, a_cyc, cache:bool=False, cachedir:str='.') -> List[Profile]: """Retrieve WMO/CYC Argo index entries as a list of :class:`vfrecovery.json.Profile` Parameters @@ -52,7 +52,7 @@ def ArgoIndex2jsProfile(a_wmo, a_cyc) -> List[Profile]: ------- :class:`vfrecovery.json.Profile` """ - df_obs = ArgoIndex2df_obs(a_wmo, a_cyc) + df_obs = ArgoIndex2df_obs(a_wmo, a_cyc, cache=cache, cachedir=cachedir) return df_obs2jsProfile(df_obs), df_obs From 6d282cd37ac36ddb92216d3ea2f9993320129e4f Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 26 Mar 2024 13:47:52 +0100 Subject: [PATCH 20/38] Update README.md --- README.md | 140 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 114 insertions(+), 26 deletions(-) diff --git a/README.md 
b/README.md
index 91f393f..d080eab 100644
--- a/README.md
+++ b/README.md
@@ -3,25 +3,118 @@
 | [![DOI](https://zenodo.org/badge/543618989.svg)](https://zenodo.org/badge/latestdoi/543618989) |
 
 The goal of this repository is to provide a CLI and Python library to make Argo floats trajectory predictions easy, in order to facilitate recovery.
-The library produces a prediction _patch_ or _cone_ that could be displayed on a map like here: https://floatrecovery.euro-argo.eu
-More about Argo floats recovery in here: https://github.com/euroargodev/recovery/issues
+
+More about Argo float recovery here:
+- https://floatrecovery.euro-argo.eu
+- https://github.com/euroargodev/recovery/issues
+
 
 # Documentation
 
 ## Command Line Interface
 
-Main commands:
+The primary command groups are ``predict`` and ``describe``.
+
+### vfrecovery predict
 
 ```bash
-vfrecovery predict WMO CYC
-vfrecovery predict WMO CYC1 CYC2 CYC3
+Usage: vfrecovery predict [OPTIONS] WMO CYC
 
-vfrecovery describe WMO CYC
-vfrecovery describe WMO CYC1 CYC2 CYC3
+  Execute VirtualFleet-Recovery predictor
 
-vfrecovery whiterun WMO CYC
-vfrecovery whiterun WMO CYC1 CYC2 CYC3
+Options:
+  -v, --velocity TEXT             Velocity field to use. Possible values are:
+                                  'GLORYS', 'ARMOR3D'  [default: GLORYS]
+  --output_path TEXT              Simulation data output folder  [default:
+                                  './vfrecovery_simulations_data/<WMO>/<CYC>']
+  --cfg_parking_depth FLOAT       Virtual floats parking depth in db
+                                  [default: previous cycle value]
+  --cfg_cycle_duration FLOAT      Virtual floats cycle duration in hours
+                                  [default: previous cycle value]
+  --cfg_profile_depth FLOAT       Virtual floats profile depth in db
+                                  [default: previous cycle value]
+  --cfg_free_surface_drift INTEGER
+                                  Virtual cycle number to start free surface
+                                  drift, inclusive  [default: 9999]
+  -np, --n_predictions INTEGER    Number of profiles to simulate after cycle
+                                  specified with argument 'CYC'  [default: 0]
+  -nf, --n_floats INTEGER         Number of virtual floats simulated to make
+                                  predictions  [default: 100]
+  -s, --domain_min_size FLOAT     Minimal size (deg) of the simulation domain
+                                  around the initial float position  [default:
+                                  12]
+  --log_level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]
+                                  Set the details printed to console by the
+                                  command (based on standard logging library).
+                                  [default: INFO]
+  -h, --help                      Show this message and exit.
+
+  Examples:
+
+  vfrecovery predict 6903091 112
+
+```
+
+### vfrecovery describe
 
-vfrecovery meetwith "cruise_track.csv" WMO CYC0
+```bash
+Usage: vfrecovery describe [OPTIONS] WMO [CYC]...
+
+  Returns data about an existing VirtualFleet-Recovery prediction
+
+  Data could be a JSON file, specific metrics or images
+
+Options:
+  --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]
+                                  Set the details printed to console by the
+                                  command (based on standard logging library).
+                                  [default: INFO]
+  -h, --help                      Show this message and exit.
+
+  Examples:
+
+  vfrecovery describe 6903091
+
+  vfrecovery describe 6903091 112
+
+```
+
+## Python interface
+
+
+### vfrecovery.predict
+
+```python
+import vfrecovery
+
+wmo, cyc = 6903091, 126
+results = vfrecovery.predict(wmo, cyc)
+```
+
+Signature:
+```
+vfrecovery.predict(
+    wmo: int,
+    cyc: int,
+    velocity: str = 'GLORYS',
+    output_path: Union[str, pathlib.Path] = None,
+    n_predictions: int = 0,
+    cfg_parking_depth: float = None,
+    cfg_cycle_duration: float = None,
+    cfg_profile_depth: float = None,
+    cfg_free_surface_drift: int = 9999,
+    n_floats: int = 100,
+    domain_min_size: float = 12.0,
+    log_level: str = 'INFO',
+)
+```
+
+
+
+# API Design
+
+## Making predictions
+
+```bash
+vfrecovery predict WMO CYC
+vfrecovery predict WMO CYC1 CYC2 CYC3
 ```
 
 Options:
@@ -35,31 +128,26 @@ vfrecovery predict -nf 2000 WMO CYC
 
 vfrecovery predict --velocity GLORYS WMO CYC
 vfrecovery predict -v GLORYS WMO CYC
 
-vfrecovery predict --quiet WMO CYC
-vfrecovery predict -q WMO CYC
-
 vfrecovery predict --cfg_parking_depth 200 WMO CYC
-vfrecovery predict -cfg_pdpt 200 WMO CYC
+vfrecovery predict --cfg_parking_depth [200, 1000] WMO CYC1 CYC2
 
 vfrecovery predict --cfg_cycle_duration 60 WMO CYC
-vfrecovery predict -cfg_clen 60 WMO CYC
 
 vfrecovery predict --cfg_profile_depth 1000 WMO CYC
-vfrecovery predict -cfg_pfdpt 1000 WMO CYC
 ```
+
+## Describing results
 
 ```bash
-vfrecovery predict --cfg_parking_depth [200, 1000] WMO CYC1 CYC2
+vfrecovery describe WMO CYC
+vfrecovery describe WMO CYC1 CYC2 CYC3
 ```
 
-## Python interface
+## Other commands
 
-```python
-import vfrecovery
+```bash
+vfrecovery whiterun WMO CYC
+vfrecovery whiterun WMO CYC1 CYC2 CYC3
 
-vfrecovery.predict(
-    wmo=,
-    cyc=,
-    [OPTION] = ,
-)
-```
\ No newline at end of file
+vfrecovery meetwith "cruise_track.csv" WMO CYC0
+```
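As a quick illustration of the Python interface documented above (a sketch only: the WMO, cycle, and option values are hypothetical; the keyword arguments are those listed in the README signature):

```python
import vfrecovery

# Predict the location of cycle 112 of float 6903091, plus the two
# following cycles, using the ARMOR3D velocity field:
results = vfrecovery.predict(6903091, 112, velocity='ARMOR3D', n_predictions=2)
```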
From 2e60de39b4d1c140e53d97d33712fc2adfc9066f Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Tue, 26 Mar 2024 13:49:33 +0100
Subject: [PATCH 21/38] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d080eab..4f5c28d 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@
 The primary command groups are ``predict`` and ``describe``.
 
 ### vfrecovery predict
-```bash
+```
 Usage: vfrecovery predict [OPTIONS] WMO CYC
 
   Execute VirtualFleet-Recovery predictor
@@ -55,7 +55,7 @@
 ### vfrecovery describe
-```bash
+```
 Usage: vfrecovery describe [OPTIONS] WMO [CYC]...
 
 Returns data about an existing VirtualFleet-Recovery prediction
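The CLI help screens quoted in the README can also be rendered in-process with click's testing utilities; a minimal sketch against the ``base_command_line_interface`` entry point declared in setup.py (nothing here is specific to a given float):

```python
from click.testing import CliRunner
from vfrecovery.command_line_interface.virtualfleet_recovery import base_command_line_interface

# Render the same `describe` help text as quoted in the README:
runner = CliRunner()
result = runner.invoke(base_command_line_interface, ["describe", "--help"])
print(result.output)
```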
From 4fefc14f14d1bb7a0356afc5872662e2aa9b7f67 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Tue, 26 Mar 2024 14:00:28 +0100
Subject: [PATCH 22/38] improved help

---
 .../command_line_interface/group_describe.py  |  2 +-
 .../command_line_interface/group_predict.py   | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py
index cca0ca4..bd9690a 100644
--- a/vfrecovery/command_line_interface/group_describe.py
+++ b/vfrecovery/command_line_interface/group_describe.py
@@ -19,7 +19,7 @@ def cli_group_describe() -> None:
 
 @cli_group_describe.command(
     "describe",
-    short_help="Describe VirtualFleet-Recovery predictions",
+    short_help="Describe VirtualFleet-Recovery simulation results",
     help="""
     Returns data about an existing VirtualFleet-Recovery prediction
 
diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py
index e4ba56b..2bae26b 100644
--- a/vfrecovery/command_line_interface/group_predict.py
+++ b/vfrecovery/command_line_interface/group_predict.py
@@ -16,13 +16,16 @@ def cli_group_predict() -> None:
     "predict",
     short_help="Execute VirtualFleet-Recovery predictions",
     help="""
-    Execute VirtualFleet-Recovery predictor
+    Execute the VirtualFleet-Recovery predictor
+
+    WMO is the float World Meteorological Organisation number.
+
+    CYC is the cycle number of the location to predict. To simulate more than one cycle, use the `n_predictions` option (see below).
     """,
     epilog="""
-    Examples:
-
-    \b
-    vfrecovery predict 6903091 112
+Examples:
+\b
+\n\tvfrecovery predict 6903091 112
     """,  # noqa
 )
 @click.option(
@@ -131,6 +134,9 @@ def predict(
     domain_min_size,
     log_level,
 ) -> None:
+    """
+
+    """
     if log_level == "QUIET":
         root_logger.disabled = True
         log_level = "CRITICAL"
From efc6bbf1f93328ce6e3b2979c487b5fc0dca0698 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Wed, 27 Mar 2024 14:48:42 +0100
Subject: [PATCH 23/38] Add plots

---
 README.md                                     |   4 +
 environment.yml                               |   4 +-
 .../command_line_interface/group_describe.py  |  48 ++++++--
 .../command_line_interface/group_plot.py      |  81 ++++++++++++++
 .../virtualfleet_recovery.py                  |   2 +
 vfrecovery/core/__init__.py                   |   2 +-
 vfrecovery/core/describe.py                   |  36 +++---
 vfrecovery/core/plot.py                       |  66 +++++++++++
 vfrecovery/core/predict.py                    |  69 +++++++-----
 vfrecovery/core/run_handler.py                | 103 ++++++++++--------
 vfrecovery/core/trajfile_handler.py           |  97 ++++++++++++++++-
 vfrecovery/plots/plot_velocity.py             |  42 -------
 .../plots/{plot_positions.py => positions.py} |   0
 vfrecovery/plots/utils.py                     |   9 +-
 vfrecovery/plots/velocity.py                  |   8 ++
 vfrecovery/utils/misc.py                      |  14 +++
 16 files changed, 435 insertions(+), 150 deletions(-)
 create mode 100644 vfrecovery/command_line_interface/group_plot.py
 create mode 100644 vfrecovery/core/plot.py
 delete mode 100644 vfrecovery/plots/plot_velocity.py
 rename vfrecovery/plots/{plot_positions.py => positions.py} (100%)
 create mode 100644 vfrecovery/plots/velocity.py

diff --git a/README.md b/README.md
index 4f5c28d..8de145c 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,10 @@ vfrecovery describe WMO CYC
 vfrecovery describe WMO CYC1 CYC2 CYC3
 ```
 
+```bash
+vfrecovery describe velocity WMO CYC
+```
+
 ## Other commands
 
 ```bash
diff --git a/environment.yml b/environment.yml
index fd404e0..1b38c7a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -7,7 +7,7 @@ dependencies:
  - parcels>=3.0.0
  - dask
  - distributed
- - dask-kubernetes
+# - dask-kubernetes
  - bottleneck
  - gcsfs
  - zarr
@@ -57,4 +57,4 @@ dependencies:
  - geojson
  - dominate
  - copernicusmarine>=1.0<=2.0
- - git+https://github.com/euroargodev/VirtualFleet.git@master
+# - git+https://github.com/euroargodev/VirtualFleet.git@master  # better with `pip install --editable`
diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py
index bd9690a..5cc7e8c 100644
--- a/vfrecovery/command_line_interface/group_describe.py
+++ b/vfrecovery/command_line_interface/group_describe.py
@@ -4,26 +4,32 @@
 import argopy.plot as argoplot
 from argopy.errors import DataNotFound
 from argopy import ArgoIndex
+import os
+from pathlib import Path
+import glob
 
+from vfrecovery.utils.misc import list_float_simulation_folders
 
-from vfrecovery.core.describe import describe_function
 root_logger = logging.getLogger("vfrecovery_root_logger")
 blank_logger = logging.getLogger("vfrecovery_blank_logger")
 
-
 @click.group()
 def cli_group_describe() -> None:
     pass
 
+
 @cli_group_describe.command(
     "describe",
-    short_help="Describe VirtualFleet-Recovery simulation results",
+    short_help="Describe VirtualFleet-Recovery data and simulation results",
     help="""
-    Returns data about an existing VirtualFleet-Recovery prediction
+
+    TARGET selects what is to be described; a string in: 'all', 'obs', 'velocity'.
+
+    WMO is the float World Meteorological Organisation number
 
-    Data could be a JSON file, specific metrics or images
+    CYC is the cycle number to restrict the description to
     """,
     epilog="""
     Examples:
@@ -34,7 +40,7 @@
     \b
     vfrecovery describe 6903091 112
     """,  # noqa
-    )
+)
 @click.option(
     "--log-level",
     type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]),
     default="INFO",
     show_default=True,
     help=(
         "Set the details printed to console by the command "
         "(based on standard logging library)."
     ),
 )
+@click.argument('TARGET', nargs=1, type=str)
 @click.argument('WMO', nargs=1, type=int)
 @click.argument("CYC", nargs=-1, type=int)
 def describe(
+    target,
     wmo,
     cyc,
     log_level,
 ) -> None:
     if log_level == "QUIET":
         root_logger.disabled = True
         log_level = "CRITICAL"
     root_logger.setLevel(level=getattr(logging, log_level.upper()))
 
     if root_logger.isEnabledFor(logging.DEBUG):
         root_logger.debug("DEBUG mode activated")
 
     # Validate arguments:
+    if target.lower() not in ["all", "obs", "velocity"]:
+        raise ValueError("The first argument TARGET must be one of ['all', 'obs', 'velocity']")
+
     assert is_wmo(wmo)
     wmo = check_wmo(wmo)[0]
     cyc = list(cyc)
     if len(cyc) > 0:
         assert is_cyc(cyc)
         cyc = check_cyc(cyc)
 
-    #
     # json_dump = describe_function(wmo,
     #                           cyc=cyc,
     #                           log_level=log_level)
     # blank_logger.info(json_dump)
+    if target == 'obs':
+        describe_obs(wmo, cyc)
+
+    elif target == 'velocity':
+        describe_velocity(wmo, cyc)
+
+
+def describe_velocity(wmo, cyc):
+
+    # List folders to examine:
+    plist = list_float_simulation_folders(wmo, cyc)
+
+    # List all available velocity files:
+    for c in plist.keys():
+        p = plist[c]
+        click.secho("Velocity file(s) for WMO=%s / CYC=%s:" % (wmo, c), fg='green')
+        vlist = sorted(p.glob("velocity_*.nc"))
+        if len(vlist) > 0:
+            [click.secho("\t- %s" % v) for v in vlist]
+        else:
+            click.secho("\tNo velocity file", fg='red')
+
+
+def describe_obs(wmo, cyc):
     url = argoplot.dashboard(wmo, url_only=True)
     # txt = "You can check this float dashboard while we search for float profiles in the index: %s" % url
     click.secho("\nYou can check this float dashboard while we search for float profile(s) in the index:")
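In passing, `describe_velocity` above relies on `list_float_simulation_folders`, added to `vfrecovery/utils/misc.py` later in this patch; a minimal sketch of the intended behaviour (the float value is hypothetical):

```python
from vfrecovery.utils.misc import list_float_simulation_folders

# Map each available cycle number to its simulation folder for float 6903091,
# i.e. the vfrecovery_simulations_data/<WMO>/<CYC> entries found on disk:
plist = list_float_simulation_folders(6903091)
for c, p in plist.items():
    print(c, sorted(p.glob("velocity_*.nc")))  # one velocity file list per cycle
```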
diff --git a/vfrecovery/command_line_interface/group_plot.py b/vfrecovery/command_line_interface/group_plot.py
new file mode 100644
index 0000000..0265a30
--- /dev/null
+++ b/vfrecovery/command_line_interface/group_plot.py
@@ -0,0 +1,81 @@
+import click
+import logging
+from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo
+import argopy.plot as argoplot
+from argopy.errors import DataNotFound
+from argopy import ArgoIndex
+import os
+from pathlib import Path
+import glob
+
+from vfrecovery.core.plot import plot_velocity
+
+root_logger = logging.getLogger("vfrecovery_root_logger")
+blank_logger = logging.getLogger("vfrecovery_blank_logger")
+
+
+@click.group()
+def cli_group_plot() -> None:
+    pass
+
+
+@cli_group_plot.command(
+    "plot",
+    short_help="Plot VirtualFleet-Recovery data or simulation results",
+    help="""
+
+    TARGET selects what is to be plotted; a string in: 'velocity'.
+
+    WMO is the float World Meteorological Organisation number
+
+    CYC is the cycle number to restrict plots to
+    """,
+    epilog="""
+    Examples:
+
+    \b
+    vfrecovery plot velocity 6903091 80
+    """,  # noqa
+)
+@click.option(
+    "--log-level",
+    type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]),
+    default="INFO",
+    show_default=True,
+    help=(
+        "Set the details printed to console by the command "
+        "(based on standard logging library)."
+ ), +) +@click.argument('TARGET', nargs=1, type=str) +@click.argument('WMO', nargs=1, type=int) +@click.argument("CYC", nargs=-1, type=int) +def plot( + target, + wmo, + cyc, + log_level, +) -> None: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if root_logger.isEnabledFor(logging.DEBUG): + root_logger.debug("DEBUG mode activated") + + # Validate arguments: + if target.lower() not in ["all", "obs", "velocity"]: + raise ValueError("The first argument TARGET must be one in ['all', 'obs', 'velocity']") + + assert is_wmo(wmo) + wmo = check_wmo(wmo)[0] + cyc = list(cyc) + if len(cyc) > 0: + assert is_cyc(cyc) + cyc = check_cyc(cyc) + + if target == 'velocity': + plot_velocity(wmo, cyc, + log_level=log_level, + ) diff --git a/vfrecovery/command_line_interface/virtualfleet_recovery.py b/vfrecovery/command_line_interface/virtualfleet_recovery.py index b38adac..de8c291 100644 --- a/vfrecovery/command_line_interface/virtualfleet_recovery.py +++ b/vfrecovery/command_line_interface/virtualfleet_recovery.py @@ -2,12 +2,14 @@ from vfrecovery.command_line_interface.group_describe import cli_group_describe from vfrecovery.command_line_interface.group_predict import cli_group_predict +from vfrecovery.command_line_interface.group_plot import cli_group_plot @click.command( cls=click.CommandCollection, sources=[ cli_group_describe, cli_group_predict, + cli_group_plot, ], context_settings=dict(help_option_names=["-h", "--help"]), ) diff --git a/vfrecovery/core/__init__.py b/vfrecovery/core/__init__.py index 40b5947..b42f062 100644 --- a/vfrecovery/core/__init__.py +++ b/vfrecovery/core/__init__.py @@ -1,4 +1,4 @@ # from deployment_plan import setup_deployment_plan from .trajfile_handler import Trajectories -from .run_handler import Simulation +from .run_handler import RunAnalyser # from predict import predict_function diff --git a/vfrecovery/core/describe.py b/vfrecovery/core/describe.py index 57f9798..c383f1e 100644 --- a/vfrecovery/core/describe.py +++ b/vfrecovery/core/describe.py @@ -11,15 +11,7 @@ root_logger = logging.getLogger("vfrecovery_root_logger") -def describe_function( - wmo: int, - cyc: Union[int, None], - log_level: str, -) -> str: - if log_level == "QUIET": - root_logger.disabled = True - log_level = "CRITICAL" - root_logger.setLevel(level=getattr(logging, log_level.upper())) +def describe_obs(wmo, cyc): # Validate arguments: assert is_wmo(wmo) @@ -28,7 +20,6 @@ def describe_function( assert is_cyc(cyc) cyc = check_cyc(cyc)[0] - # url = argoplot.dashboard(wmo, url_only=True) txt = "You can check this float dashboard while we search for float profiles in the index: %s" % url @@ -48,8 +39,23 @@ def describe_function( df = df.sort_values(by='date') root_logger.info("\n%s" % df.to_string(max_colwidth=15)) - output = {'wmo': wmo, 'cyc': cyc} - json_dump = json.dumps( - output, sort_keys=False, indent=2 - ) - return json_dump + # output = {'wmo': wmo, 'cyc': cyc} + # json_dump = json.dumps( + # output, sort_keys=False, indent=2 + # ) + # return json_dump + + +def describe_function( + wmo: int, + cyc: Union[int, None], + target: str, + log_level: str, +) -> str: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if target == 'obs': + describe_obs(wmo, cyc) \ No newline at end of file diff --git a/vfrecovery/core/plot.py b/vfrecovery/core/plot.py new file mode 100644 index 0000000..58eccd5 --- 
/dev/null +++ b/vfrecovery/core/plot.py @@ -0,0 +1,66 @@ +import logging +import xarray as xr +from virtualargofleet import Velocity + +from vfrecovery.utils.misc import list_float_simulation_folders +import vfrecovery.plots.velocity as pltvel + + +root_logger = logging.getLogger("vfrecovery_root_logger") +plot_logger = logging.getLogger("vfrecovery_plot") + + +class log_this: + + def __init__(self, txt, log_level): + """Log text to simulation and possibly root logger(s)""" + getattr(root_logger, log_level.lower())(txt) + getattr(plot_logger, log_level.lower())(txt) + + @staticmethod + def info(txt) -> 'log_this': + return log_this(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'log_this': + return log_this(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'log_this': + return log_this(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'log_this': + return log_this(txt, 'ERROR') + + +def plot_velocity( + wmo: int, + cyc: int, + log_level: str, +): + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + # List folders to examine: + plist = list_float_simulation_folders(wmo, cyc) + + # + for c in plist.keys(): + p = plist[c] + log_this.info("Velocity figure(s) for WMO=%s / CYC=%s:" % (wmo, c)) + ilist = sorted(p.glob("velocity_*.png")) + if len(ilist) > 0: + [log_this.info("\t- %s" % i) for i in ilist] + else: + log_this.info("No velocity figures ! Generating new ones from velocity files") + + # Load velocity field + vlist = sorted(p.glob("velocity_*.nc")) + for v in vlist: + log_this.info("Loading '%s'" % v) + # ds_vel = xr.open_dataset(v) + # VEL = Velocity(model='GLORYS12V1' if 'GLORYS' in str(v) else 'ARMOR3D', src=ds_vel) + # pltvel.plot(VEL, wmo, cyc, save_figure=False, workdir=p) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 2d26b76..8584887 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -15,6 +15,7 @@ from vfrecovery.json import Profile, MetaData, MetaDataSystem, MetaDataComputation from vfrecovery.utils.formatters import COLORS from vfrecovery.downloaders import get_velocity_field +# from vfrecovery.plots.velocity import VFRvelocity # Velocity with plotting features from .utils import df_obs2jsProfile, ArgoIndex2df_obs, ArgoIndex2jsProfile, get_simulation_suffix, get_domain from .deployment_plan import setup_deployment_plan from .trajfile_handler import Trajectories @@ -67,17 +68,17 @@ def setup_floats_config( if cfg_parking_depth is not None: log_this.debug("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], - float(cfg_parking_depth))) + float(cfg_parking_depth))) CFG.update('parking_depth', float(cfg_parking_depth)) if cfg_cycle_duration is not None: log_this.debug("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], - float(cfg_cycle_duration))) + float(cfg_cycle_duration))) CFG.update('cycle_duration', float(cfg_cycle_duration)) if cfg_profile_depth is not None: log_this.debug("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], - float(cfg_profile_depth))) + float(cfg_profile_depth))) CFG.update('profile_depth', float(cfg_profile_depth)) CFG.params = ConfigParam(key='reco_free_surface_drift', @@ -163,8 +164,6 @@ def _setup_load_velocity_data(self, **kwargs): cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) self.n_days = (len(self.cyc) - 1) * cycle_period + 1 - # 
log_this.info((domain_min_size, self.n_days)) - # log_this.info((domain_center, domain)) log_this.info("Loading %s velocity field to cover %i days starting on %s" % ( self.MD.velocity_field, self.n_days, self.P_obs[0].location.time)) @@ -172,6 +171,7 @@ def _setup_load_velocity_data(self, **kwargs): n_days=self.n_days, output=self.output_path, dataset=self.MD.velocity_field) + self.velocity_file = velocity_file log_this.debug(pp_obj(self.ds_vel)) log_this.info("Loaded %s field from %s to %s" % ( self.MD.velocity_field, @@ -185,15 +185,22 @@ def setup(self, **kwargs): self._setup_float_config(**kwargs) self._setup_load_velocity_data(**kwargs) log_this.info("Simulation data will be registered under: %s%s*%s*" % (self.output_path, - os.path.sep, - get_simulation_suffix(self.MD))) + os.path.sep, + get_simulation_suffix(self.MD))) log_this.debug("Setup terminated") return self def _execute_get_velocity(self): + log_this.info("Create a velocity object") self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, src=self.ds_vel) - # figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) + + log_this.info("Plot velocity") + for it in [0, -1]: + _, _, fname = self.VEL.plot(it=it, iz=0, save=True, workdir=self.output_path) + fname.rename( + str(fname).replace("velocity_%s" % self.VEL.name, Path(self.velocity_file).name.replace(".nc", "")) + ) def _execute_get_plan(self): # VirtualFleet, get a deployment plan: @@ -205,6 +212,7 @@ def _execute_get_plan(self): 'lat': df_plan['latitude'], 'time': np.array([np.datetime64(t) for t in df_plan['date'].dt.strftime('%Y-%m-%d %H:%M').array]), } + def execute(self): """Execute a VirtualFleet simulation""" @@ -244,32 +252,37 @@ def execute(self): def _predict_read_trajectories(self): # Get simulated profiles index: - log_this.info("Extracting swarm profiles index") + log_this.info("Extract swarm profiles index") - # self.traj = Trajectories(self.VFleet.output) self.traj = Trajectories(self.traj_file) self.traj.get_index().add_distances(origin=self.P_obs[0]) log_this.debug(pp_obj(self.traj)) - # jsdata, fig, ax = self.traj.analyse_pairwise_distances(cycle=1, show_plot=True) - - # figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, - # dd=1, save_figure=args.save_figure, workdir=WORKDIR) + log_this.info("Plot swarm initial and final states") + self.traj.plot_positions(domain_scale=2., + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='swarm_states_%s' % get_simulation_suffix(self.MD) + ) def _predict_positions(self): """Make predictions based on simulated profile density""" - self.run = RunAnalyser(self.traj.to_index(), self.df_obs) - log_this.info("Predicting float cycle position(s) from swarm simulation") + log_this.info("Predict float cycle position(s) from swarm simulation") + self.run = RunAnalyser(self.traj.index, self.df_obs) + self.run.fit_predict() log_this.debug(pp_obj(self.run)) - self.run.fit_predict() - # SP.plot_predictions(VEL, - # CFG, - # sim_suffix=get_sim_suffix(args, CFG), - # save_figure=args.save_figure, - # workdir=WORKDIR, - # orient='portrait') - # results = self.run.predictions + log_this.info("Plot predictions") + self.run.plot_predictions( + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='predictions_%s' % get_simulation_suffix(self.MD), + orient='portrait' + ) def predict(self): 
"""Make float profile predictions based on the swarm simulation""" @@ -410,10 +423,10 @@ def predict_function( # S = Simulation(wmo, cyc, - n_floats=n_floats, - velocity=velocity, - output_path=output_path, - ) + n_floats=n_floats, + velocity=velocity, + output_path=output_path, + ) S.setup(cfg_parking_depth=cfg_parking_depth, cfg_cycle_duration=cfg_cycle_duration, cfg_profile_depth=cfg_profile_depth, diff --git a/vfrecovery/core/run_handler.py b/vfrecovery/core/run_handler.py index 58d72c0..5ec1c73 100644 --- a/vfrecovery/core/run_handler.py +++ b/vfrecovery/core/run_handler.py @@ -1,10 +1,12 @@ import pandas as pd import numpy as np from typing import List +from pathlib import Path from sklearn.neighbors import KernelDensity from scipy.signal import find_peaks from sklearn.metrics import pairwise_distances +from virtualargofleet import VelocityField import matplotlib import matplotlib.pyplot as plt @@ -44,8 +46,11 @@ def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): self.WMO = np.unique(df_obs['wmo'])[0] self.jsobj = [] + if 'distance_origin' not in df_sim: + raise ValueError("Invalid simulation dataframe ! You probably forget to compute distances") + def __repr__(self): - summary = [""] + summary = [""] summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) summary.append("Number of simulated cycles: %i profile(s) for cycle number(s): [%s]" % ( @@ -118,6 +123,9 @@ def bbox(self, s: float = 1) -> list: ------- list """ + if not isinstance(self.jsobj, Simulation): + raise ValueError("Please call `fit_predict` first") + df_sim = self.swarm df_obs = self.obs @@ -262,7 +270,7 @@ def _add_ref(self): This populates the ``self.jsobj.observations`` property (``self.jsobj`` was created by the ``fit_predict`` method) """ - if len(self.jsobj.predictions) == 0: + if not isinstance(self.jsobj, Simulation): raise ValueError("Please call `fit_predict` first") # Observed profiles that were simulated: @@ -296,7 +304,7 @@ def _predict_errors(self): (assume a 12 kts boat speed with 1 kt = 1.852 km/h) """ - if len(self.jsobj.predictions) == 0: + if not isinstance(self.jsobj, Simulation): raise ValueError("Please call `fit_predict` first") Plist_updated = [] @@ -349,8 +357,7 @@ def add_metrics(self, VFvel=None): 1. 
Compute surface drift due to the time lag between the predicted profile timing and the expected one """ - # cyc0 = self.obs_cycles[0] - if len(self.jsobj.predictions) == 0: + if not isinstance(self.jsobj, Simulation): raise ValueError("Please call `predict` first") Plist_updated = [] @@ -381,16 +388,20 @@ def add_metrics(self, VFvel=None): class RunAnalyserView(RunAnalyserDiagnostics): def plot_predictions(self, - VFvel, - cfg, - sim_suffix='', # get_sim_suffix(this_args, cfg) + vel: VelocityField = None, + vel_depth: float = 0., + s=0.2, alpha=False, - save_figure=False, - workdir='.', + + save: bool = False, + workdir: Path = Path('.'), + fname: str = 'predictions', + figsize=None, dpi=120, - orient='portrait'): + orient='portrait' + ): ebox = self.bbox(s=s) pred_traj = self.trajectory @@ -398,43 +409,47 @@ def plot_predictions(self, if self.n_cycles == 1: nrows, ncols = 2, 1 if figsize is None: - figsize = (5, 5) + figsize = (10, 10) else: nrows, ncols = self.n_cycles, 2 if figsize is None: - figsize = (5, (self.n_cycles - 1) * 5) + figsize = (10, (self.n_cycles - 1) * 10) else: if self.n_cycles == 1: nrows, ncols = 1, 2 else: nrows, ncols = 2, self.n_cycles if figsize is None: - figsize = (ncols * 5, 5) + figsize = (ncols * 10, 10) def plot_this(this_ax, i_cycle, ip): - df_sim = self.swarm[self.swarm['cyc'] == i_cycle + 1] - weights = self._prediction_data['cyc'][i_cycle + 1]['weights'].values + virtual_cycle_number = i_cycle + 1 + df_sim = self.swarm[self.swarm['cyc'] == virtual_cycle_number] + df_sim = df_sim.reset_index(drop=True) + if self.sim_cycles[i_cycle] in self.obs_cycles: this_profile = self.obs[self.obs['cyc'] == self.sim_cycles[i_cycle]] else: this_profile = None - xpred = self.predictions['predictions'][i_cycle + 1]['location']['longitude'] - ypred = self.predictions['predictions'][i_cycle + 1]['location']['latitude'] + for p in self.jsobj.predictions: + if p.virtual_cycle_number == virtual_cycle_number: + xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time this_ax.set_extent(ebox) this_ax = map_add_features(ax[ix]) - v = VFvel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']) - v.plot.quiver(x="longitude", - y="latitude", - u=VFvel.var['U'], - v=VFvel.var['V'], - ax=this_ax, - color='grey', - alpha=0.5, - scale=5, - add_guide=False) + if vel is not None: + v = vel.field.isel(time=-1).interp(depth=vel_depth) + v.plot.quiver(x="longitude", + y="latitude", + u=vel.var['U'], + v=vel.var['V'], + ax=this_ax, + color='grey', + alpha=0.5, + scale=5, + add_guide=False) this_ax.plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', markersize=3, @@ -444,8 +459,9 @@ def plot_this(this_ax, i_cycle, ip): zorder=0) this_ax.plot(pred_traj[:, 0], pred_traj[:, 1], color='k', linewidth=1, marker='+') - this_ax.plot(xpred, ypred, color='g', marker='+') + this_ax.plot(xpred, ypred, color='lightgreen', marker='+') + weights = df_sim['weights'] w = weights / np.max(np.abs(weights), axis=0) ii = np.argsort(w) cmap = plt.cm.cool @@ -454,29 +470,20 @@ def plot_this(this_ax, i_cycle, ip): if ip == 0: x, y = df_sim['deploy_lon'], df_sim['deploy_lat'] title = 'Initial virtual float positions' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - alpha=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) elif ip == 1: x, y = df_sim['longitude'], df_sim['latitude'] title = 'Final virtual float positions' - if 
not alpha: - this_ax.scatter(x, y, c=w, marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x, y, c=w, marker='o', s=4, alpha=w, edgecolor=None, vmin=0, vmax=1, cmap=cmap) elif ip == 2: x, y = df_sim['rel_lon'], df_sim['rel_lat'] title = 'Final virtual floats positions relative to observed float' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, alpha=w[ii], edgecolor=None, vmin=0, vmax=1, cmap=cmap) + + if not alpha: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) + else: + this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], + alpha=w[ii], + marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) # Display full trajectory prediction: if ip != 0 and this_profile is not None: @@ -515,8 +522,8 @@ def plot_this(this_ax, i_cycle, ip): plot_this(ax[ix], i_cycle, ip) plt.tight_layout() - if save_figure: - save_figurefile(fig, 'vfrecov_predictions_%s' % sim_suffix, workdir) + if save: + save_figurefile(fig, fname, workdir) return fig, ax diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py index 1512c07..3f5ce96 100644 --- a/vfrecovery/core/trajfile_handler.py +++ b/vfrecovery/core/trajfile_handler.py @@ -5,9 +5,11 @@ from scipy.signal import find_peaks from sklearn.metrics import pairwise_distances import matplotlib.pyplot as plt +from virtualargofleet import VelocityField +from pathlib import Path from vfrecovery.utils.misc import get_cfg_str -from vfrecovery.plots.utils import save_figurefile +from vfrecovery.plots.utils import get_HBOX, map_add_features, map_add_profiles, save_figurefile from vfrecovery.json import Profile from vfrecovery.json import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState @@ -169,6 +171,11 @@ def get_index(self): self.to_index() return self + @property + def index(self): + self.get_index() + return self._index + def add_distances(self, origin: Profile = None) -> pd.DataFrame: """Compute profiles distance to some origin @@ -261,7 +268,8 @@ def pairs_pdf(longitude, latitude): # Compute swarm trajectories relative to the single/only real float initial position: # (Make all swarm trajectories to start at the same first position) - lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] # deployment locations + lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[ + 0] # deployment locations lon, lat = ds['lon'].values, ds['lat'].values ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, dims=['trajectory', 'obs']) @@ -398,3 +406,88 @@ def pairs_pdf(longitude, latitude): return M, fig, ax else: return M + + def HBOX(self, s: float = 1.): + """Swarm bounding box + + Parameters + ---------- + s: float + Set how much to extend maps outward the deployment 'box' + + Returns + ------- + list + """ + df_plan = self.index.iloc[0] + + box = [np.min([self.index['deploy_lon'].min(), self.index['longitude'].min(), self.index['rel_lon'].min()]), + np.max([self.index['deploy_lon'].max(), self.index['longitude'].max(), self.index['rel_lon'].max()]), + np.min([self.index['deploy_lat'].min(), self.index['latitude'].min(), self.index['rel_lat'].min()]), + np.max([self.index['deploy_lat'].max(), self.index['latitude'].max(), self.index['rel_lat'].max()])] + rx, ry = 
self.index['deploy_lon'].max() - self.index['deploy_lon'].min(), self.index['deploy_lat'].max() - self.index[
+            'deploy_lat'].min()
+        r = np.min([rx, ry])
+        ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, box[3] + s * r]
+        return ebox
+
+    def plot_positions(self,
+                       domain_scale: float = 1,
+                       vel: VelocityField = None,
+                       vel_depth: float = 0.,
+                       save: bool = True,
+                       workdir: Path = Path('.'),
+                       fname: str = 'swarm_positions',
+                       ):
+        """Plot the swarm initial and final positions on a map
+
+        >>> T = Trajectories(traj_file)
+        >>> T.plot_positions(vel_depth=cfg.mission['parking_depth'])
+        """
+        import cartopy.crs as ccrs
+
+        ebox = self.HBOX(s=domain_scale)
+
+        fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(25, 7), dpi=120,
+                               subplot_kw={'projection': ccrs.PlateCarree()},
+                               sharex=True, sharey=True)
+        ax = ax.flatten()
+
+        for ix in [0, 1, 2]:
+            ax[ix].set_extent(ebox)
+            ax[ix] = map_add_features(ax[ix])
+
+            if vel is not None:
+                vel.field.isel(time=0 if ix == 0 else -1).interp(depth=vel_depth).plot.quiver(x="longitude",
+                                                                                              y="latitude",
+                                                                                              u=vel.var['U'],
+                                                                                              v=vel.var['V'],
+                                                                                              ax=ax[ix],
+                                                                                              color='grey',
+                                                                                              alpha=1 if ix == 0 else 0.5,
+                                                                                              add_guide=False)
+
+            ax[ix].plot(self.index['deploy_lon'], self.index['deploy_lat'], '.',
+                        markersize=3, color='grey', alpha=0.1, markeredgecolor=None, zorder=0)
+            if ix == 0:
+                title = 'Initial Velocity field at %0.2fm and deployment plan' % vel_depth
+            elif ix == 1:
+                x, y, c = self.index['longitude'], self.index['latitude'], self.index['cyc']
+                title = 'Final float positions'
+                # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None)
+                sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None)
+            elif ix == 2:
+                x, y, c = self.index['rel_lon'], self.index['rel_lat'], self.index['cyc']
+                title = 'Final float positions relative to the last observed float position'
+                # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None)
+                sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None)
+
+            # ax[ix] = map_add_profiles(ax[ix], this_profile)
+            ax[ix].set_title(title)
+
+        # fig.suptitle("VirtualFleet recovery prediction for WMO %i: starting from cycle %i, predicting cycle %s\n%s" %
+        #              (wmo, cyc[0], cyc[1:], get_cfg_str(cfg)), fontsize=15)
+        plt.tight_layout()
+        if save:
+            save_figurefile(fig, fname, workdir)
+        return fig, ax
diff --git a/vfrecovery/plots/plot_velocity.py b/vfrecovery/plots/plot_velocity.py
deleted file mode 100644
index 2d59bae..0000000
--- a/vfrecovery/plots/plot_velocity.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-import cartopy.crs as ccrs
-import numpy as np
-
-from .utils import map_add_profiles, map_add_features, save_figurefile
-
-
-def figure_velocity(box,
-                    vel, vel_name, this_profile, wmo, cyc,
-                    save_figure=False, workdir='.'):
-    """
-
-    Parameters
-    ----------
-    box
-
-    Returns
-    -------
-    None
-    """
-    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 20), dpi=100, subplot_kw={'projection': ccrs.PlateCarree()})
-    ax.set_extent(box)
-    ax = map_add_features(ax)
-    ax = map_add_profiles(ax, this_profile)
-
-    vel.field.isel(time=0, depth=0).plot.quiver(x="longitude", y="latitude",
-                                                u=vel.var['U'], v=vel.var['V'], ax=ax, color='grey', alpha=0.5,
-                                                add_guide=False)
-
-    txt = "starting from cycle %i, predicting cycle %i" % (cyc[0], cyc[1])
-    ax.set_title(
-        "VirtualFleet recovery system for WMO %i: %s\n"
-        "%s velocity snapshot to illustrate the simulation domain\n"
-        "Vectors: Velocity field at z=%0.2fm, t=%s" %
-        (wmo, txt, vel_name, vel.field['depth'][0].values[np.newaxis][0],
-         pd.to_datetime(vel.field['time'][0].values).strftime("%Y/%m/%d %H:%M")), fontsize=15)
-
-    plt.tight_layout()
-    if save_figure:
-        save_figurefile(fig, 'vfrecov_velocity_%s' % vel_name, workdir)
-    return fig, ax
diff --git a/vfrecovery/plots/plot_positions.py b/vfrecovery/plots/positions.py
similarity index 100%
rename from vfrecovery/plots/plot_positions.py
rename to vfrecovery/plots/positions.py
diff --git a/vfrecovery/plots/utils.py b/vfrecovery/plots/utils.py
index a306697..aae37f3 100644
--- a/vfrecovery/plots/utils.py
+++ b/vfrecovery/plots/utils.py
@@ -1,26 +1,25 @@
 import os
 import numpy as np
 import argopy.plot as argoplot
+from pathlib import Path


-def save_figurefile(this_fig, a_name, folder='.'):
+def save_figurefile(this_fig, a_name, folder: Path = Path('.')):
     """
-
     Parameters
     ----------
     this_fig
     a_name
+    folder: Path

     Returns
     -------
     path
     """
-    figname = os.path.join(folder, "%s.png" % a_name)
-    # log.debug("Saving %s ..." % figname)
+    figname = folder.joinpath("%s.png" % a_name)
     this_fig.savefig(figname)
     return figname

-
 def map_add_profiles(this_ax, this_profile):
     """
diff --git a/vfrecovery/plots/velocity.py b/vfrecovery/plots/velocity.py
new file mode 100644
index 0000000..42a6d25
--- /dev/null
+++ b/vfrecovery/plots/velocity.py
@@ -0,0 +1,8 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+import cartopy.crs as ccrs
+import numpy as np
+from pathlib import Path
+from virtualargofleet import Velocity
+
+from .utils import map_add_profiles, map_add_features, save_figurefile
diff --git a/vfrecovery/utils/misc.py b/vfrecovery/utils/misc.py
index 4f23ebe..8e88395 100644
--- a/vfrecovery/utils/misc.py
+++ b/vfrecovery/utils/misc.py
@@ -1,5 +1,7 @@
 import argopy.plot as argoplot
 from pathlib import Path
+import os
+from argopy.utils import is_cyc


 def get_package_dir():
@@ -16,6 +18,18 @@ def get_cfg_str(a_cfg):
     return txt


+def list_float_simulation_folders(wmo, cyc=None) -> dict:
+    """Return the list of all available simulation folders for a given WMO, possibly restricted to a list of cycle numbers"""
+    output_path = {}
+    pl = Path(os.path.sep.join(["vfrecovery_simulations_data", str(wmo)])).glob("*")
+    for p in pl:
+        if p.is_dir():
+            this_cyc = p.parts[-1]  # Do not shadow the `cyc` argument, it is used to filter results below
+            if is_cyc(this_cyc):
+                output_path.update({int(this_cyc): p})
+    if cyc is not None:
+        output_path = {c: output_path[c] for c in list(output_path) if c in cyc}
+    return output_path

 def get_ea_profile_page_url(wmo, cyc):
From e5f50d016e11f58d88bb772265f7d721a530b0c8 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Wed, 27 Mar 2024 15:19:24 +0100
Subject: [PATCH 24/38] Some refactoring and clean up

---
 .../command_line_interface/group_describe.py  |  28 +-
 .../command_line_interface/group_plot.py      |  81 ---
 vfrecovery/command_line_interface/utils.py    |  19 -
 .../virtualfleet_recovery.py                  |   2 -
 vfrecovery/core/__init__.py                   |   4 +-
 .../{run_handler.py => analysis_handler.py}   |   6 -
 vfrecovery/core/deployment_plan.py            |   7 +-
 vfrecovery/core/describe.py                   |  61 --
 vfrecovery/core/floats_config.py              |  58 ++
 vfrecovery/core/plot.py                       |  66 --
 vfrecovery/core/predict.py                    | 324 +---------
 vfrecovery/core/simulation_handler.py         | 290 +++++++++
 vfrecovery/core/simulation_handler_legacy.py  | 595 ------------------
 vfrecovery/core/trajfile_handler.py           |   2 +-
 vfrecovery/core/trajfile_handler_legacy.py    | 407 ------------
 vfrecovery/core/utils.py                      |   2 +
 16 files changed, 375 insertions(+), 1577 deletions(-)
 delete mode 100644 vfrecovery/command_line_interface/group_plot.py
 delete mode 100644 vfrecovery/core/describe.py
 create mode 100644 
vfrecovery/core/floats_config.py delete mode 100644 vfrecovery/core/plot.py create mode 100644 vfrecovery/core/simulation_handler.py delete mode 100644 vfrecovery/core/simulation_handler_legacy.py delete mode 100644 vfrecovery/core/trajfile_handler_legacy.py diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py index 5cc7e8c..b7e9d65 100644 --- a/vfrecovery/command_line_interface/group_describe.py +++ b/vfrecovery/command_line_interface/group_describe.py @@ -2,11 +2,7 @@ import logging from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo import argopy.plot as argoplot -from argopy.errors import DataNotFound from argopy import ArgoIndex -import os -from pathlib import Path -import glob from vfrecovery.utils.misc import list_float_simulation_folders @@ -25,7 +21,7 @@ def cli_group_describe() -> None: short_help="Describe VirtualFleet-Recovery data and simulation results", help=""" - TARGET select what is to be described. A string in: 'all', 'obs', 'velocity'. + TARGET select what is to be described. A string in: ['obs', 'velocity']. WMO is the float World Meteorological Organisation number @@ -35,10 +31,10 @@ def cli_group_describe() -> None: Examples: \b - vfrecovery describe 6903091 + vfrecovery describe velocity 6903091 \b - vfrecovery describe 6903091 112 + vfrecovery describe obs 6903091 112 """, # noqa ) @click.option( @@ -79,11 +75,6 @@ def describe( assert is_cyc(cyc) cyc = check_cyc(cyc) - # json_dump = describe_function(wmo, - # cyc=cyc, - # log_level=log_level) - # blank_logger.info(json_dump) - if target == 'obs': describe_obs(wmo, cyc) @@ -99,13 +90,22 @@ def describe_velocity(wmo, cyc): # List all available velocity files: for c in plist.keys(): p = plist[c] - click.secho("Velocity file(s) for WMO=%s / CYC=%s:" % (wmo, c), fg='green') + click.secho("Velocity data for WMO=%s / CYC=%s:" % (wmo, c), fg='blue') + + click.secho("\tNetcdf files:") vlist = sorted(p.glob("velocity_*.nc")) if len(vlist) > 0: - [click.secho("\t- %s" % v) for v in vlist] + [click.secho("\t\t- %s" % v, fg='green') for v in vlist] else: click.secho("\tNo velocity file", fg='red') + click.secho("\tFigures:") + vlist = sorted(p.glob("velocity_*.png")) + if len(vlist) > 0: + [click.secho("\t\t- %s" % v, fg='green') for v in vlist] + else: + click.secho("\tNo velocity figures", fg='red') + def describe_obs(wmo, cyc): url = argoplot.dashboard(wmo, url_only=True) diff --git a/vfrecovery/command_line_interface/group_plot.py b/vfrecovery/command_line_interface/group_plot.py deleted file mode 100644 index 0265a30..0000000 --- a/vfrecovery/command_line_interface/group_plot.py +++ /dev/null @@ -1,81 +0,0 @@ -import click -import logging -from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo -import argopy.plot as argoplot -from argopy.errors import DataNotFound -from argopy import ArgoIndex -import os -from pathlib import Path -import glob - -from vfrecovery.core.plot import plot_velocity - -root_logger = logging.getLogger("vfrecovery_root_logger") -blank_logger = logging.getLogger("vfrecovery_blank_logger") - - -@click.group() -def cli_group_plot() -> None: - pass - - -@cli_group_plot.command( - "plot", - short_help="Plot VirtualFleet-Recovery data or simulation results", - help=""" - - TARGET select what is to be plotted. A string in: 'velocity'. 
- - WMO is the float World Meteorological Organisation number - - CYC is the cycle number location to restrict plots to - """, - epilog=""" - Examples: - - \b - vfrecovery plot velocity 6903091 80 - """, # noqa -) -@click.option( - "--log-level", - type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), - default="INFO", - show_default=True, - help=( - "Set the details printed to console by the command " - "(based on standard logging library)." - ), -) -@click.argument('TARGET', nargs=1, type=str) -@click.argument('WMO', nargs=1, type=int) -@click.argument("CYC", nargs=-1, type=int) -def plot( - target, - wmo, - cyc, - log_level, -) -> None: - if log_level == "QUIET": - root_logger.disabled = True - log_level = "CRITICAL" - root_logger.setLevel(level=getattr(logging, log_level.upper())) - - if root_logger.isEnabledFor(logging.DEBUG): - root_logger.debug("DEBUG mode activated") - - # Validate arguments: - if target.lower() not in ["all", "obs", "velocity"]: - raise ValueError("The first argument TARGET must be one in ['all', 'obs', 'velocity']") - - assert is_wmo(wmo) - wmo = check_wmo(wmo)[0] - cyc = list(cyc) - if len(cyc) > 0: - assert is_cyc(cyc) - cyc = check_cyc(cyc) - - if target == 'velocity': - plot_velocity(wmo, cyc, - log_level=log_level, - ) diff --git a/vfrecovery/command_line_interface/utils.py b/vfrecovery/command_line_interface/utils.py index 5d799aa..34a2c16 100644 --- a/vfrecovery/command_line_interface/utils.py +++ b/vfrecovery/command_line_interface/utils.py @@ -6,22 +6,3 @@ PREF = "\033[" RESET = f"{PREF}0m" - - -def puts(text, color=None, bold=False, file=sys.stdout): - """Alternative to print, uses no color by default but accepts any color from the COLORS class. - - Parameters - ---------- - text - color=None - bold=False - file=sys.stdout - """ - if color is None: - txt = f'{PREF}{1 if bold else 0}m' + text + RESET - print(txt, file=file) - else: - txt = f'{PREF}{1 if bold else 0};{color}' + text + RESET - print(txt, file=file) - log.info(text) diff --git a/vfrecovery/command_line_interface/virtualfleet_recovery.py b/vfrecovery/command_line_interface/virtualfleet_recovery.py index de8c291..b38adac 100644 --- a/vfrecovery/command_line_interface/virtualfleet_recovery.py +++ b/vfrecovery/command_line_interface/virtualfleet_recovery.py @@ -2,14 +2,12 @@ from vfrecovery.command_line_interface.group_describe import cli_group_describe from vfrecovery.command_line_interface.group_predict import cli_group_predict -from vfrecovery.command_line_interface.group_plot import cli_group_plot @click.command( cls=click.CommandCollection, sources=[ cli_group_describe, cli_group_predict, - cli_group_plot, ], context_settings=dict(help_option_names=["-h", "--help"]), ) diff --git a/vfrecovery/core/__init__.py b/vfrecovery/core/__init__.py index b42f062..3c591c3 100644 --- a/vfrecovery/core/__init__.py +++ b/vfrecovery/core/__init__.py @@ -1,4 +1,4 @@ # from deployment_plan import setup_deployment_plan from .trajfile_handler import Trajectories -from .run_handler import RunAnalyser -# from predict import predict_function +from .analysis_handler import RunAnalyser +from .simulation_handler import Simulation diff --git a/vfrecovery/core/run_handler.py b/vfrecovery/core/analysis_handler.py similarity index 99% rename from vfrecovery/core/run_handler.py rename to vfrecovery/core/analysis_handler.py index 5ec1c73..329a720 100644 --- a/vfrecovery/core/run_handler.py +++ b/vfrecovery/core/analysis_handler.py @@ -4,11 +4,8 @@ from pathlib import Path from sklearn.neighbors 
import KernelDensity -from scipy.signal import find_peaks -from sklearn.metrics import pairwise_distances from virtualargofleet import VelocityField -import matplotlib import matplotlib.pyplot as plt import argopy.plot as argoplot import cartopy.crs as ccrs @@ -18,9 +15,6 @@ from vfrecovery.json import Simulation, Profile, Location, Metrics, Transit, SurfaceDrift, Location_error -pp_obj = lambda x: "\n%s" % "\n".join(["\t%s" % line for line in x.__repr__().split("\n")]) - - class RunAnalyserCore: """ diff --git a/vfrecovery/core/deployment_plan.py b/vfrecovery/core/deployment_plan.py index 3737e25..f73c071 100644 --- a/vfrecovery/core/deployment_plan.py +++ b/vfrecovery/core/deployment_plan.py @@ -3,8 +3,11 @@ from vfrecovery.json import Profile -def setup_deployment_plan(P: Profile, nfloats: int = 120): - # We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time +def setup_deployment_plan(P: Profile, nfloats: int = 120) -> pd.DataFrame: + """Create a deployment plan as a :class:`pandas.DataFrame` + + We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time + """ # Amplitude of the profile position perturbations in the zonal (deg), meridional (deg), and temporal (hours) directions: rx = 0.5 diff --git a/vfrecovery/core/describe.py b/vfrecovery/core/describe.py deleted file mode 100644 index c383f1e..0000000 --- a/vfrecovery/core/describe.py +++ /dev/null @@ -1,61 +0,0 @@ -import logging -import json -from typing import Union -from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo -import argopy.plot as argoplot -from argopy.errors import DataNotFound -from argopy import ArgoIndex - -from .utils import ArgoIndex2df_obs - -root_logger = logging.getLogger("vfrecovery_root_logger") - - -def describe_obs(wmo, cyc): - - # Validate arguments: - assert is_wmo(wmo) - wmo = check_wmo(wmo)[0] - if cyc is not None: - assert is_cyc(cyc) - cyc = check_cyc(cyc)[0] - - # - url = argoplot.dashboard(wmo, url_only=True) - txt = "You can check this float dashboard while we search for float profiles in the index: %s" % url - root_logger.info(txt) - - # Load observed float profiles index: - host = "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" - idx = ArgoIndex(host=host) - if cyc is not None: - idx.search_wmo_cyc(wmo, cyc) - else: - idx.search_wmo(wmo) - - df = idx.to_dataframe() - df = df.sort_values(by='date') - root_logger.info("\n%s" % df.to_string(max_colwidth=15)) - - # output = {'wmo': wmo, 'cyc': cyc} - # json_dump = json.dumps( - # output, sort_keys=False, indent=2 - # ) - # return json_dump - - -def describe_function( - wmo: int, - cyc: Union[int, None], - target: str, - log_level: str, -) -> str: - if log_level == "QUIET": - root_logger.disabled = True - log_level = "CRITICAL" - root_logger.setLevel(level=getattr(logging, log_level.upper())) - - if target == 'obs': - describe_obs(wmo, cyc) \ No newline at end of file diff --git a/vfrecovery/core/floats_config.py b/vfrecovery/core/floats_config.py new file mode 100644 index 0000000..edeeabd --- /dev/null +++ b/vfrecovery/core/floats_config.py @@ -0,0 +1,58 @@ +from virtualargofleet import FloatConfiguration, ConfigParam + + +def setup_floats_config( + wmo: int, + cyc: int, + cfg_parking_depth: float, + cfg_cycle_duration: float, + 
cfg_profile_depth: float,
+        cfg_free_surface_drift: int,
+        logger,
+) -> FloatConfiguration:
+    """Load float configuration at a given cycle number and possibly overwrite data with user parameters
+
+    Parameters
+    ----------
+    wmo: int
+    cyc: int
+    cfg_parking_depth: float
+    cfg_cycle_duration: float
+    cfg_profile_depth: float
+    cfg_free_surface_drift: int
+    logger
+
+    Returns
+    -------
+    :class:`virtualargofleet.FloatConfiguration`
+
+    """
+    try:
+        CFG = FloatConfiguration([wmo, cyc])
+    except Exception:
+        logger.error("Can't load this profile configuration, fall back on default values")
+        CFG = FloatConfiguration('default')
+
+    if cfg_parking_depth is not None:
+        logger.debug("parking_depth=%s is overwritten with %s" % (CFG.mission['parking_depth'],
+                                                                  float(cfg_parking_depth)))
+        CFG.update('parking_depth', float(cfg_parking_depth))
+
+    if cfg_cycle_duration is not None:
+        logger.debug("cycle_duration=%s is overwritten with %s" % (CFG.mission['cycle_duration'],
+                                                                   float(cfg_cycle_duration)))
+        CFG.update('cycle_duration', float(cfg_cycle_duration))
+
+    if cfg_profile_depth is not None:
+        logger.debug("profile_depth=%s is overwritten with %s" % (CFG.mission['profile_depth'],
+                                                                  float(cfg_profile_depth)))
+        CFG.update('profile_depth', float(cfg_profile_depth))
+
+    CFG.params = ConfigParam(key='reco_free_surface_drift',
+                             value=int(cfg_free_surface_drift),
+                             unit='cycle',
+                             description='First cycle with free surface drift',
+                             dtype=int)
+
+    return CFG
+
diff --git a/vfrecovery/core/plot.py b/vfrecovery/core/plot.py
deleted file mode 100644
index 58eccd5..0000000
--- a/vfrecovery/core/plot.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import logging
-import xarray as xr
-from virtualargofleet import Velocity
-
-from vfrecovery.utils.misc import list_float_simulation_folders
-import vfrecovery.plots.velocity as pltvel
-
-
-root_logger = logging.getLogger("vfrecovery_root_logger")
-plot_logger = logging.getLogger("vfrecovery_plot")
-
-
-class log_this:
-
-    def __init__(self, txt, log_level):
-        """Log text to simulation and possibly root logger(s)"""
-        getattr(root_logger, log_level.lower())(txt)
-        getattr(plot_logger, log_level.lower())(txt)
-
-    @staticmethod
-    def info(txt) -> 'log_this':
-        return log_this(txt, 'INFO')
-
-    @staticmethod
-    def debug(txt) -> 'log_this':
-        return log_this(txt, 'DEBUG')
-
-    @staticmethod
-    def warning(txt) -> 'log_this':
-        return log_this(txt, 'WARNING')
-
-    @staticmethod
-    def error(txt) -> 'log_this':
-        return log_this(txt, 'ERROR')
-
-
-def plot_velocity(
-        wmo: int,
-        cyc: int,
-        log_level: str,
-):
-    if log_level == "QUIET":
-        root_logger.disabled = True
-        log_level = "CRITICAL"
-    root_logger.setLevel(level=getattr(logging, log_level.upper()))
-
-    # List folders to examine:
-    plist = list_float_simulation_folders(wmo, cyc)
-
-    #
-    for c in plist.keys():
-        p = plist[c]
-        log_this.info("Velocity figure(s) for WMO=%s / CYC=%s:" % (wmo, c))
-        ilist = sorted(p.glob("velocity_*.png"))
-        if len(ilist) > 0:
-            [log_this.info("\t- %s" % i) for i in ilist]
-        else:
-            log_this.info("No velocity figures ! 
Generating new ones from velocity files") - - # Load velocity field - vlist = sorted(p.glob("velocity_*.nc")) - for v in vlist: - log_this.info("Loading '%s'" % v) - # ds_vel = xr.open_dataset(v) - # VEL = Velocity(model='GLORYS12V1' if 'GLORYS' in str(v) else 'ARMOR3D', src=ds_vel) - # pltvel.plot(VEL, wmo, cyc, save_figure=False, workdir=p) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 8584887..dadd1d6 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -1,30 +1,15 @@ import time from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo -import argopy.plot as argoplot -from virtualargofleet import Velocity, VirtualFleet, FloatConfiguration, ConfigParam from pathlib import Path from typing import Union -import pandas as pd -import numpy as np import os import logging -import json -import pprint -from datetime import timedelta -from vfrecovery.json import Profile, MetaData, MetaDataSystem, MetaDataComputation -from vfrecovery.utils.formatters import COLORS -from vfrecovery.downloaders import get_velocity_field -# from vfrecovery.plots.velocity import VFRvelocity # Velocity with plotting features -from .utils import df_obs2jsProfile, ArgoIndex2df_obs, ArgoIndex2jsProfile, get_simulation_suffix, get_domain -from .deployment_plan import setup_deployment_plan -from .trajfile_handler import Trajectories -from .run_handler import RunAnalyser +from .simulation_handler import Simulation root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") -pp_obj = lambda x: "\n%s" % "\n".join(["\t%s" % line for line in x.__repr__().split("\n")]) class log_this: @@ -51,287 +36,6 @@ def error(txt) -> 'log_this': return log_this(txt, 'ERROR') -def setup_floats_config( - wmo: int, - cyc: int, - cfg_parking_depth: float, - cfg_cycle_duration: float, - cfg_profile_depth: float, - cfg_free_surface_drift: int, -) -> FloatConfiguration: - """Load float configuration at a given cycle number and possibly overwrite data with user parameters""" - try: - CFG = FloatConfiguration([wmo, cyc]) - except: - log_this.error("Can't load this profile configuration, fall back on default values") - CFG = FloatConfiguration('default') - - if cfg_parking_depth is not None: - log_this.debug("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], - float(cfg_parking_depth))) - CFG.update('parking_depth', float(cfg_parking_depth)) - - if cfg_cycle_duration is not None: - log_this.debug("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], - float(cfg_cycle_duration))) - CFG.update('cycle_duration', float(cfg_cycle_duration)) - - if cfg_profile_depth is not None: - log_this.debug("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], - float(cfg_profile_depth))) - CFG.update('profile_depth', float(cfg_profile_depth)) - - CFG.params = ConfigParam(key='reco_free_surface_drift', - value=int(cfg_free_surface_drift), - unit='cycle', - description='First cycle with free surface drift', - dtype=int) - - return CFG - - -class Simulation: - """ - - >>> S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity) - >>> S.setup() - >>> S.execute() - >>> S.predict() - >>> S.postprocess() - >>> S.to_json() - - """ - - def __init__(self, wmo, cyc, **kwargs): - self.wmo = wmo - self.cyc = cyc - self.output_path = kwargs['output_path'] - log_this.info("%s \\" % ("=" * 55)) - log_this.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) - - # 
log_this.info("n_predictions: %i" % n_predictions) - log_this.info("Working with cycle numbers list: %s" % str(cyc)) - - # - url = argoplot.dashboard(wmo, url_only=True) - txt = "You can check this float dashboard while we prepare the prediction: %s" % url - log_this.info(txt) - - # Create Simulation Meta-data class holder - self.MD = MetaData.from_dict({ - 'n_floats': kwargs['n_floats'], - 'velocity_field': kwargs['velocity'], - 'system': MetaDataSystem.auto_load(), - 'vfconfig': None, # will be filled later - 'computation': None, # will be filled later - }) - - def _setup_load_observed_profiles(self): - """Load observed float profiles index""" - - log_this.info("Loading float profiles index") - self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc, cache=False, cachedir=str(self.output_path)) - [log_this.debug("Observed profiles list: %s" % pp_obj(p)) for p in self.P_obs] - - if len(self.P_obs) == 1: - log_this.info('Real-time scenario: True position unknown !') - else: - log_this.info('Evaluation scenario: Historical position known') - - def _setup_float_config(self, **kwargs): - """Load and setup float configuration""" - log_this.info("Loading float configuration") - - # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. - # (the loaded config is possibly overwritten with user defined cfg_* parameters) - self.CFG = setup_floats_config(self.wmo, self.cyc[0], - kwargs['cfg_parking_depth'], - kwargs['cfg_cycle_duration'], - kwargs['cfg_profile_depth'], - kwargs['cfg_free_surface_drift']) - self.MD.vfconfig = self.CFG # Register floats configuration to the simulation meta-data class - - # and save the final virtual float configuration on file: - self.CFG.to_json( - Path(os.path.join(self.output_path, "floats_configuration_%s.json" % get_simulation_suffix(self.MD)))) - log_this.debug(pp_obj(self.CFG)) - - def _setup_load_velocity_data(self, **kwargs): - # Define domain to load velocity for: - # In space: - domain, domain_center = get_domain(self.P_obs, kwargs['domain_min_size']) - # and time: - cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) - self.n_days = (len(self.cyc) - 1) * cycle_period + 1 - - log_this.info("Loading %s velocity field to cover %i days starting on %s" % ( - self.MD.velocity_field, self.n_days, self.P_obs[0].location.time)) - - self.ds_vel, velocity_file = get_velocity_field(domain, self.P_obs[0].location.time, - n_days=self.n_days, - output=self.output_path, - dataset=self.MD.velocity_field) - self.velocity_file = velocity_file - log_this.debug(pp_obj(self.ds_vel)) - log_this.info("Loaded %s field from %s to %s" % ( - self.MD.velocity_field, - pd.to_datetime(self.ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), - pd.to_datetime(self.ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")) - ) - - def setup(self, **kwargs): - """Fulfill all requirements for the simulation""" - self._setup_load_observed_profiles() - self._setup_float_config(**kwargs) - self._setup_load_velocity_data(**kwargs) - log_this.info("Simulation data will be registered under: %s%s*%s*" % (self.output_path, - os.path.sep, - get_simulation_suffix(self.MD))) - log_this.debug("Setup terminated") - return self - - def _execute_get_velocity(self): - log_this.info("Create a velocity object") - self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, - src=self.ds_vel) - - log_this.info("Plot velocity") - for it in [0, -1]: 
- _, _, fname = self.VEL.plot(it=it, iz=0, save=True, workdir=self.output_path) - fname.rename( - str(fname).replace("velocity_%s" % self.VEL.name, Path(self.velocity_file).name.replace(".nc", "")) - ) - - def _execute_get_plan(self): - # VirtualFleet, get a deployment plan: - log_this.info("Create a deployment plan") - df_plan = setup_deployment_plan(self.P_obs[0], nfloats=self.MD.n_floats) - log_this.info("Set %i virtual floats to deploy (i.e. swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) - - self.PLAN = {'lon': df_plan['longitude'], - 'lat': df_plan['latitude'], - 'time': np.array([np.datetime64(t) for t in df_plan['date'].dt.strftime('%Y-%m-%d %H:%M').array]), - } - - def execute(self): - """Execute a VirtualFleet simulation""" - - self._execute_get_velocity() - self._execute_get_plan() - - # Set up VirtualFleet: - log_this.info("Create a VirtualFleet instance") - self.VFleet = VirtualFleet(plan=self.PLAN, - fieldset=self.VEL, - mission=self.CFG, - verbose_events=False) - - # Execute the simulation: - log_this.info("Starting simulation") - - # Remove traj file if exists: - # output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) - # if os.path.exists(output_path): - # shutil.rmtree(output_path) - - self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) - if os.path.exists(self.traj_file): - log_this.warning("Using data from a previous similar run (no simulation executed)") - else: - self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), - step=timedelta(minutes=5), - record=timedelta(minutes=30), - output=True, - output_folder=self.output_path, - output_file='trajectories_%s.zarr' % get_simulation_suffix(self.MD), - verbose_progress=True, - ) - log_this.info("Simulation ended with success") - return self - - def _predict_read_trajectories(self): - - # Get simulated profiles index: - log_this.info("Extract swarm profiles index") - - self.traj = Trajectories(self.traj_file) - self.traj.get_index().add_distances(origin=self.P_obs[0]) - log_this.debug(pp_obj(self.traj)) - - log_this.info("Plot swarm initial and final states") - self.traj.plot_positions(domain_scale=2., - vel=self.VEL, - vel_depth=self.CFG.mission['parking_depth'], - save=True, - workdir=self.output_path, - fname='swarm_states_%s' % get_simulation_suffix(self.MD) - ) - - def _predict_positions(self): - """Make predictions based on simulated profile density""" - log_this.info("Predict float cycle position(s) from swarm simulation") - self.run = RunAnalyser(self.traj.index, self.df_obs) - self.run.fit_predict() - log_this.debug(pp_obj(self.run)) - - log_this.info("Plot predictions") - self.run.plot_predictions( - vel=self.VEL, - vel_depth=self.CFG.mission['parking_depth'], - save=True, - workdir=self.output_path, - fname='predictions_%s' % get_simulation_suffix(self.MD), - orient='portrait' - ) - - def predict(self): - """Make float profile predictions based on the swarm simulation""" - self._predict_read_trajectories() - self._predict_positions() - return self - - def _postprocess_metrics(self): - if self.run.has_ref: - log_this.info("Computing prediction metrics for past cycles with observed ground truth") - self.run.add_metrics(self.VEL) - - def _postprocess_swarm_metrics(self): - log_this.info("Computing swarm metrics") - Plist_updated = [] - for p in self.run.jsobj.predictions: - this_cyc = p.virtual_cycle_number - swarm_metrics = self.traj.analyse_pairwise_distances(virtual_cycle_number=this_cyc, show_plot=False) 
- p.metrics.trajectory_lengths = swarm_metrics.trajectory_lengths - p.metrics.pairwise_distances = swarm_metrics.pairwise_distances - Plist_updated.append(p) - self.run.jsobj.predictions = Plist_updated - - def postprocess(self): - self._postprocess_metrics() - self._postprocess_swarm_metrics() - return self - - def finish(self, execution_start: float, process_start: float): - """Click timers and save results to finish""" - self.MD.computation = MetaDataComputation.from_dict({ - 'date': pd.to_datetime('now', utc=True), - 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), - 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), - }) - - self.run_file = os.path.join(self.output_path, 'results_%s.json' % get_simulation_suffix(self.MD)) - self.to_json(fp=self.run_file) - log_this.info("Simulation results and analysis saved in: %s" % self.run_file) - - log_this.info("END OF SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) - log_this.info("%s /" % ("=" * 55)) - return self - - def to_json(self, fp=None): - y = self.run.jsobj # :class:`Simulation` instance - y.meta_data = self.MD - return y.to_json(fp=fp) - def predict_function( wmo: int, @@ -374,12 +78,6 @@ def predict_function( root_logger.disabled = True log_level = "CRITICAL" root_logger.setLevel(level=getattr(logging, log_level.upper())) - # print('DEBUG', logging.DEBUG) - # print('INFO', logging.INFO) - # print('WARNING', logging.WARNING) - # print('ERROR', logging.ERROR) - # print('root_logger', root_logger.getEffectiveLevel()) - # print(root_logger.isEnabledFor(logging.INFO)) execution_start = time.time() process_start = time.process_time() @@ -416,16 +114,13 @@ def predict_function( datefmt='%Y/%m/%d %I:%M:%S')) sim_logger.handlers = [] sim_logger.addHandler(simlogfile) - # log_this.info("This is INFO") - # log_this.warning("This is WARN") - # log_this.debug("This is DEBUG") - # log_this.error("This is ERROR") # S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity, output_path=output_path, + logger=log_this, ) S.setup(cfg_parking_depth=cfg_parking_depth, cfg_cycle_duration=cfg_cycle_duration, @@ -440,14 +135,9 @@ def predict_function( # # return S.MD.computation.to_json() - # return MD.to_json() + # return S.MD.to_json() return S.to_json() - # output = {'wmo': wmo, 'cyc': cyc, 'velocity': velocity, 'n_predictions': n_predictions, 'cfg': CFG.to_json(indent=0)} - # json_dump = json.dumps( - # output, sort_keys=False, indent=2 - # ) - # return json_dump # def predictor(args): # """Prediction manager""" @@ -456,14 +146,6 @@ def predict_function( # mplbackend = matplotlib.get_backend() # matplotlib.use('Agg') -# with open(os.path.join(WORKDIR, 'prediction_%s.json' % get_sim_suffix(args, CFG)), 'w', encoding='utf-8') as f: -# json.dump(results, f, ensure_ascii=False, indent=4, default=str, sort_keys=True) -# -# if not args.json: -# puts(results_js, color=COLORS.green) -# puts("\nCheck results at:") -# puts("\t%s" % WORKDIR, color=COLORS.green) -# # if args.save_figure: # plt.close('all') # # Restore Matplotlib backend diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py new file mode 100644 index 0000000..97b9c48 --- /dev/null +++ b/vfrecovery/core/simulation_handler.py @@ -0,0 +1,290 @@ +import time +import argopy.plot as argoplot +from virtualargofleet import Velocity, VirtualFleet +from pathlib import Path +import pandas as pd +import numpy as np +import os +from datetime import timedelta +import logging + +from vfrecovery.json import MetaData, 
MetaDataSystem, MetaDataComputation +from vfrecovery.downloaders import get_velocity_field +from .utils import ArgoIndex2jsProfile, get_simulation_suffix, get_domain, pp_obj +from .floats_config import setup_floats_config +from .deployment_plan import setup_deployment_plan +from .trajfile_handler import Trajectories +from .analysis_handler import RunAnalyser + + +root_logger = logging.getLogger("vfrecovery_root_logger") + + +class default_logger: + + def __init__(self, txt, log_level): + """Log text to simulation and possibly root logger(s)""" + getattr(root_logger, log_level.lower())(txt) + + @staticmethod + def info(txt) -> 'default_logger': + return default_logger(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'default_logger': + return default_logger(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'default_logger': + return default_logger(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'default_logger': + return default_logger(txt, 'ERROR') + + + +class Simulation: + """Base class to execute the simulation/prediction workflow + + >>> S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity, output_path=Path('.')) + >>> S.setup() + >>> S.execute() + >>> S.predict() + >>> S.postprocess() + >>> S.to_json() + """ + + def __init__(self, wmo, cyc, **kwargs): + self.wmo = wmo + self.cyc = cyc + self.output_path = kwargs['output_path'] + self.logger = default_logger if 'logger' not in kwargs else kwargs['logger'] + + self.logger.info("%s \\" % ("=" * 55)) + self.logger.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) + + # self.logger.info("n_predictions: %i" % n_predictions) + self.logger.info("Working with cycle numbers list: %s" % str(cyc)) + + # + url = argoplot.dashboard(wmo, url_only=True) + txt = "You can check this float dashboard while we prepare the prediction: %s" % url + self.logger.info(txt) + + # Create Simulation Meta-data class holder + self.MD = MetaData.from_dict({ + 'n_floats': kwargs['n_floats'], + 'velocity_field': kwargs['velocity'], + 'system': MetaDataSystem.auto_load(), + 'vfconfig': None, # will be filled later + 'computation': None, # will be filled later + }) + + def _setup_load_observed_profiles(self): + """Load observed float profiles index""" + + self.logger.info("Loading float profiles index") + self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc, cache=False, cachedir=str(self.output_path)) + [self.logger.debug("Observed profiles list: %s" % pp_obj(p)) for p in self.P_obs] + + if len(self.P_obs) == 1: + self.logger.info('Real-time scenario: True position unknown !') + else: + self.logger.info('Evaluation scenario: Historical position known') + + def _setup_float_config(self, **kwargs): + """Load and setup float configuration""" + self.logger.info("Loading float configuration") + + # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. 
+ # (the loaded config is possibly overwritten with user defined cfg_* parameters) + self.CFG = setup_floats_config(self.wmo, self.cyc[0], + kwargs['cfg_parking_depth'], + kwargs['cfg_cycle_duration'], + kwargs['cfg_profile_depth'], + kwargs['cfg_free_surface_drift'], + self.logger, + ) + self.MD.vfconfig = self.CFG # Register floats configuration to the simulation meta-data class + + # and save the final virtual float configuration on file: + self.CFG.to_json( + Path(os.path.join(self.output_path, "floats_configuration_%s.json" % get_simulation_suffix(self.MD)))) + self.logger.debug(pp_obj(self.CFG)) + + def _setup_load_velocity_data(self, **kwargs): + # Define domain to load velocity for: + # In space: + domain, domain_center = get_domain(self.P_obs, kwargs['domain_min_size']) + # and time: + cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) + self.n_days = (len(self.cyc) - 1) * cycle_period + 1 + + self.logger.info("Loading %s velocity field to cover %i days starting on %s" % ( + self.MD.velocity_field, self.n_days, self.P_obs[0].location.time)) + + self.ds_vel, velocity_file = get_velocity_field(domain, self.P_obs[0].location.time, + n_days=self.n_days, + output=self.output_path, + dataset=self.MD.velocity_field) + self.velocity_file = velocity_file + self.logger.debug(pp_obj(self.ds_vel)) + self.logger.info("Loaded %s field from %s to %s" % ( + self.MD.velocity_field, + pd.to_datetime(self.ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), + pd.to_datetime(self.ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")) + ) + + def setup(self, **kwargs): + """Fulfill all requirements for the simulation""" + self._setup_load_observed_profiles() + self._setup_float_config(**kwargs) + self._setup_load_velocity_data(**kwargs) + self.logger.info("Simulation data will be registered under: %s%s*%s*" % (self.output_path, + os.path.sep, + get_simulation_suffix(self.MD))) + self.logger.debug("Setup terminated") + return self + + def _execute_get_velocity(self): + self.logger.info("Create a velocity object") + self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, + src=self.ds_vel) + + self.logger.info("Plot velocity") + for it in [0, -1]: + _, _, fname = self.VEL.plot(it=it, iz=0, save=True, workdir=self.output_path) + fname.rename( + str(fname).replace("velocity_%s" % self.VEL.name, Path(self.velocity_file).name.replace(".nc", "")) + ) + + def _execute_get_plan(self): + # VirtualFleet, get a deployment plan: + self.logger.info("Create a deployment plan") + df_plan = setup_deployment_plan(self.P_obs[0], nfloats=self.MD.n_floats) + self.logger.info("Set %i virtual floats to deploy (i.e. 
swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) + + self.PLAN = {'lon': df_plan['longitude'], + 'lat': df_plan['latitude'], + 'time': np.array([np.datetime64(t) for t in df_plan['date'].dt.strftime('%Y-%m-%d %H:%M').array]), + } + + def execute(self): + """Execute a VirtualFleet simulation""" + + self._execute_get_velocity() + self._execute_get_plan() + + # Set up VirtualFleet: + self.logger.info("Create a VirtualFleet instance") + self.VFleet = VirtualFleet(plan=self.PLAN, + fieldset=self.VEL, + mission=self.CFG, + verbose_events=False) + + # Execute the simulation: + self.logger.info("Starting simulation") + + # Remove traj file if exists: + # output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) + # if os.path.exists(output_path): + # shutil.rmtree(output_path) + + self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) + if os.path.exists(self.traj_file): + self.logger.warning("Using data from a previous similar run (no simulation executed)") + else: + self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), + step=timedelta(minutes=5), + record=timedelta(minutes=30), + output=True, + output_folder=self.output_path, + output_file='trajectories_%s.zarr' % get_simulation_suffix(self.MD), + verbose_progress=True, + ) + self.logger.info("Simulation ended with success") + return self + + def _predict_read_trajectories(self): + + # Get simulated profiles index: + self.logger.info("Extract swarm profiles index") + + self.traj = Trajectories(self.traj_file) + self.traj.get_index().add_distances(origin=self.P_obs[0]) + self.logger.debug(pp_obj(self.traj)) + + self.logger.info("Plot swarm initial and final states") + self.traj.plot_positions(domain_scale=2., + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='swarm_states_%s' % get_simulation_suffix(self.MD) + ) + + def _predict_positions(self): + """Make predictions based on simulated profile density""" + self.logger.info("Predict float cycle position(s) from swarm simulation") + self.run = RunAnalyser(self.traj.index, self.df_obs) + self.run.fit_predict() + self.logger.debug(pp_obj(self.run)) + + self.logger.info("Plot predictions") + self.run.plot_predictions( + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='predictions_%s' % get_simulation_suffix(self.MD), + orient='portrait' + ) + + def predict(self): + """Make float profile predictions based on the swarm simulation""" + self._predict_read_trajectories() + self._predict_positions() + return self + + def _postprocess_metrics(self): + if self.run.has_ref: + self.logger.info("Computing prediction metrics for past cycles with observed ground truth") + self.run.add_metrics(self.VEL) + + def _postprocess_swarm_metrics(self): + self.logger.info("Computing swarm metrics") + Plist_updated = [] + for p in self.run.jsobj.predictions: + this_cyc = p.virtual_cycle_number + swarm_metrics = self.traj.analyse_pairwise_distances(virtual_cycle_number=this_cyc, show_plot=False) + p.metrics.trajectory_lengths = swarm_metrics.trajectory_lengths + p.metrics.pairwise_distances = swarm_metrics.pairwise_distances + Plist_updated.append(p) + self.run.jsobj.predictions = Plist_updated + + def postprocess(self): + self._postprocess_metrics() + self._postprocess_swarm_metrics() + return self + + def finish(self, execution_start: float, process_start: float): + """Click timers and save results 
to finish""" + self.MD.computation = MetaDataComputation.from_dict({ + 'date': pd.to_datetime('now', utc=True), + 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), + 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), + }) + + self.run_file = os.path.join(self.output_path, 'results_%s.json' % get_simulation_suffix(self.MD)) + self.to_json(fp=self.run_file) + self.logger.info("Simulation results and analysis saved in: %s" % self.run_file) + + self.logger.info("END OF SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) + self.logger.info("%s /" % ("=" * 55)) + return self + + def to_json(self, fp=None): + y = self.run.jsobj # :class:`Simulation` instance + y.meta_data = self.MD + return y.to_json(fp=fp) diff --git a/vfrecovery/core/simulation_handler_legacy.py b/vfrecovery/core/simulation_handler_legacy.py deleted file mode 100644 index 2b58e95..0000000 --- a/vfrecovery/core/simulation_handler_legacy.py +++ /dev/null @@ -1,595 +0,0 @@ -import xarray as xr -import pandas as pd -import numpy as np -import json -import matplotlib -from sklearn.neighbors import KernelDensity -from scipy.signal import find_peaks -from sklearn.metrics import pairwise_distances -import matplotlib.pyplot as plt -import argopy.plot as argoplot -import cartopy.crs as ccrs - -from vfrecovery.utils.misc import get_cfg_str, get_ea_profile_page_url -from vfrecovery.plots.utils import save_figurefile, map_add_features -from vfrecovery.utils.geo import haversine, bearing - - -class SimPredictor_0: - """ - - Examples - -------- - T = Trajectories(traj_zarr_file) - df = T.get_index().add_distances() - - SP = SimPredictor(df) - SP.fit_predict() - SP.add_metrics(VFvelocity) - SP.bbox() - SP.plot_predictions(VFvelocity) - SP.plan - SP.n_cycles - SP.trajectory - SP.prediction - """ - - def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): - self.swarm = df_sim - self.obs = df_obs - # self.set_weights() - self.WMO = np.unique(df_obs['wmo'])[0] - self._json = None - - def __repr__(self): - summary = [""] - summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) - summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) - summary.append("Number of simulated cycles: %i profile(s) for cycle number(s): [%s]" % ( - self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) - summary.append("Observed reference: %i profile(s) for cycle number(s): [%s]" % ( - self.obs.shape[0], ",".join([str(c) for c in self.obs_cycles]))) - return "\n".join(summary) - - @property - def n_cycles(self): - """Number of simulated cycles""" - return len(np.unique(self.swarm['cyc'])) - # return len(self.sim_cycles) - - @property - def obs_cycles(self): - """Observed cycle numbers""" - return np.unique(self.obs['cyc']) - - @property - def sim_cycles(self): - """Simulated cycle numbers""" - return self.obs_cycles[0] + 1 + range(self.n_cycles) - - @property - def plan(self) -> pd.DataFrame: - if not hasattr(self, '_plan'): - df_plan = self.swarm[self.swarm['cyc'] == 1][['date', 'deploy_lon', 'deploy_lat']] - df_plan = df_plan.rename(columns={'deploy_lon': 'longitude', 'deploy_lat': 'latitude'}) - self._plan = df_plan - return self._plan - - @property - def trajectory(self): - """Return the predicted trajectory as a simple :class:`np.array` - - First row is longitude, 2nd is latitude and 3rd is date of simulated profiles - - Return - ------ - :class:`np.array` - - """ - if self._json is None: - raise ValueError("Please call `fit_predict` first") - - traj_prediction = 
np.array([self.obs['longitude'].values[0], - self.obs['latitude'].values[0], - self.obs['date'].values[0]])[ - np.newaxis] # Starting point where swarm was deployed - for cyc in self._json['predictions'].keys(): - xpred = self._json['predictions'][cyc]['location']['longitude'] - ypred = self._json['predictions'][cyc]['location']['latitude'] - tpred = pd.to_datetime(self._json['predictions'][cyc]['location']['time']) - traj_prediction = np.concatenate((traj_prediction, - np.array([xpred, ypred, tpred])[np.newaxis]), - axis=0) - return traj_prediction - - @property - def predictions(self): - if self._json is None: - raise ValueError("Please call `fit_predict` first") - return self._json - - def bbox(self, s: float = 1) -> list: - """Get a bounding box for maps - - Parameters - ---------- - s: float, default:1 - - Returns - ------- - list - """ - df_sim = self.swarm - df_obs = self.obs - - box = [np.min([df_sim['deploy_lon'].min(), - df_sim['longitude'].min(), - df_sim['rel_lon'].min(), - df_obs['longitude'].min()]), - np.max([df_sim['deploy_lon'].max(), - df_sim['longitude'].max(), - df_sim['rel_lon'].max(), - df_obs['longitude'].max()]), - np.min([df_sim['deploy_lat'].min(), - df_sim['latitude'].min(), - df_sim['rel_lat'].min(), - df_obs['latitude'].min()]), - np.max([df_sim['deploy_lat'].max(), - df_sim['latitude'].max(), - df_sim['rel_lat'].max(), - df_obs['latitude'].max()])] - rx, ry = box[1] - box[0], box[3] - box[2] - r = np.min([rx, ry]) - ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, box[3] + s * r] - - return ebox - - -class SimPredictor_1(SimPredictor_0): - - def set_weights(self, scale: float = 20): - """Compute weights for predictions - - Add weights column to swarm :class:`pandas.DataFrame` as a gaussian distance - with a std based on the size of the deployment domain - - Parameters - ---------- - scale: float (default=20.) - """ - rx, ry = self.plan['longitude'].max() - self.plan['longitude'].min(), \ - self.plan['latitude'].max() - self.plan['latitude'].min() - r = np.min([rx, ry]) # Minimal size of the deployment domain - weights = np.exp(-(self.swarm['distance_origin'] ** 2) / (r / scale)) - weights[np.isnan(weights)] = 0 - self.swarm['weights'] = weights - return self - - def fit_predict(self, weights_scale: float = 20.) -> dict: - """Predict profile positions from simulated float swarm - - Prediction is based on a :class:`klearn.neighbors._kde.KernelDensity` estimate of the N_FLOATS - simulated, weighted by their deployment distance to the observed previous cycle position. - - Parameters - ---------- - weights_scale: float (default=20) - Scale (in deg) to use to weight the deployment distance to the observed previous cycle position - - Returns - ------- - dict - """ - - def blank_prediction() -> dict: - return {'location': { - 'longitude': None, - 'latitude': None, - 'time': None}, - 'cycle_number': None, - 'wmo': int(self.WMO), - } - - # Compute weights of the swarm float profiles locations - self.set_weights(scale=weights_scale) - - self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}} - - cycles = np.unique(self.swarm['cyc']).astype(int) # 1, 2, ... 
- recovery_predictions = {} - for icyc, this_sim_cyc in enumerate(cycles): - this_cyc_df = self.swarm[self.swarm['cyc'] == this_sim_cyc] - weights = this_cyc_df['weights'] - x, y = this_cyc_df['rel_lon'], this_cyc_df['rel_lat'] - - w = weights / np.max(np.abs(weights), axis=0) - X = np.array([x, y]).T - kde = KernelDensity(kernel='gaussian', bandwidth=0.15).fit(X, sample_weight=w) - - xg, yg = (np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100), - np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)) - xg, yg = np.meshgrid(xg, yg) - Xg = np.array([xg.flatten(), yg.flatten(), ]).T - llh = kde.score_samples(Xg) - xpred = Xg[np.argmax(llh), 0] - ypred = Xg[np.argmax(llh), 1] - tpred = this_cyc_df['date'].mean() - - # Store results - recovery = blank_prediction() - recovery['location']['longitude'] = xpred - recovery['location']['latitude'] = ypred - recovery['location']['time'] = tpred.isoformat() - recovery['cycle_number'] = int(self.sim_cycles[icyc]) - recovery['virtual_cycle_number'] = int(self.sim_cycles[icyc]) - recovery_predictions.update({int(this_sim_cyc): recovery}) - - # - self._prediction_data['cyc'].update({this_sim_cyc: {'weights': this_cyc_df['weights']}}) - - # Store results internally - self._json = {'predictions': recovery_predictions} - - # Add more stuff to internal storage: - self._predict_errors() - self._add_ref() - self.add_metrics() - - # - return self - - -class SimPredictor_2(SimPredictor_1): - - def _predict_errors(self) -> dict: - """Compute error metrics for the predicted positions - - This is for past cycles, for which we have observed positions of the predicted profiles - - This adds more keys to self._json['predictions'] created by the fit_predict method - - Returns - ------- - dict - """ - - def blank_error(): - return {'distance': {'value': None, - 'unit': 'km'}, - 'bearing': {'value': None, - 'unit': 'degree'}, - 'time': {'value': None, - 'unit': 'hour'} - } - - cyc0 = self.obs_cycles[0] - if self._json is None: - raise ValueError("Please call `fit_predict` first") - recovery_predictions = self._json['predictions'] - - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in self.obs_cycles: - error = blank_error() - - this_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0] - xobs = this_obs_profile['longitude'].iloc[0] - yobs = this_obs_profile['latitude'].iloc[0] - tobs = this_obs_profile['date'].iloc[0] - - prev_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0 - 1] - xobs0 = prev_obs_profile['longitude'].iloc[0] - yobs0 = prev_obs_profile['latitude'].iloc[0] - - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = pd.to_datetime(this_prediction['location']['time']) - - dd = haversine(xobs, yobs, xpred, ypred) - error['distance']['value'] = dd - - observed_bearing = bearing(xobs0, yobs0, xobs, yobs) - sim_bearing = bearing(xobs0, yobs0, xpred, ypred) - error['bearing']['value'] = sim_bearing - observed_bearing - - dt = pd.Timedelta(tpred - tobs) / np.timedelta64(1, 's') - # print(tpred, tobs, pd.Timedelta(tpred - tobs)) - error['time']['value'] = dt / 3600 # From seconds to hours - - this_prediction['location_error'] = error - recovery_predictions.update({sim_c: this_prediction}) - - self._json.update({'predictions': recovery_predictions}) - return self - - def _add_ref(self): - """Add observations data to internal data structure - - This adds more keys to self._json['predictions'] created by the fit_predict method - - """ - if self._json 
is None: - raise ValueError("Please call `predict` first") - - # Observed profiles that were simulated: - profiles_to_predict = [] - for cyc in self.sim_cycles: - this = {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': "", - 'location': {'longitude': None, - 'latitude': None, - 'time': None} - } - if cyc in self.obs_cycles: - this['url_profile'] = get_ea_profile_page_url(self.WMO, cyc) - this_df = self.obs[self.obs['cyc'] == cyc] - this['location']['longitude'] = this_df['longitude'].iloc[0] - this['location']['latitude'] = this_df['latitude'].iloc[0] - this['location']['time'] = this_df['date'].iloc[0].isoformat() - profiles_to_predict.append(this) - - self._json.update({'observations': profiles_to_predict}) - - # Observed profile used as initial conditions to the simulation: - cyc = self.obs_cycles[0] - this_df = self.obs[self.obs['cyc'] == cyc] - self._json.update({'initial_profile': {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': get_ea_profile_page_url(self.WMO, cyc), - 'location': {'longitude': this_df['longitude'].iloc[0], - 'latitude': this_df['latitude'].iloc[0], - 'time': this_df['date'].iloc[0].isoformat() - } - }}) - - # - return self - - def add_metrics(self, VFvel=None): - """Compute more metrics to understand the prediction error - - 1. Compute a transit time to cover the distance error - (assume a 12 kts boat speed with 1 kt = 1.852 km/h) - - 1. Compute the possible drift due to the time lag between the predicted profile timing and the expected one - - This adds more keys to self._json['predictions'] created by the fit_predict method - - """ - cyc0 = self.obs_cycles[0] - if self._json is None: - raise ValueError("Please call `predict` first") - recovery_predictions = self._json['predictions'] - - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in self.obs_cycles and 'location_error' in this_prediction.keys(): - - error = this_prediction['location_error'] - metrics = {} - - # Compute a transit time to cover the distance error: - metrics['transit'] = {'value': None, - 'unit': 'hour', - 'comment': 'Transit time to cover the distance error ' - '(assume a 12 kts boat speed with 1 kt = 1.852 km/h)'} - - if error['distance']['value'] is not None: - metrics['transit']['value'] = pd.Timedelta(error['distance']['value'] / (12 * 1.852), - 'h').seconds / 3600. 
- - # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: - if VFvel is not None: - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = this_prediction['location']['time'] - dsc = VFvel.field.interp( - {VFvel.dim['lon']: xpred, - VFvel.dim['lat']: ypred, - VFvel.dim['time']: tpred, - VFvel.dim['depth']: - VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} - ) - velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] - metrics['surface_drift'] = {'value': None, - 'unit': 'km', - 'surface_currents_speed': None, - 'surface_currents_speed_unit': 'm/s', - 'comment': 'Drift by surface currents due to the float ascent time error ' - '(difference between simulated profile time and the observed one).'} - if error['time']['value'] is not None: - metrics['surface_drift']['value'] = (error['time']['value'] * 3600 * velc / 1e3) - metrics['surface_drift']['surface_currents_speed'] = velc - - # - this_prediction['metrics'] = metrics - recovery_predictions.update({sim_c: this_prediction}) - - self._json.update({"predictions": recovery_predictions}) - return self - - -class SimPredictor_3(SimPredictor_2): - - def plot_predictions(self, - VFvel, - cfg, - sim_suffix='', # get_sim_suffix(this_args, cfg) - s=0.2, - alpha=False, - save_figure=False, - workdir='.', - figsize=None, - dpi=120, - orient='portrait'): - ebox = self.bbox(s=s) - pred_traj = self.trajectory - - if orient == 'portrait': - if self.n_cycles == 1: - nrows, ncols = 2, 1 - if figsize is None: - figsize = (5, 5) - else: - nrows, ncols = self.n_cycles, 2 - if figsize is None: - figsize = (5, (self.n_cycles-1)*5) - else: - if self.n_cycles == 1: - nrows, ncols = 1, 2 - else: - nrows, ncols = 2, self.n_cycles - if figsize is None: - figsize = (ncols*5, 5) - - def plot_this(this_ax, i_cycle, ip): - df_sim = self.swarm[self.swarm['cyc'] == i_cycle + 1] - weights = self._prediction_data['cyc'][i_cycle + 1]['weights'].values - if self.sim_cycles[i_cycle] in self.obs_cycles: - this_profile = self.obs[self.obs['cyc'] == self.sim_cycles[i_cycle]] - else: - this_profile = None - - xpred = self.predictions['predictions'][i_cycle + 1]['location']['longitude'] - ypred = self.predictions['predictions'][i_cycle + 1]['location']['latitude'] - - this_ax.set_extent(ebox) - this_ax = map_add_features(ax[ix]) - - v = VFvel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']) - v.plot.quiver(x="longitude", - y="latitude", - u=VFvel.var['U'], - v=VFvel.var['V'], - ax=this_ax, - color='grey', - alpha=0.5, - scale=5, - add_guide=False) - - this_ax.plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', - markersize=3, - color='grey', - alpha=0.1, - markeredgecolor=None, - zorder=0) - - this_ax.plot(pred_traj[:, 0], pred_traj[:, 1], color='k', linewidth=1, marker='+') - this_ax.plot(xpred, ypred, color='g', marker='+') - - w = weights / np.max(np.abs(weights), axis=0) - ii = np.argsort(w) - cmap = plt.cm.cool - # cmap = plt.cm.Reds - - if ip == 0: - x, y = df_sim['deploy_lon'], df_sim['deploy_lat'] - title = 'Initial virtual float positions' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - alpha=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - elif ip == 1: - x, y = df_sim['longitude'], df_sim['latitude'] - title = 'Final virtual 
float positions' - if not alpha: - this_ax.scatter(x, y, c=w, marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x, y, c=w, marker='o', s=4, alpha=w, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - elif ip == 2: - x, y = df_sim['rel_lon'], df_sim['rel_lat'] - title = 'Final virtual floats positions relative to observed float' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, alpha=w[ii], edgecolor=None, vmin=0, vmax=1, cmap=cmap) - - # Display full trajectory prediction: - if ip != 0 and this_profile is not None: - this_ax.arrow(this_profile['longitude'].iloc[0], - this_profile['latitude'].iloc[0], - xpred - this_profile['longitude'].iloc[0], - ypred - this_profile['latitude'].iloc[0], - length_includes_head=True, fc='k', ec='c', head_width=0.025, zorder=10) - this_ax.plot(xpred, ypred, 'k+', zorder=10) - - this_ax.set_title("") - # this_ax.set_ylabel("Cycle %i predictions" % (i_cycle+1)) - this_ax.set_title("%s\nCycle %i predictions" % (title, self.sim_cycles[i_cycle]), fontsize=6) - - fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi, - subplot_kw={'projection': ccrs.PlateCarree()}, - sharex=True, sharey=True) - ax, ix = ax.flatten(), -1 - - if orient == 'portrait': - rows = range(self.n_cycles) - cols = [1, 2] - else: - rows = [1, 2] - cols = range(self.n_cycles) - - if orient == 'portrait': - for i_cycle in rows: - for ip in cols: - ix += 1 - plot_this(ax[ix], i_cycle, ip) - else: - for ip in rows: - for i_cycle in cols: - ix += 1 - plot_this(ax[ix], i_cycle, ip) - - # log.debug("Start to write metrics string") - # - # xpred = SP.prediction[i_cycle + 1]['location']['longitude']['value'] - # - # err = recovery['prediction_location_error'] - # met = recovery['prediction_metrics'] - # if this_profile.shape[0] > 1: - # # err_str = "Prediction vs Truth: [%0.2fkm, $%0.2f^o$]" % (err['distance'], err['bearing']) - # err_str = "Prediction errors: [dist=%0.2f%s, bearing=$%0.2f^o$, time=%s]\n" \ - # "Distance error represents %s of transit at 12kt" % (err['distance']['value'], - # err['distance']['unit'], - # err['bearing']['value'], - # strfdelta(pd.Timedelta(err['time']['value'], 'h'), - # "{hours}H{minutes:02d}"), - # strfdelta(pd.Timedelta(met['transit']['value'], 'h'), - # "{hours}H{minutes:02d}")) - # else: - # err_str = "" - # - # fig.suptitle("VirtualFleet recovery prediction for WMO %i: \ - # starting from cycle %i, predicting cycle %i\n%s\n%s\n%s" % - # (wmo, cyc[0], cyc[1], get_cfg_str(cfg), err_str, "Prediction based on %s" % vel_name), fontsize=15) - - plt.tight_layout() - if save_figure: - save_figurefile(fig, 'vfrecov_predictions_%s' % sim_suffix, workdir) - - return fig, ax - - -class SimPredictor(SimPredictor_3): - - def to_json(self, fp=None): - kw = {'indent': 4, 'sort_keys': True, 'default': str} - if fp is not None: - if hasattr(fp, 'write'): - json.dump(self._json, fp, **kw) - else: - with open(fp, 'w') as f: - json.dump(self._json, f, **kw) - else: - results_js = json.dumps(self._json, **kw) - return results_js - - diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py index 3f5ce96..7718e8e 100644 --- a/vfrecovery/core/trajfile_handler.py +++ b/vfrecovery/core/trajfile_handler.py @@ -9,7 +9,7 @@ from pathlib import Path from vfrecovery.utils.misc import get_cfg_str -from vfrecovery.plots.utils import get_HBOX, map_add_features, 
map_add_profiles, save_figurefile +from vfrecovery.plots.utils import map_add_features, save_figurefile from vfrecovery.json import Profile from vfrecovery.json import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState diff --git a/vfrecovery/core/trajfile_handler_legacy.py b/vfrecovery/core/trajfile_handler_legacy.py deleted file mode 100644 index 42943bc..0000000 --- a/vfrecovery/core/trajfile_handler_legacy.py +++ /dev/null @@ -1,407 +0,0 @@ -import xarray as xr -import pandas as pd -import numpy as np -import matplotlib -from scipy.signal import find_peaks -from sklearn.metrics import pairwise_distances -import matplotlib.pyplot as plt - -from vfrecovery.utils.misc import get_cfg_str -from vfrecovery.plots.utils import save_figurefile -from vfrecovery.json import Profile - - -class Trajectories: - """Trajectory file manager for VFrecovery - - Examples: - --------- - T = Trajectories(traj_zarr_file) - T.n_floats - T.sim_cycles - df = T.to_index() - df = T.get_index().add_distances() - jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, show_plot=True) - """ - - def __init__(self, zfile): - self.zarr_file = zfile - self.obj = xr.open_zarr(zfile) - self._index = None - - @property - def n_floats(self): - # len(self.obj['trajectory']) - return self.obj['trajectory'].shape[0] - - @property - def sim_cycles(self): - """Return list of cycles simulated""" - cycs = np.unique(self.obj['cycle_number']) - last_obs_phase = \ - self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ - np.newaxis][0] - if last_obs_phase < 3: - cycs = cycs[0:-1] - return cycs - - def __repr__(self): - summary = [""] - summary.append("Swarm size: %i floats" % self.n_floats) - start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) - end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) - summary.append("Simulation length: %s, from %s to %s" % ( - pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) - return "\n".join(summary) - - # def to_index_par(self) -> pd.DataFrame: - # # Deployment loc: - # deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values - # - # def worker(ds, cyc, x0, y0): - # mask = np.logical_and((ds['cycle_number'] == cyc).compute(), - # (ds['cycle_phase'] >= 3).compute()) - # this_cyc = ds.where(mask, drop=True) - # if len(this_cyc['time']) > 0: - # data = { - # 'date': this_cyc.isel(obs=-1)['time'].values, - # 'latitude': this_cyc.isel(obs=-1)['lat'].values, - # 'longitude': this_cyc.isel(obs=-1)['lon'].values, - # 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, - # 'cyc': cyc, - # # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, - # 'deploy_lon': x0, - # 'deploy_lat': y0, - # } - # return pd.DataFrame(data) - # else: - # return None - # - # cycles = np.unique(self.obj['cycle_number']) - # rows = [] - # with concurrent.futures.ThreadPoolExecutor() as executor: - # future_to_url = { - # executor.submit( - # worker, - # self.obj, - # cyc, - # deploy_lon, - # deploy_lat - # ): cyc - # for cyc in cycles - # } - # futures = concurrent.futures.as_completed(future_to_url) - # for future in futures: - # data = None - # try: - # data = future.result() - # except Exception: - # raise - # finally: - # rows.append(data) - # - # rows = [r for r in rows if r is not None] - # df = pd.concat(rows).reset_index() - # df['wmo'] = df['wmo'].astype(int) - # df['cyc'] = 
df['cyc'].astype(int) - # # df['cycle_phase'] = df['cycle_phase'].astype(int) - # self._index = df - # - # return self._index - - def to_index(self) -> pd.DataFrame: - """Compute and return index (profile dataframe from trajectory dataset) - - Create a Profile index :class:`pandas.dataframe` with columns: [data, latitude ,longitude, wmo, cyc, deploy_lon, deploy_lat] - from a trajectory :class:`xarray.dataset`. - - There is one dataframe row for each dataset trajectory cycle. - - We use the last trajectory point of given cycle number (with cycle phase >= 3) to identify a profile location. - - If they are N trajectories simulating C cycles, there will be about a maximum of N*C rows in the dataframe. - - Returns - ------- - :class:`pandas.dataframe` - """ - if self._index is None: - - # Deployment loc: - deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values - - def worker(ds, cyc, x0, y0): - mask = np.logical_and((ds['cycle_number'] == cyc).compute(), - (ds['cycle_phase'] >= 3).compute()) - this_cyc = ds.where(mask, drop=True) - if len(this_cyc['time']) > 0: - data = { - 'date': this_cyc.isel(obs=-1)['time'].values, - 'latitude': this_cyc.isel(obs=-1)['lat'].values, - 'longitude': this_cyc.isel(obs=-1)['lon'].values, - 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, - 'cyc': cyc, - # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, - 'deploy_lon': x0, - 'deploy_lat': y0, - } - return pd.DataFrame(data) - else: - return None - - cycles = np.unique(self.obj['cycle_number']) - rows = [] - for cyc in cycles: - df = worker(self.obj, cyc, deploy_lon, deploy_lat) - rows.append(df) - rows = [r for r in rows if r is not None] - df = pd.concat(rows).reset_index() - df['wmo'] = df['wmo'].astype(int) - df['cyc'] = df['cyc'].astype(int) - # df['cycle_phase'] = df['cycle_phase'].astype(int) - self._index = df - - return self._index - - def get_index(self): - """Compute index and return self""" - self.to_index() - return self - - def add_distances(self, origin: Profile = None) -> pd.DataFrame: - """Compute profiles distance to some origin - - Returns - ------- - :class:`pandas.dataframe` - """ - - # Compute distance between the predicted profile and the initial profile location from the deployment plan - # We assume that virtual floats are sequentially taken from the deployment plan - # Since distances are very short, we compute a simple rectangular distance - - # Observed cycles: - # obs_cyc = np.unique(this_profile['cyc']) - - # Simulated cycles: - # sim_cyc = np.unique(this_df['cyc']) - - df = self._index - - x2, y2 = origin.location.longitude, origin.location.latitude # real float initial position - df['distance'] = np.nan - df['rel_lon'] = np.nan - df['rel_lat'] = np.nan - df['distance_origin'] = np.nan - - def worker(row): - # Simulation profile coordinates: - x0, y0 = row['deploy_lon'], row['deploy_lat'] # virtual float initial position - x1, y1 = row['longitude'], row['latitude'] # virtual float position - - # Distance between each pair of cycles of virtual floats: - dist = np.sqrt((y1 - y0) ** 2 + (x1 - x0) ** 2) - row['distance'] = dist - - # Shift between each pair of cycles: - dx, dy = x1 - x0, y1 - y0 - # Get a relative displacement from real float initial position: - row['rel_lon'] = x2 + dx - row['rel_lat'] = y2 + dy - - # Distance between the predicted profile and the observed initial profile - dist = np.sqrt((y2 - y0) ** 2 + (x2 - x0) ** 2) - row['distance_origin'] = dist - - return row - - df = df.apply(worker, axis=1) - 
self._index = df - - return self._index - - def analyse_pairwise_distances(self, - cycle: int = 1, - show_plot: bool = True, - save_figure: bool = False, - workdir: str = '.', - sim_suffix = None, - this_cfg = None, - this_args: dict = None): - - def get_hist_and_peaks(this_d): - x = this_d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - hist, bin_edges = np.histogram(x, bins=100, density=1) - # dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) - return {'pdf': hist, 'bins': bin_edges[0:-1], 'Npeaks': len(peaks)} - - # Squeeze traj file to the first predicted cycle (sim can have more than 1 cycle) - ds = self.obj.where((self.obj['cycle_number'] == cycle).compute(), drop=True) - ds = ds.compute() - - # Compute trajectories relative to the single/only real float initial position: - lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] - lon, lat = ds['lon'].values, ds['lat'].values - ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, - dims=['trajectory', 'obs']) - ds['latc'] = xr.DataArray(lat - np.broadcast_to(lat[:, 0][:, np.newaxis], lat.shape) + lat0, - dims=['trajectory', 'obs']) - - # Compute trajectory lengths: - ds['length'] = np.sqrt(ds.diff(dim='obs')['lon'] ** 2 + ds.diff(dim='obs')['lat'] ** 2).sum(dim='obs') - ds['lengthc'] = np.sqrt(ds.diff(dim='obs')['lonc'] ** 2 + ds.diff(dim='obs')['latc'] ** 2).sum(dim='obs') - - # Compute initial points pairwise distances, PDF and nb of peaks: - X = ds.isel(obs=0) - X = X.isel(trajectory=~np.isnan(X['lon'])) - X0 = np.array((X['lon'].values, X['lat'].values)).T - d0 = pairwise_distances(X0, n_jobs=-1) - d0 = np.triu(d0) - d0[d0 == 0] = np.nan - - x0 = d0.flatten() - x0 = x0[~np.isnan(x0)] - x0 = x0[:, np.newaxis] - - hist0, bin_edges0 = np.histogram(x0, bins=100, density=1) - dh0 = np.diff(bin_edges0[0:2]) - peaks0, _ = find_peaks(hist0 / np.max(hist0), height=.4, distance=20) - - # Compute final points pairwise distances, PDF and nb of peaks: - X = ds.isel(obs=-1) - X = X.isel(trajectory=~np.isnan(X['lon'])) - dsf = X - X = np.array((X['lon'].values, X['lat'].values)).T - d = pairwise_distances(X, n_jobs=-1) - d = np.triu(d) - d[d == 0] = np.nan - - x = d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - - hist, bin_edges = np.histogram(x, bins=100, density=1) - dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) - - # Compute final points pairwise distances (relative traj), PDF and nb of peaks: - X1 = ds.isel(obs=-1) - X1 = X1.isel(trajectory=~np.isnan(X1['lonc'])) - dsfc = X1 - X1 = np.array((X1['lonc'].values, X1['latc'].values)).T - d1 = pairwise_distances(X1, n_jobs=-1) - d1 = np.triu(d1) - d1[d1 == 0] = np.nan - - x1 = d1.flatten() - x1 = x1[~np.isnan(x1)] - x1 = x1[:, np.newaxis] - - hist1, bin_edges1 = np.histogram(x1, bins=100, density=1) - dh1 = np.diff(bin_edges1[0:2]) - peaks1, _ = find_peaks(hist1 / np.max(hist1), height=.4, distance=20) - - # Compute the overlapping between the initial and relative state PDFs: - bin_unif = np.arange(0, np.max([bin_edges0, bin_edges1]), np.min([dh0, dh1])) - dh_unif = np.diff(bin_unif[0:2]) - hist0_unif = np.interp(bin_unif, bin_edges0[0:-1], hist0) - hist_unif = np.interp(bin_unif, bin_edges[0:-1], hist) - hist1_unif = np.interp(bin_unif, bin_edges1[0:-1], hist1) - - # Area under hist1 AND hist0: - # overlapping = np.sum(hist1_unif[hist0_unif >= hist1_unif]*dh_unif) - overlapping = 
np.sum(hist_unif[hist0_unif >= hist_unif] * dh_unif) - - # Ratio of the max PDF ranges: - # staggering = np.max(bin_edges1)/np.max(bin_edges0) - staggering = np.max(bin_edges) / np.max(bin_edges0) - - # Store metrics in a dict: - prediction_metrics = {} - - prediction_metrics['trajectory_lengths'] = {'median': np.nanmedian(ds['length'].values), - 'std': np.nanstd(ds['length'].values)} - - prediction_metrics['pairwise_distances'] = { - 'initial_state': {'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0)}, - 'final_state': {'median': np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks)}, - 'relative_state': {'median': np.nanmedian(d1), 'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1)}, - 'overlapping': {'value': overlapping, - 'comment': 'Overlapping area between PDF(initial_state) and PDF(final_state)'}, - 'staggering': {'value': staggering, 'comment': 'Ratio of PDF(initial_state) vs PDF(final_state) ranges'}, - 'score': {'value': overlapping / len(peaks), 'comment': 'overlapping/nPDFpeaks(final_state)'}} - - if np.isinf(overlapping / len(peaks)): - raise ValueError("Can't compute the prediction score, infinity !") - - ratio = prediction_metrics['pairwise_distances']['final_state']['std'] / \ - prediction_metrics['pairwise_distances']['initial_state']['std'] - prediction_metrics['pairwise_distances']['std_ratio'] = ratio - - # Figure: - if show_plot: - backend = matplotlib.get_backend() - if this_args is not None and this_args.json: - matplotlib.use('Agg') - - fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 10), dpi=90) - ax, ix = ax.flatten(), -1 - cmap = plt.cm.coolwarm - - ix += 1 - dd = dsf['length'].values - ax[ix].plot(X0[:, 0], X0[:, 1], '.', markersize=3, color='grey', alpha=0.5, markeredgecolor=None, zorder=0) - ax[ix].scatter(X[:, 0], X[:, 1], c=dd, zorder=10, s=3, cmap=cmap) - ax[ix].grid() - this_traj = int(dsf.isel(trajectory=np.argmax(dd))['trajectory'].values[np.newaxis][0]) - ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], - ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'r', - zorder=13, label='Longest traj.') - this_traj = int(dsf.isel(trajectory=np.argmin(dd))['trajectory'].values[np.newaxis][0]) - ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], - ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'b', - zorder=13, label='Shortest traj.') - ax[ix].legend() - ax[ix].set_title('Trajectory lengths') - - ix += 1 - ax[ix].plot(bin_edges0[0:-1], hist0, label='Initial (%i peak)' % len(peaks0), color='gray') - ax[ix].plot(bin_edges[0:-1], hist, label='Final (%i peak)' % len(peaks), color='lightblue') - ax[ix].plot(bin_edges[peaks], hist[peaks], "x", label='Peaks') - ax[ix].legend() - ax[ix].grid() - ax[ix].set_xlabel('Pairwise distance [degree]') - line1 = "Staggering: %0.4f" % staggering - line2 = "Overlapping: %0.4f" % overlapping - line3 = "Score: %0.4f" % (overlapping / len(peaks)) - ax[ix].set_title("Pairwise distances PDF: [%s / %s / %s]" % (line1, line2, line3)) - - if this_args is not None: - line0 = "VirtualFleet recovery swarm simulation for WMO %i, starting from cycle %i, predicting cycle %i\n%s" % \ - (this_args.wmo, this_args.cyc[0] - 1, this_args.cyc[0], get_cfg_str(this_cfg)) - line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats) - else: - line0 = "VirtualFleet recovery swarm simulation for cycle %i" % cycle - line1 = "Simulation made with %i 
virtual floats" % (self.n_floats) - - fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) - plt.tight_layout() - - if save_figure: - if sim_suffix is not None: - filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, cycle) - else: - filename = 'vfrecov_metrics01_cyc%i' % (cycle) - save_figurefile(fig, filename, workdir) - - if this_args is not None and this_args.json: - matplotlib.use(backend) - - if show_plot: - return prediction_metrics, fig, ax - else: - return prediction_metrics - diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py index a908248..08eb52b 100644 --- a/vfrecovery/core/utils.py +++ b/vfrecovery/core/utils.py @@ -7,6 +7,8 @@ from vfrecovery.json import Profile, MetaData +pp_obj = lambda x: "\n%s" % "\n".join(["\t%s" % line for line in x.__repr__().split("\n")]) + def ArgoIndex2df_obs(a_wmo, a_cyc, cache:bool=False, cachedir:str='.') -> pd.DataFrame: """Retrieve WMO/CYC Argo index entries as :class:`pd.DataFrame` From f9c75268afb384ef7b731f192e335a8f7be82431 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Fri, 29 Mar 2024 09:18:58 +0100 Subject: [PATCH 25/38] misc --- README.md | 42 +++++++++------- vfrecovery/__init__.py | 1 + .../command_line_interface/group_predict.py | 19 +++++-- vfrecovery/core/predict.py | 7 +++ vfrecovery/core/simulation_handler.py | 49 +++++++++++++------ vfrecovery/core/utils.py | 2 + vfrecovery/downloaders/core.py | 39 ++++++++++----- vfrecovery/downloaders/glorys.py | 37 ++++++++++++-- vfrecovery/python_interface/predict.py | 8 ++- 9 files changed, 151 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 8de145c..953cfb9 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,12 @@ Primary groups of commands are ``predict`` and ``describe``. ``` Usage: vfrecovery predict [OPTIONS] WMO CYC - Execute VirtualFleet-Recovery predictor + Execute the VirtualFleet-Recovery predictor + + WMO is the float World Meteorological Organisation number. + + CYC is the cycle number location to predict. If you want to simulate more + than 1 cycle, use the `n_predictions` option (see below). Options: -v, --velocity TEXT Velocity field to use. Possible values are: @@ -35,13 +40,18 @@ Options: --cfg_free_surface_drift INTEGER Virtual cycle number to start free surface drift, inclusive [default: 9999] - -np, --n_predictions INTEGER Number of profiles to simulate after cycle + -np, --n_predictions INTEGER Number of profiles to predict after cycle specified with argument 'CYC' [default: 0] - -nf, --n_floats INTEGER Number of virtual floats simulated to make - predictions [default: 100] + -nf, --n_floats INTEGER Swarm size, i.e. the number of virtual + floats simulated to make predictions + [default: 100] -s, --domain_min_size FLOAT Minimal size (deg) of the simulation domain around the initial float position [default: - 12] + 5] + --overwrite Should past simulation data be overwritten + or not, for a similar set of arguments + --lazy / --no-lazy Load velocity data in lazy mode (not saved + on file). [default: lazy] --log_level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET] Set the details printed to console by the command (based on standard logging library). 
@@ -50,7 +60,7 @@ Options: Examples: - vfrecovery predict 6903091 112 + vfrecovery predict 6903091 112 ``` ### vfrecovery describe @@ -101,7 +111,9 @@ vfrecovery.predict( cfg_profile_depth: float = None, cfg_free_surface_drift: int = 9999, n_floats: int = 100, - domain_min_size: float = 12.0, + domain_min_size: float = 5.0, + overwrite: bool = False, + lazy: bool = True, log_level: str = 'INFO', ) ``` @@ -120,7 +132,7 @@ vfrecovery predict WMO CYC1 CYC2 CYC3 Options: ```bash vfrecovery predict --n_predictions 3 WMO CYC0 -vfrecovery predict -n 3 WMO CYC0 +vfrecovery predict -np 3 WMO CYC0 vfrecovery predict --n_floats 2000 WMO CYC vfrecovery predict -nf 2000 WMO CYC @@ -129,29 +141,21 @@ vfrecovery predict --velocity GLORYS WMO CYC vfrecovery predict -v GLORYS WMO CYC vfrecovery predict --cfg_parking_depth 200 WMO CYC -vfrecovery predict --cfg_parking_depth [200, 1000] WMO CYC1 CYC2 vfrecovery predict --cfg_cycle_duration 60 WMO CYC vfrecovery predict --cfg_profile_depth 1000 WMO CYC ``` - -## Describe results -```bash -vfrecovery describe WMO CYC -vfrecovery describe WMO CYC1 CYC2 CYC3 -``` +## Describe results ```bash vfrecovery describe velocity WMO CYC +vfrecovery describe obs WMO CYC1 CYC2 CYC3 ``` -## Other commands +## Other possible commands ```bash -vfrecovery whiterun WMO CYC -vfrecovery whiterun WMO CYC1 CYC2 CYC3 - vfrecovery meetwith "cruise_track.csv" WMO CYC0 ``` diff --git a/vfrecovery/__init__.py b/vfrecovery/__init__.py index 6ae85fb..f98d93b 100644 --- a/vfrecovery/__init__.py +++ b/vfrecovery/__init__.py @@ -18,3 +18,4 @@ logging.Formatter.converter = time.gmtime from vfrecovery.python_interface.predict import predict +from vfrecovery.downloaders import Armor3d, Glorys diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index 2bae26b..3c28720 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -90,7 +90,7 @@ def cli_group_predict() -> None: required=False, default=0, show_default=True, - help="Number of profiles to simulate after cycle specified with argument 'CYC'", + help="Number of profiles to predict after cycle specified with argument 'CYC'", ) @click.option( "-nf", "--n_floats", @@ -98,16 +98,25 @@ def cli_group_predict() -> None: required=False, default=100, show_default=True, - help="Number of virtual floats simulated to make predictions", + help="Swarm size, i.e. the number of virtual floats simulated to make predictions", ) @click.option( "-s", "--domain_min_size", type=float, required=False, - default=12, + default=5, show_default=True, help="Minimal size (deg) of the simulation domain around the initial float position", ) +@click.option('--overwrite', + is_flag=True, + help="Should past simulation data be overwritten or not, for a similar set of arguments" + ) +@click.option('--lazy/--no-lazy', + default=True, + show_default=True, + help="Load velocity data in lazy mode (not saved on file)." 
+ ) @click.option( "--log_level", type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), @@ -132,6 +141,8 @@ def predict( cfg_free_surface_drift, n_floats, domain_min_size, + overwrite, + lazy, log_level, ) -> None: """ @@ -156,5 +167,7 @@ def predict( cfg_free_surface_drift=cfg_free_surface_drift, n_floats=n_floats, domain_min_size=domain_min_size, + overwrite=overwrite, + lazy=lazy, log_level=log_level) # blank_logger.info(json_dump) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index dadd1d6..ae9ab9c 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -6,6 +6,7 @@ import logging from .simulation_handler import Simulation +from .utils import pp_obj root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") @@ -49,6 +50,8 @@ def predict_function( cfg_free_surface_drift: int, n_floats: int, domain_min_size: float, + overwrite: bool, + lazy: bool, log_level: str, ) -> str: """ @@ -67,6 +70,8 @@ def predict_function( cfg_free_surface_drift n_floats domain_min_size + overwrite + lazy log_level Returns @@ -120,6 +125,8 @@ def predict_function( n_floats=n_floats, velocity=velocity, output_path=output_path, + overwrite=overwrite, + lazy=lazy, logger=log_this, ) S.setup(cfg_parking_depth=cfg_parking_depth, diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py index 97b9c48..321f585 100644 --- a/vfrecovery/core/simulation_handler.py +++ b/vfrecovery/core/simulation_handler.py @@ -16,7 +16,6 @@ from .trajfile_handler import Trajectories from .analysis_handler import RunAnalyser - root_logger = logging.getLogger("vfrecovery_root_logger") @@ -43,7 +42,6 @@ def error(txt) -> 'default_logger': return default_logger(txt, 'ERROR') - class Simulation: """Base class to execute the simulation/prediction workflow @@ -61,6 +59,9 @@ def __init__(self, wmo, cyc, **kwargs): self.output_path = kwargs['output_path'] self.logger = default_logger if 'logger' not in kwargs else kwargs['logger'] + self.overwrite = kwargs['overwrite'] if 'overwrite' in kwargs else False + self.lazy = kwargs['lazy'] if 'lazy' in kwargs else True + self.logger.info("%s \\" % ("=" * 55)) self.logger.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) @@ -119,22 +120,31 @@ def _setup_load_velocity_data(self, **kwargs): domain, domain_center = get_domain(self.P_obs, kwargs['domain_min_size']) # and time: cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) - self.n_days = (len(self.cyc) - 1) * cycle_period + 1 - - self.logger.info("Loading %s velocity field to cover %i days starting on %s" % ( - self.MD.velocity_field, self.n_days, self.P_obs[0].location.time)) + self.n_days = len(self.cyc) * cycle_period + 1 + + self.logger.info("Velocity field should cover %i cycles of %i hours" % (len(self.cyc), 24 * cycle_period)) + self.logger.info("Loading %i days of %s velocity starting on %s" % ( + self.n_days, self.MD.velocity_field, self.P_obs[0].location.time)) + + self.ds_vel, velocity_file, new_file = get_velocity_field(domain, self.P_obs[0].location.time, + n_days=self.n_days, + output=self.output_path, + dataset=self.MD.velocity_field, + logger=self.logger, + lazy=self.lazy, + ) + if new_file: + # We force overwrite results because we're using a new velocity field + self.logger.warning("Found a new velocity field, force overwriting results") + self.overwrite = True - self.ds_vel, velocity_file = 
get_velocity_field(domain, self.P_obs[0].location.time, - n_days=self.n_days, - output=self.output_path, - dataset=self.MD.velocity_field) self.velocity_file = velocity_file self.logger.debug(pp_obj(self.ds_vel)) self.logger.info("Loaded %s field from %s to %s" % ( self.MD.velocity_field, pd.to_datetime(self.ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), pd.to_datetime(self.ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")) - ) + ) def setup(self, **kwargs): """Fulfill all requirements for the simulation""" @@ -142,15 +152,17 @@ def setup(self, **kwargs): self._setup_float_config(**kwargs) self._setup_load_velocity_data(**kwargs) self.logger.info("Simulation data will be registered under: %s%s*%s*" % (self.output_path, - os.path.sep, - get_simulation_suffix(self.MD))) + os.path.sep, + get_simulation_suffix(self.MD))) self.logger.debug("Setup terminated") return self def _execute_get_velocity(self): self.logger.info("Create a velocity object") self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, - src=self.ds_vel) + src=self.ds_vel, + logger=self.logger, + ) self.logger.info("Plot velocity") for it in [0, -1]: @@ -163,7 +175,8 @@ def _execute_get_plan(self): # VirtualFleet, get a deployment plan: self.logger.info("Create a deployment plan") df_plan = setup_deployment_plan(self.P_obs[0], nfloats=self.MD.n_floats) - self.logger.info("Set %i virtual floats to deploy (i.e. swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) + self.logger.info( + "Set %i virtual floats to deploy (i.e. swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) self.PLAN = {'lon': df_plan['longitude'], 'lat': df_plan['latitude'], @@ -182,6 +195,7 @@ def execute(self): fieldset=self.VEL, mission=self.CFG, verbose_events=False) + self.logger.debug(pp_obj(self.VFleet)) # Execute the simulation: self.logger.info("Starting simulation") @@ -192,7 +206,7 @@ def execute(self): # shutil.rmtree(output_path) self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) - if os.path.exists(self.traj_file): + if os.path.exists(self.traj_file) and not self.overwrite: self.logger.warning("Using data from a previous similar run (no simulation executed)") else: self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), @@ -204,6 +218,7 @@ def execute(self): verbose_progress=True, ) self.logger.info("Simulation ended with success") + self.logger.info(pp_obj(self.VFleet)) return self def _predict_read_trajectories(self): @@ -274,7 +289,9 @@ def finish(self, execution_start: float, process_start: float): 'date': pd.to_datetime('now', utc=True), 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), + 'description': None, }) + self.logger.debug(pp_obj(self.MD.computation)) self.run_file = os.path.join(self.output_path, 'results_%s.json' % get_simulation_suffix(self.MD)) self.to_json(fp=self.run_file) diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py index 08eb52b..c3811d9 100644 --- a/vfrecovery/core/utils.py +++ b/vfrecovery/core/utils.py @@ -71,7 +71,9 @@ def get_simulation_suffix(md: MetaData) -> str: def get_domain(Plist, size): + # Get mean position of the observed profiles: c = [np.mean([p.location.longitude for p in Plist]), np.mean([p.location.latitude for p in Plist])] + # Set the domain: domain = [c[0] - size / 2, c[0] + size / 2, c[1] - size / 2, c[1] + size / 2] domain = [np.round(d, 3) for d in domain] diff 
--git a/vfrecovery/downloaders/core.py b/vfrecovery/downloaders/core.py
index 0e86d9f..21205aa 100644
--- a/vfrecovery/downloaders/core.py
+++ b/vfrecovery/downloaders/core.py
@@ -3,14 +3,11 @@
 import xarray as xr

 from . import Glorys, Armor3d
-# import logging
+from vfrecovery.core.utils import pp_obj
-
-# log = logging.getLogger("vfrecovery.download.core")
-
-
-def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D'):
-    """Return the velocity field as an :class:xr.Dataset, download if needed
+def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D', logger=None, lazy=True) -> tuple:
+    """Return the velocity field as an :class:`xr.Dataset`, force download/save if not lazy

     Parameters
     ----------
@@ -19,24 +16,44 @@ def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D'):
     n_days
     output
     dataset
+    logger
+    lazy
+
+    Returns
+    -------
+    tuple
     """
+
     def get_velocity_filename(dataset, n_days):
         download_date = pd.to_datetime('now', utc='now').strftime("%Y%m%d")
         fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, download_date))
         return fname
+
     velocity_file = get_velocity_filename(dataset, n_days)
     if not os.path.exists(velocity_file):
+        new = True
         # Define Data loader:
         loader = Armor3d if dataset == 'ARMOR3D' else Glorys
-        loader = loader(a_box, a_date - pd.Timedelta(1, 'D'), n_days=n_days)
-        # Load data from Copernicus Marine Data store:
-        ds = loader.to_xarray()
+        # Make an instance
+        # (we add a 1-day safety margin at the beginning to make sure that we have velocity at the deployment time)
+        loader = loader(a_box, a_date - pd.Timedelta(1, 'D'), n_days=n_days+1, logger=logger)
+
+        # Load data from the Copernicus Marine Data store:
+        ds = loader.to_xarray()  # Lazy by default
+        if logger is not None:
+            logger.debug(pp_obj(loader))

         # Save on file for later re-use:
-        ds.to_netcdf(velocity_file)
+        # (this can take a while and is often longer than the lazy mode!)
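+        # NB: in lazy mode the dataset keeps pointing at the Copernicus Marine
+        # store and only fetches chunks on demand, so nothing is written to
+        # disk unless the `lazy` option is turned off below.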
+ if not lazy: + if logger is not None: + logger.debug("Saving velocity on file for later re-used") + ds.to_netcdf(velocity_file) + else: + new = False ds = xr.open_dataset(velocity_file) - return ds, velocity_file + return ds, velocity_file, new diff --git a/vfrecovery/downloaders/glorys.py b/vfrecovery/downloaders/glorys.py index ff404c4..70c2f29 100644 --- a/vfrecovery/downloaders/glorys.py +++ b/vfrecovery/downloaders/glorys.py @@ -4,6 +4,32 @@ import pandas as pd import xarray as xr import copernicusmarine +import logging + + +logger = logging.getLogger("vfrecovery.downloaders") + +class default_logger: + + def __init__(self, txt, log_level): + """Log text""" + getattr(logger, log_level.lower())(txt) + + @staticmethod + def info(txt) -> 'default_logger': + return default_logger(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'default_logger': + return default_logger(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'default_logger': + return default_logger(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'default_logger': + return default_logger(txt, 'ERROR') def get_glorys_forecast_from_datarmor(a_box, a_start_date, n_days=1): @@ -69,7 +95,7 @@ class Glorys: """ - def __init__(self, box, start_date, n_days=1, max_depth=2500): + def __init__(self, box, start_date, n_days=1, max_depth=2500, **kwargs): """ Parameters ---------- @@ -95,6 +121,10 @@ def __init__(self, box, start_date, n_days=1, max_depth=2500): self._loader = self._get_forecast self.dataset_id = "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m" + self.logger = kwargs['logger'] if 'logger' in kwargs else default_logger + self.overwrite_metadata_cache = kwargs['overwrite_metadata_cache'] if 'overwrite_metadata_cache' in kwargs else False + self.disable_progress_bar = kwargs['disable_progress_bar'] if 'disable_progress_bar' in kwargs else False + def _get_this(self, dataset_id, dates): ds = copernicusmarine.open_dataset( dataset_id=dataset_id, @@ -106,7 +136,8 @@ def _get_this(self, dataset_id, dates): start_datetime=dates[0].strftime("%Y-%m-%dT%H:%M:%S"), end_datetime=dates[1].strftime("%Y-%m-%dT%H:%M:%S"), variables=['uo', 'vo'], - disable_progress_bar=True, + disable_progress_bar=self.disable_progress_bar, + overwrite_metadata_cache=self.overwrite_metadata_cache, ) return ds @@ -147,7 +178,7 @@ def to_xarray(self): def __repr__(self): summary = [""] summary.append("dataset_id: %s" % self.dataset_id) - summary.append("First day: %s" % self.start_date) + summary.append("Starting date: %s" % self.start_date) summary.append("N days: %s" % self.n_days) summary.append("Domain: %s" % self.box) summary.append("Max depth (m): %s" % self.max_depth) diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py index 60ac24d..b240d23 100644 --- a/vfrecovery/python_interface/predict.py +++ b/vfrecovery/python_interface/predict.py @@ -15,7 +15,9 @@ def predict( cfg_profile_depth: float = None, cfg_free_surface_drift: int = 9999, n_floats: int = 100, - domain_min_size: float = 12., + domain_min_size: float = 5., + overwrite: bool = False, + lazy: bool = True, log_level: str = 'INFO', ): """ @@ -34,6 +36,8 @@ def predict( cfg_free_surface_drift n_floats domain_min_size + overwrite + lazy log_level Returns @@ -52,6 +56,8 @@ def predict( cfg_free_surface_drift=cfg_free_surface_drift, n_floats=n_floats, domain_min_size=domain_min_size, + overwrite=overwrite, + lazy=lazy, log_level=log_level, ) results = json.loads(results_json) From 1f880db8df49642e42761648c95d7fc13a404a46 Mon Sep 17 00:00:00 2001 From: 
Guillaume Maze
Date: Fri, 29 Mar 2024 14:59:16 +0100
Subject: [PATCH 26/38] New simulation registry

---
 .../command_line_interface/group_describe.py  |  14 +-
 .../command_line_interface/group_predict.py   |   2 +-
 vfrecovery/core/__init__.py                   |   1 +
 vfrecovery/core/db.py                         | 240 ++++++++++++++++++
 vfrecovery/core/predict.py                    |  24 +-
 vfrecovery/core/simulation_handler.py         |  36 ++-
 vfrecovery/core/utils.py                      |  24 +-
 vfrecovery/downloaders/core.py                |   7 +-
 vfrecovery/static/assets/.gitkeep             |   0
 .../static/assets/simulations_registry.pkl    | Bin 0 -> 2041 bytes
 10 files changed, 331 insertions(+), 17 deletions(-)
 create mode 100644 vfrecovery/core/db.py
 create mode 100644 vfrecovery/static/assets/.gitkeep
 create mode 100644 vfrecovery/static/assets/simulations_registry.pkl

diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py
index b7e9d65..7dfb03b 100644
--- a/vfrecovery/command_line_interface/group_describe.py
+++ b/vfrecovery/command_line_interface/group_describe.py
@@ -5,6 +5,7 @@
 from argopy import ArgoIndex

 from vfrecovery.utils.misc import list_float_simulation_folders
+from vfrecovery.core.db import DB

 root_logger = logging.getLogger("vfrecovery_root_logger")
@@ -21,7 +22,7 @@ def cli_group_describe() -> None:
     short_help="Describe VirtualFleet-Recovery data and simulation results",
     help="""
-    TARGET select what is to be described. A string in: ['obs', 'velocity'].
+    TARGET selects what is to be described. A string in: ['obs', 'velocity', 'run'].

     WMO is the float World Meteorological Organisation number
@@ -65,8 +66,8 @@ def describe(
         root_logger.debug("DEBUG mode activated")

     # Validate arguments:
-    if target.lower() not in ["all", "obs", "velocity"]:
-        raise ValueError("The first argument TARGET must be one in ['all', 'obs', 'velocity']")
+    if target.lower() not in ["run", "obs", "velocity"]:
+        raise ValueError("The first argument TARGET must be one in ['run', 'obs', 'velocity']")

     assert is_wmo(wmo)
     wmo = check_wmo(wmo)[0]
@@ -81,6 +82,13 @@ def describe(
     elif target == 'velocity':
         describe_velocity(wmo, cyc)

+    elif target == 'run':
+        describe_run(wmo, cyc)
+
+
+def describe_run(wmo, cyc):
+    print(DB.read_data().T)
+

 def describe_velocity(wmo, cyc):

diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py
index 3c28720..19930d9 100644
--- a/vfrecovery/command_line_interface/group_predict.py
+++ b/vfrecovery/command_line_interface/group_predict.py
@@ -170,4 +170,4 @@ def predict(
         overwrite=overwrite,
         lazy=lazy,
         log_level=log_level)
-    # blank_logger.info(json_dump)
+    blank_logger.info(json_dump)

diff --git a/vfrecovery/core/__init__.py b/vfrecovery/core/__init__.py
index 3c591c3..b02c95e 100644
--- a/vfrecovery/core/__init__.py
+++ b/vfrecovery/core/__init__.py
@@ -2,3 +2,4 @@
 from .trajfile_handler import Trajectories
 from .analysis_handler import RunAnalyser
 from .simulation_handler import Simulation
+from .db import DB

diff --git a/vfrecovery/core/db.py b/vfrecovery/core/db.py
new file mode 100644
index 0000000..e0567b1
--- /dev/null
+++ b/vfrecovery/core/db.py
@@ -0,0 +1,240 @@
+"""
+The primary goal of this module is to make it easy to determine if one simulation has already been done or not
+
+The goal is to provide a "database"-like set of functions to manage all simulations being performed
+
+We should provide methods:
+ - to record a new simulation
+ - to list all past simulations
+ - to search in all past simulations
+
+
+What defines a unique VFrecovery simulation?
+- WMO & CYC & n_predictions targets > 3 params (int, int, int) +- Floats configuration > at least 6 numeric parameters +- Velocity field: name, date of download, domain size > 3 params (str, datetime, int) +- Output path > 1 param (str/Path) +- Swarm size > 1 param (int) + +Rq: the velocity field time frame is set by the WMO/CYC/n_predictions targets. so there's no need for it. + +This first implementation relies on a simple local pickle file with a panda dataframe + +""" +from typing import List, Dict +from virtualargofleet import FloatConfiguration +from pathlib import Path +import pandas as pd +import numpy as np + +from .utils import make_hash_sha256 + +class DB: + """ + + >>> DB.dbfile + >>> DB.init() + >>> DB.clear() + >>> DB.isconnected() + >>> DB.read_data() # Return db content as :class:`pd.DataFrame` + + >>> data = {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0, 'cfg': FloatConfiguration('recovery'), 'velocity': {'name': 'GLORYS', 'download': pd.to_datetime('now', utc=True), 'domain_size': 5}, 'output': Path('.'), 'swarm_size': 1000} + >>> DB.from_dict(data).register() # save to db + >>> DB.from_dict(data).checkout() # delete from db + + >>> partial_data = {'wmo': 6903091} + >>> DB.from_dict(partial_data) # Create new instance for actions + >>> DB.from_dict(partial_data).list() + + """ + wmo: int + cyc: int + n_predictions: int + cfg: FloatConfiguration + velocity: Dict + output: Path + swarm_size: int + + required: List = ['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', + 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', + 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', + 'velocity_download', 'velocity_domain_size', 'output', 'swarm_size'] + properties: List = ['wmo', 'cyc', 'n_predictions', 'cfg', 'velocity', 'output', 'swarm_size'] + + _data: pd.DataFrame + dbfile: Path = (Path(__file__).parent.parent).joinpath('static').joinpath('assets').joinpath( + "simulations_registry.pkl") + + def __init__(self, **kwargs): + # for key in self.required: + # if key not in kwargs: + # raise ValueError("Missing '%s' property" % key) + for key in kwargs: + if key in self.properties: + setattr(self, key, kwargs[key]) + + # Connect to database: + self.connect() + + @classmethod + def isconnected(cls) -> bool: + return cls.dbfile.exists() + + @classmethod + def clear(cls): + return cls.dbfile.unlink(missing_ok=True) + + @classmethod + def init(cls): + df = pd.DataFrame({'wmo': pd.Series(dtype='int'), + 'cyc': pd.Series(dtype='int'), + 'n_predictions': pd.Series(dtype='int'), + 'cfg_cycle_duration': pd.Series(dtype='float'), + 'cfg_life_expectancy': pd.Series(dtype='float'), + 'cfg_parking_depth': pd.Series(dtype='float'), + 'cfg_profile_depth': pd.Series(dtype='float'), + 'cfg_reco_free_surface_drift': pd.Series(dtype='float'), + 'cfg_vertical_speed': pd.Series(dtype='float'), + 'velocity_name': pd.Series(dtype='string'), + 'velocity_download': pd.Series(dtype='datetime64[ns]'), + 'velocity_domain_size': pd.Series(dtype='float'), + 'output': pd.Series(dtype='string'), + 'swarm_size': pd.Series(dtype='int'), + 'uid': pd.Series(dtype='string'), + }) + df.name = pd.to_datetime('now', utc=True) + cls._data = df + return cls + + @classmethod + def connect(cls): + """Connect to database and refresh data holder""" + if not cls.isconnected(): + cls.init() + cls._data.to_pickle(cls.dbfile) + else: + cls._data = pd.read_pickle(cls.dbfile) + cls._data['uid'] = cls._data.apply(lambda row: make_hash_sha256(row.to_dict()), axis=1) + return cls + + @classmethod + def 
read_data(cls): + """Return database content as a :class:`pd.DataFrame`""" + cls.connect() + return cls._data + + @classmethod + def exists(cls, dict_of_values): + df = cls.read_data() + v = df.iloc[:, 0] == df.iloc[:, 0] + for key, value in dict_of_values.items(): + v &= (df[key] == value) + return v.any() + + @classmethod + def put_data(cls, row): + if not cls.exists(row): + df = cls.read_data() + df = pd.concat([df, pd.DataFrame([row])], ignore_index=True) + df.app + df.to_pickle(cls.dbfile) + else: + print("This record is already in the database") + + @classmethod + def del_data(cls, row): + df = cls.read_data() + v = df.iloc[:, 0] == df.iloc[:, 0] + for key, value in row.items(): + v &= (df[key] == value) + df = df[v != True] + df.to_pickle(cls.dbfile) + + @classmethod + def get_data(cls, row): + df = cls.read_data() + v = df.iloc[:, 0] == df.iloc[:, 0] + for key, value in row.items(): + v &= (df[key] == value) + return df[v == True] + + @classmethod + def info(cls) -> str: + return cls.__repr__(cls) + + def __repr__(self): + summary = [""] + + summary.append("db_file: %s" % self.dbfile) + summary.append("connected: %s" % self.isconnected()) + summary.append("Number of records: %i" % self.read_data().shape[0]) + + if hasattr(self, 'wmo'): + summary.append("Current record:") + for p in self.properties: + v = getattr(self, p) + if np.asarray(v).dtype.kind in set('buifc'): + summary.append("\t%s: %s" % (p, v)) + else: + summary.append("\t%s: '%s'" % (p, v)) + return "\n".join(summary) + + @staticmethod + def from_dict(obj: Dict) -> "DB": + return DB(**obj) + + def _instance2row(self): + row = {} + for key in ['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', + 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', + 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', + 'velocity_download', 'velocity_domain_size', 'output', 'swarm_size']: + row.update({key: getattr(self, key, None)}) + + if hasattr(self, 'cfg'): + for key in self.cfg.mission: + row.update({"cfg_%s" % key: self.cfg.mission[key]}) + + if hasattr(self, 'velocity'): + for key in ['name', 'download', 'domain_size']: + if key in self.velocity: + row.update({"velocity_%s" % key: self.velocity[key]}) + + if hasattr(self, 'output'): + row.update({'output': str(getattr(self, 'output', None))}) + return row + + def register(self): + """Add one new record to the database""" + new_row = self._instance2row() + + for key, value in new_row.items(): + if value is None: + raise ValueError("Cannot register a new record with missing value for '%s'" % key) + + self.put_data(new_row) + + def checkout(self): + """Remove record from the database""" + row = self._instance2row() + + for key, value in row.items(): + if value is None: + raise ValueError("Cannot id a record to remove with missing value for '%s'" % key) + + self.del_data(row) + + @property + def registered(self): + row = self._instance2row() + return self.exists(row) + + @property + def uid(self): + row = self._instance2row() + return self.get_data(row)['uid'].values[0] + + @property + def record(self) -> pd.DataFrame: + row = self._instance2row() + return self.get_data(row) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index ae9ab9c..63cdabc 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -4,6 +4,8 @@ from typing import Union import os import logging +import json + from .simulation_handler import Simulation from .utils import pp_obj @@ -135,15 +137,21 @@ def predict_function( 
        cfg_free_surface_drift=cfg_free_surface_drift,
        domain_min_size=domain_min_size,
        )
-    S.execute()
-    S.predict()
-    S.postprocess()
-    S.finish(execution_start, process_start)
+    if not S.is_registered or overwrite:
+        S.execute()
+        S.predict()
+        S.postprocess()
+        S.finish(execution_start, process_start)
+        # return S.MD.computation.to_json()
+        # return S.MD.to_json()
+        return S.to_json()
+    else:
+        log_this.info("This simulation already exists, stopping here!")
+        # Load JSON results from the previous run
+        with open(S.run_file, 'r') as f:
+            jsdata = json.load(f)
+        return json.dumps(jsdata, indent=4)
-    #
-    return S.MD.computation.to_json()
-    # return S.MD.to_json()
-    return S.to_json()

 # def predictor(args):

diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py
index 321f585..9f52086 100644
--- a/vfrecovery/core/simulation_handler.py
+++ b/vfrecovery/core/simulation_handler.py
@@ -15,6 +15,8 @@
 from .deployment_plan import setup_deployment_plan
 from .trajfile_handler import Trajectories
 from .analysis_handler import RunAnalyser
+from .db import DB
+

 root_logger = logging.getLogger("vfrecovery_root_logger")
@@ -117,7 +119,8 @@ def _setup_float_config(self, **kwargs):

     def _setup_load_velocity_data(self, **kwargs):
         # Define domain to load velocity for:
         # In space:
-        domain, domain_center = get_domain(self.P_obs, kwargs['domain_min_size'])
+        self.domain_min_size = kwargs['domain_min_size']
+        domain, domain_center = get_domain(self.P_obs, self.domain_min_size)
         # and time:
         cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24))  # Get the float cycle period (in days)
         self.n_days = len(self.cyc) * cycle_period + 1
@@ -146,6 +149,26 @@ def _setup_load_velocity_data(self, **kwargs):
             pd.to_datetime(self.ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S"))
         )

+    def _instance2rec(self):
+        """Convert this instance data to a dictionary to be used with DB"""
+        cyc = self.cyc[1]
+        n_predictions = len(self.cyc) - 1 - 1  # Remove initial conditions and cyc target
+
+        data = {'wmo': self.wmo, 'cyc': cyc, 'n_predictions': n_predictions,
+                'cfg': self.MD.vfconfig,
+                'velocity': {'name': self.MD.velocity_field,
+                             'download': self.ds_vel.attrs['access_date'],
+                             'domain_size': self.domain_min_size},
+                'output': self.output_path,
+                'swarm_size': self.MD.n_floats}
+
+        return data
+
+    @property
+    def is_registered(self):
+        """Check if this simulation has already been registered"""
+        return DB.from_dict(self._instance2rec()).registered
+
     def setup(self, **kwargs):
         """Fulfill all requirements for the simulation"""
         self._setup_load_observed_profiles()
@@ -154,7 +177,10 @@ def setup(self, **kwargs):
         self.logger.info("Simulation data will be registered under: %s%s*%s*" % (self.output_path,
                                                                                  os.path.sep,
                                                                                  get_simulation_suffix(self.MD)))
+        self.run_file = os.path.join(self.output_path, 'results_%s.json' % get_simulation_suffix(self.MD))
+        self.logger.info("Check if such a simulation has already been registered: %s" % self.is_registered)
         self.logger.debug("Setup terminated")
+
         return self

@@ -283,6 +309,10 @@ def postprocess(self):
         self._postprocess_swarm_metrics()
         return self

+    def register(self):
+        """Save simulation to the registry"""
+        return DB.from_dict(self._instance2rec()).register()
+
     def finish(self, execution_start: float, process_start: float):
         """Click timers and save results to finish"""
         self.MD.computation = MetaDataComputation.from_dict({
@@ -293,10 +323,12 @@ def finish(self, execution_start: float, process_start: float):
     })
self.logger.debug(pp_obj(self.MD.computation)) - self.run_file = os.path.join(self.output_path, 'results_%s.json' % get_simulation_suffix(self.MD)) self.to_json(fp=self.run_file) self.logger.info("Simulation results and analysis saved in: %s" % self.run_file) + self.register() + self.logger.debug("Simulation recorded in registry") + self.logger.info("END OF SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) self.logger.info("%s /" % ("=" * 55)) return self diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py index c3811d9..ca91eb2 100644 --- a/vfrecovery/core/utils.py +++ b/vfrecovery/core/utils.py @@ -4,6 +4,9 @@ from argopy import ArgoIndex import argopy.plot as argoplot from argopy.errors import DataNotFound +import hashlib +import base64 + from vfrecovery.json import Profile, MetaData @@ -77,4 +80,23 @@ def get_domain(Plist, size): domain = [c[0] - size / 2, c[0] + size / 2, c[1] - size / 2, c[1] + size / 2] domain = [np.round(d, 3) for d in domain] - return domain, c \ No newline at end of file + return domain, c + + +def make_hash_sha256(o): + hasher = hashlib.sha256() + hasher.update(repr(make_hashable(o)).encode()) + return base64.b64encode(hasher.digest()).decode() + + +def make_hashable(o): + if isinstance(o, (tuple, list)): + return tuple((make_hashable(e) for e in o)) + + if isinstance(o, dict): + return tuple(sorted((k, make_hashable(v)) for k, v in o.items())) + + if isinstance(o, (set, frozenset)): + return tuple(sorted(make_hashable(e) for e in o)) + + return o \ No newline at end of file diff --git a/vfrecovery/downloaders/core.py b/vfrecovery/downloaders/core.py index 21205aa..fbf31e4 100644 --- a/vfrecovery/downloaders/core.py +++ b/vfrecovery/downloaders/core.py @@ -24,9 +24,10 @@ def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D', l tuple """ + access_date = pd.to_datetime('now', utc='now').strftime("%Y%m%d") + def get_velocity_filename(dataset, n_days): - download_date = pd.to_datetime('now', utc='now').strftime("%Y%m%d") - fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, download_date)) + fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, access_date)) return fname velocity_file = get_velocity_filename(dataset, n_days) @@ -56,4 +57,6 @@ def get_velocity_filename(dataset, n_days): new = False ds = xr.open_dataset(velocity_file) + ds.attrs['access_date'] = access_date + return ds, velocity_file, new diff --git a/vfrecovery/static/assets/.gitkeep b/vfrecovery/static/assets/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/static/assets/simulations_registry.pkl b/vfrecovery/static/assets/simulations_registry.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6abc70fbe06ed06f86a860b6a3fa6f0f4ea6ec0f GIT binary patch literal 2041 zcmah~&yUTwLY3uv7hBz?E#VBM2ztJ$-+E+MA>+fdXSwyllcA@kw7q}L8tudiV5eP4B#+Mnw2 zw#DsM=Fs=ZYJ0oO)kfv_!IMAAaHrWe`c=|j9&Yg0i|m8JuUD7ZcDO!Sn-6EsH+Y;B zkbR~Un}Iy2;(jV~tqNZQ52CYX74%mQA}b!*Yp!Q2cDePDL#}T$a&6pi7dUqnIQQ%} zNbSt^&u{F1ebc_C!C@}2VT}yt5l3G}H*x#u*8Jw*l!1?BUujqai$Tg-Bs|wF$$;2z z{-@;=H%j%pzTjan&T))0?oZtq7cEAsLKJr)&Sr|~sr}R?^ zKFq@l(>rdLH$J3`*)}gWfg}|xI@2VzrsL1xj^2*f$o+6aVFWJspim6G zz!G%EBIJ}GX580I`k;v|^kz&Q2sxnv&vbO`Q)wtLK;?TI=b?O{9B@waLWRtSQ6)lM zZ+Mq0EqoT!JmWk7RChU47NKWUmX&Smq#vYH8K(^9oNM?JCM3^HTrD~ML;v9LF}fR>ol{ZiwzD9w~Wa# zFce&JEw`bTFs-5D-Up~OjM=1Y?yxvcr#YN?29*Z%Mq}pjIJZyp H!W;by4uhkN literal 0 HcmV?d00001 From 203e3955890f09f4b753580d4234b8cdbeb0fd06 Mon Sep 17 00:00:00 2001 From: Guillaume Maze 
Date: Fri, 29 Mar 2024 15:27:01 +0100 Subject: [PATCH 27/38] more db update --- .../command_line_interface/group_describe.py | 5 ++- vfrecovery/core/db.py | 42 ++++++++---------- vfrecovery/core/predict.py | 2 - vfrecovery/core/simulation_handler.py | 4 +- .../static/assets/simulations_registry.pkl | Bin 2041 -> 2644 bytes 5 files changed, 25 insertions(+), 28 deletions(-) diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py index 7dfb03b..bd170fe 100644 --- a/vfrecovery/command_line_interface/group_describe.py +++ b/vfrecovery/command_line_interface/group_describe.py @@ -87,7 +87,10 @@ def describe( def describe_run(wmo, cyc): - print(DB.read_data().T) + partial_data = {'wmo': wmo} + if len(cyc) > 0: + partial_data.update({'cyc': cyc[0]}) + click.echo(DB.from_dict(partial_data).record.T) def describe_velocity(wmo, cyc): diff --git a/vfrecovery/core/db.py b/vfrecovery/core/db.py index e0567b1..02080b2 100644 --- a/vfrecovery/core/db.py +++ b/vfrecovery/core/db.py @@ -39,12 +39,15 @@ class DB: >>> DB.read_data() # Return db content as :class:`pd.DataFrame` >>> data = {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0, 'cfg': FloatConfiguration('recovery'), 'velocity': {'name': 'GLORYS', 'download': pd.to_datetime('now', utc=True), 'domain_size': 5}, 'output': Path('.'), 'swarm_size': 1000} - >>> DB.from_dict(data).register() # save to db + >>> DB.from_dict(data).checkin() # save to db >>> DB.from_dict(data).checkout() # delete from db + >>> DB.from_dict(data).checked + >>> DB.from_dict(data).uid + >>> DB.from_dict(data).record >>> partial_data = {'wmo': 6903091} >>> DB.from_dict(partial_data) # Create new instance for actions - >>> DB.from_dict(partial_data).list() + >>> DB.from_dict(partial_data).record """ wmo: int @@ -52,14 +55,14 @@ class DB: n_predictions: int cfg: FloatConfiguration velocity: Dict - output: Path swarm_size: int + output: Path required: List = ['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', - 'velocity_download', 'velocity_domain_size', 'output', 'swarm_size'] - properties: List = ['wmo', 'cyc', 'n_predictions', 'cfg', 'velocity', 'output', 'swarm_size'] + 'velocity_download', 'velocity_domain_size', 'swarm_size', 'output'] + properties: List = ['wmo', 'cyc', 'n_predictions', 'cfg', 'velocity', 'swarm_size', 'output'] _data: pd.DataFrame dbfile: Path = (Path(__file__).parent.parent).joinpath('static').joinpath('assets').joinpath( @@ -98,8 +101,8 @@ def init(cls): 'velocity_name': pd.Series(dtype='string'), 'velocity_download': pd.Series(dtype='datetime64[ns]'), 'velocity_domain_size': pd.Series(dtype='float'), - 'output': pd.Series(dtype='string'), 'swarm_size': pd.Series(dtype='int'), + 'output': pd.Series(dtype='string'), 'uid': pd.Series(dtype='string'), }) df.name = pd.to_datetime('now', utc=True) @@ -136,7 +139,6 @@ def put_data(cls, row): if not cls.exists(row): df = cls.read_data() df = pd.concat([df, pd.DataFrame([row])], ignore_index=True) - df.app df.to_pickle(cls.dbfile) else: print("This record is already in the database") @@ -153,10 +155,11 @@ def del_data(cls, row): @classmethod def get_data(cls, row): df = cls.read_data() - v = df.iloc[:, 0] == df.iloc[:, 0] - for key, value in row.items(): - v &= (df[key] == value) - return df[v == True] + mask = df.iloc[:, 0] == df.iloc[:, 0] + for key in row: + if row[key] is not None: + mask &= df[key] == row[key] + 
return df[mask] @classmethod def info(cls) -> str: @@ -169,14 +172,6 @@ def __repr__(self): summary.append("connected: %s" % self.isconnected()) summary.append("Number of records: %i" % self.read_data().shape[0]) - if hasattr(self, 'wmo'): - summary.append("Current record:") - for p in self.properties: - v = getattr(self, p) - if np.asarray(v).dtype.kind in set('buifc'): - summary.append("\t%s: %s" % (p, v)) - else: - summary.append("\t%s: '%s'" % (p, v)) return "\n".join(summary) @staticmethod @@ -188,7 +183,7 @@ def _instance2row(self): for key in ['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', - 'velocity_download', 'velocity_domain_size', 'output', 'swarm_size']: + 'velocity_download', 'velocity_domain_size', 'swarm_size', 'output']: row.update({key: getattr(self, key, None)}) if hasattr(self, 'cfg'): @@ -202,15 +197,16 @@ def _instance2row(self): if hasattr(self, 'output'): row.update({'output': str(getattr(self, 'output', None))}) + return row - def register(self): + def checkin(self): """Add one new record to the database""" new_row = self._instance2row() for key, value in new_row.items(): if value is None: - raise ValueError("Cannot register a new record with missing value for '%s'" % key) + raise ValueError("Cannot checkin a new record with missing value for '%s'" % key) self.put_data(new_row) @@ -225,7 +221,7 @@ def checkout(self): self.del_data(row) @property - def registered(self): + def checked(self): row = self._instance2row() return self.exists(row) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 63cdabc..e831fa5 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -14,7 +14,6 @@ sim_logger = logging.getLogger("vfrecovery_simulation") - class log_this: def __init__(self, txt, log_level): @@ -39,7 +38,6 @@ def error(txt) -> 'log_this': return log_this(txt, 'ERROR') - def predict_function( wmo: int, cyc: int, diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py index 9f52086..63b45d4 100644 --- a/vfrecovery/core/simulation_handler.py +++ b/vfrecovery/core/simulation_handler.py @@ -167,7 +167,7 @@ def _instance2rec(self): @property def is_registered(self): """Check is this simulation has laready been registered""" - return DB.from_dict(self._instance2rec()).registered + return DB.from_dict(self._instance2rec()).checked def setup(self, **kwargs): """Fulfill all requirements for the simulation""" @@ -311,7 +311,7 @@ def postprocess(self): def register(self): """Save simulation to the registry""" - return DB.from_dict(self._instance2rec()).register() + return DB.from_dict(self._instance2rec()).checkin() def finish(self, execution_start: float, process_start: float): """Click timers and save results to finish""" diff --git a/vfrecovery/static/assets/simulations_registry.pkl b/vfrecovery/static/assets/simulations_registry.pkl index 6abc70fbe06ed06f86a860b6a3fa6f0f4ea6ec0f..9c779faf6621f579410e4d94f6a0848abaf1950e 100644 GIT binary patch delta 861 zcma)3OHUI~6z-jN3T?FoBM%K4)P$BmoavMnYD6AlUqgq-khatq=ws;gT{_e0V_|WF z!Azpfy=r1i+`4e#(ij)GG|?R!<3_jq0W~gkro{x}#(Q(m^E)TsJ?E$8;4Hp_KlU42 z_&e<*ji&YSLhEx+>zmDRSJOE)*b4^XFfbf@Iyefh0X<9tCm`W7V6}9xQ?niIgZp4q z$sdAYC2zv4@G{n~_-)}fUM#2wD+1Ga2 z_qIQ%wL(Ixw()2=MZVH5!$a*2#TwOZfiirgyVBgYJ?MVz{ax>!eLukP4caK8Rm7{& zo_m|7Gyg>Q9nT{tqH;GyQWK}2FgOg4DF5++R-9a`PTt)m-^~zRhtIQ>e5?8Ko2;*zX@A1c!*#s>7}N2{(psXh 
z6kYb!T#c~15^hL&X3ptYVq-2wBH7I19JlC8rztj`m`3tADa_M>EWVLcWuSE9{%GL$L@wxkuW z-&rmB-T9E+HKF1Ore=?NC`n8exnRaui^qK)ZnYZ7ZId?>u56(|@dQVP341)H!lxV) z!IexY7>wi-;oxF`U9TqBie4#Rar>#-B$dptbvDNcgk6|M-sX?aSU5vx$7b3Bzu?!w z8#t&R0Twu;|8-8*R?^A#h-FXQqY*?{`W!TcFO z{&_I}8gnX8)P^Ns@;nwDAnPT|o4V+f3{j9zfNCVccFB43dh<;-;ILs_JUN#`jgfEi6pr7*sU=gAdRX!jb5p1I om9~N0v~= Date: Fri, 29 Mar 2024 15:47:55 +0100 Subject: [PATCH 28/38] misc --- vfrecovery/__init__.py | 1 + .../command_line_interface/group_describe.py | 2 +- .../static/assets/simulations_registry.pkl | Bin 2644 -> 3235 bytes 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vfrecovery/__init__.py b/vfrecovery/__init__.py index f98d93b..ea6c165 100644 --- a/vfrecovery/__init__.py +++ b/vfrecovery/__init__.py @@ -19,3 +19,4 @@ from vfrecovery.python_interface.predict import predict from vfrecovery.downloaders import Armor3d, Glorys +from vfrecovery.core.db import DB diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py index bd170fe..caf9652 100644 --- a/vfrecovery/command_line_interface/group_describe.py +++ b/vfrecovery/command_line_interface/group_describe.py @@ -90,7 +90,7 @@ def describe_run(wmo, cyc): partial_data = {'wmo': wmo} if len(cyc) > 0: partial_data.update({'cyc': cyc[0]}) - click.echo(DB.from_dict(partial_data).record.T) + click.echo(DB.from_dict(partial_data).record.T.to_string(max_colwidth=15)) def describe_velocity(wmo, cyc): diff --git a/vfrecovery/static/assets/simulations_registry.pkl b/vfrecovery/static/assets/simulations_registry.pkl index 9c779faf6621f579410e4d94f6a0848abaf1950e..ae907d1584d4d2e64db347749f4e82f3a9d4929f 100644 GIT binary patch delta 819 zcmZWnOKTHR7|l#F)1-A_AJV9$2!d2oo#{*_GouyTB$G#z<}rCk1oN0=l4&Ms-VI1~ zBUG|zd#BJvb>l|dD7X?`D2U(#+=wW+sC)l_E1hXt6ug)3JACIH&ixkm^TflG$Zce2 z_t+NlQvX$_+xmEM>s!dw)eTM;oPw^6>JKNw5dt1sgiJtzD98ieL-gc8@GK0FIl^uD zylT4upHXc-mpskdS(2$R zown5YaL`@BG9gLxSEMDIzg^;qNPv(^YjzjT*}6TuL(1kHYm4pLa!^P|X|53DbDAAz zoOPk?!@9|ONRlX#i3Ug_QHVAcg&0G(IFj|(dt{*^M^lf^0$FXhRS#cWQ* zQi7BX7Y56xnR3lmClby`KF_jPQL2_ZHeTXHnPMyPkTbS|dlGDnhT~pv+8Bo3JgXQl hL(p6B$b_BP5A=~nGgEIWK3(6?XjW^A`UQWQ#6M2A_;COL delta 631 zcmZ21c}0Y^fpw}U*F@Gm%o7+GCY~{!EXZguITDB$GFmaRO}@iu%3;6&0h1M(WPA+zjcJ7$i_e#`=s%b2yn zikN_k4lskXJ!D2`GiL#bM6e)4*0H1lEf8T%nmmzJ2gusTn#Nc(S(Q!Mp-`iXV}twu zNr84;ashKo*@mk4=2?0X7++g?HHy78W!@zjt{~;v+SyLQVPTai zreWpARoO0)iQz@1QT0v+Vd+I)X=Q=#o~Gpmo|d^`hGr%p_46*wiGvI(2dxmn-9r&3m}47#Vj>{?4s0dd07Y awYVg)sAP&aLk~-FNq)f;VANalWB>q8$HIvK From c1990c090497229ea519596705e99080223a9140 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Wed, 3 Apr 2024 15:19:24 +0200 Subject: [PATCH 29/38] DB simulations manager --- README.md | 20 ++ vfrecovery/command_line_interface/group_db.py | 88 +++++++ .../command_line_interface/group_predict.py | 9 +- .../virtualfleet_recovery.py | 2 + vfrecovery/core/analysis_handler.py | 9 + vfrecovery/core/db.py | 197 +++++++++++++++- vfrecovery/core/predict.py | 10 +- vfrecovery/core/simulation_handler.py | 218 +++++++++++------- vfrecovery/core/trajfile_handler.py | 83 +++++-- vfrecovery/downloaders/core.py | 9 +- vfrecovery/python_interface/predict.py | 3 + .../static/assets/simulations_registry.pkl | Bin 3235 -> 2562 bytes 12 files changed, 536 insertions(+), 112 deletions(-) create mode 100644 vfrecovery/command_line_interface/group_db.py diff --git a/README.md b/README.md index 953cfb9..bdc3b18 100644 --- a/README.md +++ b/README.md @@ -159,3 +159,23 @@ vfrecovery describe obs WMO CYC1 CYC2 CYC3 ```bash vfrecovery meetwith 
"cruise_track.csv" WMO CYC0 ``` + +## Data storage +Simulation data are stored on disk under the following architecture: + +``` +./vfrecovery_simulations_data + |- vfrecovery_simulations.log + |- WMO + |----CYC + |----VELOCITY(NAME + DOWNLOAD_DATE + DOMAIN_SIZE) + |- velocity_file.nc + |- figure.png + |---- RUN_PARAMS(NP + CFG + NF) + |- float_configuration.json + |- trajectories.zarr + |- results.json + |- figure.png +``` + +This ensures that for a given velocity field, all possible simulations are unambiguously found under a single folder \ No newline at end of file diff --git a/vfrecovery/command_line_interface/group_db.py b/vfrecovery/command_line_interface/group_db.py new file mode 100644 index 0000000..1ca1cc2 --- /dev/null +++ b/vfrecovery/command_line_interface/group_db.py @@ -0,0 +1,88 @@ +import click +import logging +from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo +import argopy.plot as argoplot +from argopy import ArgoIndex + +from vfrecovery.utils.misc import list_float_simulation_folders +from vfrecovery.core.db import DB + +root_logger = logging.getLogger("vfrecovery_root_logger") +blank_logger = logging.getLogger("vfrecovery_blank_logger") + + +@click.group() +def cli_group_db() -> None: + pass + + +@cli_group_db.command( + "db", + short_help="Helper for VirtualFleet-Recovery simulations database", + help=""" + + """, + epilog=""" + Examples: + + \b + vfrecovery db info + + \b + vfrecovery db read + + \b + vfrecovery db read --index 3 + + \b + vfrecovery db drop + """, # noqa +) +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), + default="INFO", + show_default=True, + help=( + "Set the details printed to console by the command " + "(based on standard logging library)." + ), +) +@click.option( + "-i", "--index", + type=int, + required=False, + default=None, + show_default=False, + help="Record index to work with", +) +@click.argument('ACTION', nargs=1, type=str) +def db( + action, + index, + log_level, +) -> None: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if root_logger.isEnabledFor(logging.DEBUG): + root_logger.debug("DEBUG mode activated") + + if action == 'read': + df = DB.read_data() + if index is not None: + row = df.loc[index] + click.secho("Row index #%i:" % index, fg='green') + click.echo(row.T.to_string()) + else: + for irow, row in df.iterrows(): + click.secho("Row index #%i:" % irow, fg='green') + click.echo(row.T.to_string()) + + if action == 'drop': + DB.clear() + + if action == 'info': + click.echo(DB.info()) diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index 19930d9..f6a168f 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -41,7 +41,7 @@ def cli_group_predict() -> None: type=str, required=False, default=None, - help="Simulation data output folder [default: './vfrecovery_simulations_data//']", + help="Simulation root data output folder [default: './vfrecovery_simulations_data']", ) # @click.option( # "-v", "--verbose", @@ -117,6 +117,11 @@ def cli_group_predict() -> None: show_default=True, help="Load velocity data in lazy mode (not saved on file)." 
) +@click.option('--figure/--no-figure', + default=True, + show_default=True, + help="Display and save figures on file (png format)", + ) @click.option( "--log_level", type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), @@ -143,6 +148,7 @@ def predict( domain_min_size, overwrite, lazy, + figure, log_level, ) -> None: """ @@ -169,5 +175,6 @@ def predict( domain_min_size=domain_min_size, overwrite=overwrite, lazy=lazy, + figure=figure, log_level=log_level) blank_logger.info(json_dump) diff --git a/vfrecovery/command_line_interface/virtualfleet_recovery.py b/vfrecovery/command_line_interface/virtualfleet_recovery.py index b38adac..4b34779 100644 --- a/vfrecovery/command_line_interface/virtualfleet_recovery.py +++ b/vfrecovery/command_line_interface/virtualfleet_recovery.py @@ -2,12 +2,14 @@ from vfrecovery.command_line_interface.group_describe import cli_group_describe from vfrecovery.command_line_interface.group_predict import cli_group_predict +from vfrecovery.command_line_interface.group_db import cli_group_db @click.command( cls=click.CommandCollection, sources=[ cli_group_describe, cli_group_predict, + cli_group_db, ], context_settings=dict(help_option_names=["-h", "--help"]), ) diff --git a/vfrecovery/core/analysis_handler.py b/vfrecovery/core/analysis_handler.py index 329a720..d0efc6b 100644 --- a/vfrecovery/core/analysis_handler.py +++ b/vfrecovery/core/analysis_handler.py @@ -6,6 +6,7 @@ from sklearn.neighbors import KernelDensity from virtualargofleet import VelocityField +import matplotlib as mpl import matplotlib.pyplot as plt import argopy.plot as argoplot import cartopy.crs as ccrs @@ -391,6 +392,7 @@ def plot_predictions(self, save: bool = False, workdir: Path = Path('.'), fname: str = 'predictions', + mplbackend: str = 'Agg', figsize=None, dpi=120, @@ -492,6 +494,9 @@ def plot_this(this_ax, i_cycle, ip): # this_ax.set_ylabel("Cycle %i predictions" % (i_cycle+1)) this_ax.set_title("%s\nCycle %i predictions" % (title, self.sim_cycles[i_cycle]), fontsize=6) + initial_mplbackend = mpl.get_backend() + mpl.use(mplbackend) + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi, subplot_kw={'projection': ccrs.PlateCarree()}, sharex=True, sharey=True) @@ -516,9 +521,13 @@ def plot_this(this_ax, i_cycle, ip): plot_this(ax[ix], i_cycle, ip) plt.tight_layout() + if save: save_figurefile(fig, fname, workdir) + # Rewind mpl backend to initial position: + mpl.use(initial_mplbackend) + return fig, ax diff --git a/vfrecovery/core/db.py b/vfrecovery/core/db.py index 02080b2..accfe13 100644 --- a/vfrecovery/core/db.py +++ b/vfrecovery/core/db.py @@ -26,9 +26,138 @@ from pathlib import Path import pandas as pd import numpy as np +import warnings + from .utils import make_hash_sha256 + +num2str = lambda x: "%s%s" % ("%0.4d" % x, "%0.3d" % (1e3 * np.round(x, 3) - 1e3 * np.round(x))) +str2num = lambda x: int(x[0:4]) + int(x[4:]) / 1e3 + + +class Row2Path: + """ + >>> Row2Path(row).wmo + >>> Row2Path(row).cyc + >>> Row2Path(row).velocity + >>> Row2Path(row).run + >>> Row2Path(row).path + """ + + def __init__(self, row): + self.row = row + + def __repr__(self): + summary = [""] + summary.append("%10s: %s" % ("wmo", self.wmo)) + summary.append("%10s: %s" % ("cyc", self.cyc)) + summary.append("%10s: %s" % ("velocity", self.velocity)) + summary.append("%10s: %s" % ("run", self.run)) + return "\n".join(summary) + + @property + def wmo(self): + return Path(str(self.row['wmo'])) + + @property + def cyc(self): + # |----CYC + last = str(self.row['cyc']) + return 
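The `mplbackend` plumbing added above follows a get/switch/restore pattern, so figures can be rendered with a non-interactive backend (e.g. 'Agg') without clobbering the backend of the calling environment. A minimal standalone sketch of the pattern:

```python
import matplotlib as mpl
import matplotlib.pyplot as plt

initial_mplbackend = mpl.get_backend()
mpl.use("Agg")  # headless backend: figures are only rendered to files

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
fig.savefig("predictions.png", dpi=120)

mpl.use(initial_mplbackend)  # rewind mpl backend to its initial value
```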
self.wmo.joinpath(Path(last)) + + @property + def velocity(self): + # |----VELOCITY(NAME + DOWNLOAD_DATE + DOMAIN_SIZE) + last = "%s_%s_%s" % (self.row['velocity_name'], + num2str(self.row['velocity_domain_size']), + self.row['velocity_download'].strftime("%Y%m%d"), + ) + return self.cyc.joinpath(Path(last)) + + @property + def run(self): + # |---- RUN_PARAMS(NP + CFG + NF) + last = "%s_%s_%s" % ("NP%0.3d" % self.row['n_predictions'], + "SW%0.4d" % self.row['swarm_size'], + "CFG%s" % "c".join( + [num2str(self.row[c]) for c in self.row.index if "cfg_" in c]), + ) + return self.velocity.joinpath(Path(last)) + + @property + def path(self): + return self.run + + +class Path2Row: + """ + >>> Path2Row(path).wmo + >>> Path2Row(path).cyc + >>> Path2Row(path).velocity + >>> Path2Row(path).run + >>> Path2Row(path).row + """ + + cfg_cols: list = ['cfg_cycle_duration', + 'cfg_life_expectancy', + 'cfg_parking_depth', + 'cfg_profile_depth', + 'cfg_reco_free_surface_drift', + 'cfg_vertical_speed'] + + def __init__(self, p): + self.path = p + + def __repr__(self): + summary = [""] + summary.append("%10s: %s" % ("wmo", self.wmo)) + summary.append("%10s: %s" % ("cyc", self.cyc)) + summary.append("%10s: %s" % ("velocity", self.velocity)) + summary.append("%10s: %s" % ("run", self.run)) + return "\n".join(summary) + + @property + def wmo(self): + return int(self.path.parts[0]) + + @property + def cyc(self): + return int(self.path.parts[1]) + + @property + def velocity(self): + velocity = self.path.parts[2] + result = {} + result.update({'velocity_name': velocity.split("_")[0]}) + result.update({'velocity_domain_size': str2num(velocity.split("_")[1])}) + result.update({'velocity_download': pd.to_datetime(velocity.split("_")[2], utc=True)}) + return result + + @property + def run(self): + run = self.path.parts[3] + result = {} + result.update({'n_predictions': int(run.split("_")[0][2:])}) + result.update({'swarm_size': int(run.split("_")[1][2:])}) + for key, value in zip(self.cfg_cols, [str2num(v) for v in run.split("_")[2][3:].split("c")]): + result.update({key: value}) + return result + + @property + def row(self): + row = {} + row.update({'wmo': self.wmo}) + row.update({'cyc': self.cyc}) + row.update({'n_predictions': int(self.run['n_predictions'])}) + for key in self.cfg_cols: + row.update({key: self.run[key]}) + for key in self.velocity: + row.update({key: self.velocity[key]}) + row.update({'swarm_size': int(self.run['swarm_size'])}) + return pd.DataFrame([row]) + + class DB: """ @@ -38,7 +167,7 @@ class DB: >>> DB.isconnected() >>> DB.read_data() # Return db content as :class:`pd.DataFrame` - >>> data = {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0, 'cfg': FloatConfiguration('recovery'), 'velocity': {'name': 'GLORYS', 'download': pd.to_datetime('now', utc=True), 'domain_size': 5}, 'output': Path('.'), 'swarm_size': 1000} + >>> data = {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0, 'cfg': FloatConfiguration('recovery'), 'velocity': {'name': 'GLORYS', 'download': pd.to_datetime('now', utc=True), 'domain_size': 5}, 'path_root': Path('.'), 'swarm_size': 1000} >>> DB.from_dict(data).checkin() # save to db >>> DB.from_dict(data).checkout() # delete from db >>> DB.from_dict(data).checked @@ -56,13 +185,13 @@ class DB: cfg: FloatConfiguration velocity: Dict swarm_size: int - output: Path + path_root: Path required: List = ['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', - 
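The `num2str`/`str2num` lambdas above encode a float as a fixed-width token (4 integer digits followed by 3 digits of thousandths) so configuration values can be embedded in folder names and parsed back by `Path2Row`. A quick round-trip check, reproducing the two helpers verbatim:

```python
import numpy as np

num2str = lambda x: "%s%s" % ("%0.4d" % x, "%0.3d" % (1e3 * np.round(x, 3) - 1e3 * np.round(x)))
str2num = lambda x: int(x[0:4]) + int(x[4:]) / 1e3

assert num2str(5) == "0005000" and str2num("0005000") == 5.0
assert str2num(num2str(12.3)) == 12.3
# Caveat: fractional parts that make np.round(x) round the integer up (e.g. 0.6)
# produce a negative thousandths term and do not survive this round-trip.
```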
'velocity_download', 'velocity_domain_size', 'swarm_size', 'output'] - properties: List = ['wmo', 'cyc', 'n_predictions', 'cfg', 'velocity', 'swarm_size', 'output'] + 'velocity_download', 'velocity_domain_size', 'swarm_size', 'path_root'] + properties: List = ['wmo', 'cyc', 'n_predictions', 'cfg', 'velocity', 'swarm_size', 'path_root'] _data: pd.DataFrame dbfile: Path = (Path(__file__).parent.parent).joinpath('static').joinpath('assets').joinpath( @@ -85,7 +214,19 @@ def isconnected(cls) -> bool: @classmethod def clear(cls): - return cls.dbfile.unlink(missing_ok=True) + def confirm(): + """ + Ask user to enter Y or N (case-insensitive). + :return: True if the answer is Y. + :rtype: bool + """ + answer = "" + while answer not in ["y", "n"]: + answer = input("Confirm to permanently clear the simulations registry [Y/N]? ").lower() + return answer == "y" + + if confirm(): + return cls.dbfile.unlink(missing_ok=True) @classmethod def init(cls): @@ -102,7 +243,8 @@ def init(cls): 'velocity_download': pd.Series(dtype='datetime64[ns]'), 'velocity_domain_size': pd.Series(dtype='float'), 'swarm_size': pd.Series(dtype='int'), - 'output': pd.Series(dtype='string'), + 'path_root': pd.Series(dtype='object'), + 'path': pd.Series(dtype='object'), 'uid': pd.Series(dtype='string'), }) df.name = pd.to_datetime('now', utc=True) @@ -117,9 +259,27 @@ def connect(cls): cls._data.to_pickle(cls.dbfile) else: cls._data = pd.read_pickle(cls.dbfile) - cls._data['uid'] = cls._data.apply(lambda row: make_hash_sha256(row.to_dict()), axis=1) + + # Add read-only columns generated on the fly: + if cls._data.shape[0] > 0: + cls._data['path'] = cls._data.apply(lambda row: Row2Path(row).path, axis=1) + cls._data['uid'] = cls._data.apply(lambda row: make_hash_sha256(row.to_dict()), axis=1) return cls + # @classmethod + # def consolidate(cls): + # """Reconcile DB records with files on disk + # + # Make sure that all records have result files on disk + # """ + # def has_result_file(df_row): + # df_row['output'] + # + # + # cls.connect() + # df = cls._data + # df.apply(has_result_file, axis=1) + @classmethod def read_data(cls): """Return database content as a :class:`pd.DataFrame`""" @@ -138,6 +298,12 @@ def exists(cls, dict_of_values): def put_data(cls, row): if not cls.exists(row): df = cls.read_data() + if 'path' in row: + warnings.warn("'path' is a read only property, removed from input") + row.pop('path', None) + if 'uid' in row: + warnings.warn("'uid' is a read only property, removed from input") + row.pop('uid', None) df = pd.concat([df, pd.DataFrame([row])], ignore_index=True) df.to_pickle(cls.dbfile) else: @@ -166,6 +332,7 @@ def info(cls) -> str: return cls.__repr__(cls) def __repr__(self): + self.connect() summary = [""] summary.append("db_file: %s" % self.dbfile) @@ -183,7 +350,7 @@ def _instance2row(self): for key in ['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', - 'velocity_download', 'velocity_domain_size', 'swarm_size', 'output']: + 'velocity_download', 'velocity_domain_size', 'swarm_size', 'path_root']: row.update({key: getattr(self, key, None)}) if hasattr(self, 'cfg'): @@ -195,8 +362,8 @@ def _instance2row(self): if key in self.velocity: row.update({"velocity_%s" % key: self.velocity[key]}) - if hasattr(self, 'output'): - row.update({'output': str(getattr(self, 'output', None))}) + if hasattr(self, 'path_root'): + row.update({'path_root': str(getattr(self, 'path_root', 
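`connect()` above rebuilds the read-only `path` and `uid` columns on the fly, with `uid` delegated to `make_hash_sha256` from `.utils` (not shown in this patch). Assuming that helper boils down to a SHA-256 digest of a canonicalized record, the idea is roughly:

```python
import hashlib

def record_uid(row: dict) -> str:
    # Sort keys so the digest does not depend on column order,
    # and stringify values to get a stable byte representation:
    canonical = repr(sorted((k, str(v)) for k, v in row.items()))
    return hashlib.sha256(canonical.encode()).hexdigest()

print(record_uid({'wmo': 6903091, 'cyc': 120}))  # deterministic across sessions
```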
None))}) return row @@ -230,6 +397,16 @@ def uid(self): row = self._instance2row() return self.get_data(row)['uid'].values[0] + @property + def path(self): + row = self._instance2row() + return self.get_data(row)['path'].values[0] + + @property + def path_obj(self): + row = self._instance2row() + return Row2Path(pd.DataFrame([row]).iloc[0]) + @property def record(self) -> pd.DataFrame: row = self._instance2row() diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index e831fa5..0524959 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -52,6 +52,7 @@ def predict_function( domain_min_size: float, overwrite: bool, lazy: bool, + figure: bool, log_level: str, ) -> str: """ @@ -71,7 +72,8 @@ def predict_function( n_floats domain_min_size overwrite - lazy + lazy + figure log_level Returns @@ -109,7 +111,8 @@ def predict_function( if output_path is None: # output_path = "vfrecovery_sims" % pd.to_datetime('now', utc=True).strftime("%Y%m%d%H%M%S") - output_path = os.path.sep.join(["vfrecovery_simulations_data", str(wmo), str(cyc[1])]) + # output_path = os.path.sep.join(["vfrecovery_simulations_data", str(wmo), str(cyc[1])]) + output_path = "vfrecovery_simulations_data" output_path = Path(output_path) output_path.mkdir(parents=True, exist_ok=True) @@ -128,6 +131,7 @@ def predict_function( overwrite=overwrite, lazy=lazy, logger=log_this, + figure=figure, ) S.setup(cfg_parking_depth=cfg_parking_depth, cfg_cycle_duration=cfg_cycle_duration, @@ -140,8 +144,6 @@ def predict_function( S.predict() S.postprocess() S.finish(execution_start, process_start) - # return S.MD.computation.to_json() - # return S.MD.to_json() return S.to_json() else: log_this.info("This simulation already exists, stopping here !") diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py index 63b45d4..3d216a7 100644 --- a/vfrecovery/core/simulation_handler.py +++ b/vfrecovery/core/simulation_handler.py @@ -7,6 +7,8 @@ import os from datetime import timedelta import logging +import tempfile + from vfrecovery.json import MetaData, MetaDataSystem, MetaDataComputation from vfrecovery.downloaders import get_velocity_field @@ -15,7 +17,7 @@ from .deployment_plan import setup_deployment_plan from .trajfile_handler import Trajectories from .analysis_handler import RunAnalyser -from .db import DB +from .db import DB, Row2Path root_logger = logging.getLogger("vfrecovery_root_logger") @@ -44,25 +46,18 @@ def error(txt) -> 'default_logger': return default_logger(txt, 'ERROR') -class Simulation: - """Base class to execute the simulation/prediction workflow - - >>> S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity, output_path=Path('.')) - >>> S.setup() - >>> S.execute() - >>> S.predict() - >>> S.postprocess() - >>> S.to_json() - """ - +class Simulation_core: def __init__(self, wmo, cyc, **kwargs): + self.run_file = None + self.wmo = wmo self.cyc = cyc - self.output_path = kwargs['output_path'] + self.path_root = kwargs['output_path'] self.logger = default_logger if 'logger' not in kwargs else kwargs['logger'] - self.overwrite = kwargs['overwrite'] if 'overwrite' in kwargs else False - self.lazy = kwargs['lazy'] if 'lazy' in kwargs else True + self.overwrite = bool(kwargs['overwrite']) if 'overwrite' in kwargs else False + self.lazy = bool(kwargs['lazy']) if 'lazy' in kwargs else True + self.figure = bool(kwargs['figure']) if 'figure' in kwargs else True self.logger.info("%s \\" % ("=" * 55)) self.logger.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % 
(self.wmo, self.cyc[1])) @@ -84,11 +79,56 @@ def __init__(self, wmo, cyc, **kwargs): 'computation': None, # will be filled later }) + +class Simulation_setup(Simulation_core): + + def _instance2rec(self): + """Convert this instance data to a dictionary to be used with the DB module""" + cyc = self.cyc[1] + n_predictions = len(self.cyc) - 1 - 1 # Remove initial conditions and cyc target, as passed by user + + data = {'wmo': self.wmo, + 'cyc': cyc, + 'n_predictions': n_predictions, + 'cfg': self.MD.vfconfig, + 'velocity': {'name': self.MD.velocity_field, + 'download': pd.to_datetime(self.ds_vel.attrs['access_date']), + 'domain_size': self.domain_min_size}, + 'swarm_size': self.MD.n_floats, + 'path_root': self.path_root, + } + + return data + + @property + def is_registered(self): + """Check if this simulation has already been registered or not""" + return DB.from_dict(self._instance2rec()).checked + + @property + def output_path(self): + """Path to run output""" + p = self.path_root.joinpath(DB.from_dict(self._instance2rec()).path_obj.run) + p.mkdir(parents=True, exist_ok=True) + return p + + @property + def velocity_path(self): + """Path to velocity output""" + p = self.path_root.joinpath(DB.from_dict(self._instance2rec()).path_obj.velocity) + p.mkdir(parents=True, exist_ok=True) + return p + + @property + def temp_path(self): + """A temporary path""" + return tempfile.gettempdir() + def _setup_load_observed_profiles(self): """Load observed float profiles index""" self.logger.info("Loading float profiles index") - self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc, cache=False, cachedir=str(self.output_path)) + self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc, cache=False, cachedir=str(self.path_root)) [self.logger.debug("Observed profiles list: %s" % pp_obj(p)) for p in self.P_obs] if len(self.P_obs) == 1: @@ -109,12 +149,8 @@ def _setup_float_config(self, **kwargs): kwargs['cfg_free_surface_drift'], self.logger, ) - self.MD.vfconfig = self.CFG # Register floats configuration to the simulation meta-data class - - # and save the final virtual float configuration on file: - self.CFG.to_json( - Path(os.path.join(self.output_path, "floats_configuration_%s.json" % get_simulation_suffix(self.MD)))) self.logger.debug(pp_obj(self.CFG)) + self.MD.vfconfig = self.CFG # Register floats configuration to the simulation meta-data class def _setup_load_velocity_data(self, **kwargs): # Define domain to load velocity for: @@ -123,66 +159,62 @@ def _setup_load_velocity_data(self, **kwargs): domain, domain_center = get_domain(self.P_obs, self.domain_min_size) # and time: cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) - self.n_days = len(self.cyc) * cycle_period + 1 + self.n_days = (len(self.cyc)-1) * cycle_period - self.logger.info("Velocity field should cover %i cycles of %i hours" % (len(self.cyc), 24 * cycle_period)) - self.logger.info("Loading %i days of %s velocity starting on %s" % ( + self.logger.info("Velocity field should cover %i cycles of %i hours" % (len(self.cyc)-1, 24 * cycle_period)) + self.logger.info("Connecting to %i days of %s velocity starting on %s" % ( self.n_days, self.MD.velocity_field, self.P_obs[0].location.time)) self.ds_vel, velocity_file, new_file = get_velocity_field(domain, self.P_obs[0].location.time, n_days=self.n_days, - output=self.output_path, + output=self.temp_path, dataset=self.MD.velocity_field, logger=self.logger, lazy=self.lazy, ) if new_file: - # We force overwrite results 
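The velocity coverage computed in `_setup_load_velocity_data` above derives the simulation length from the float configuration: one cycle period is `cycle_duration / 24` days, and only `len(cyc) - 1` cycles need to be simulated since the first entry of `cyc` is the initial condition. A worked example with made-up configuration values:

```python
import numpy as np

cyc = [119, 120, 121, 122]    # initial condition + 3 cycles to simulate
cycle_duration = 240.0        # hours, hypothetical float configuration

cycle_period = int(np.round(cycle_duration / 24))   # 10 days per cycle
n_days = (len(cyc) - 1) * cycle_period              # 30 days of velocity needed
```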
because we're using a new velocity field + # We force overwriting results because we're using a new velocity field self.logger.warning("Found a new velocity field, force overwriting results") self.overwrite = True self.velocity_file = velocity_file self.logger.debug(pp_obj(self.ds_vel)) - self.logger.info("Loaded %s field from %s to %s" % ( + self.logger.info("%s loaded %s field from %s to %s" % ( + "Lazily" if self.lazy else "Hard", self.MD.velocity_field, pd.to_datetime(self.ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), pd.to_datetime(self.ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")) ) - def _instance2rec(self): - """Convert this instance data to a dictionnary to be used with DB""" - cyc = self.cyc[1] - n_predictions = len(self.cyc) - 1 - 1 # Remove initial conditions and cyc target - - data = {'wmo': self.wmo, 'cyc': cyc, 'n_predictions': n_predictions, - 'cfg': self.MD.vfconfig, - 'velocity': {'name': self.MD.velocity_field, - 'download': self.ds_vel.attrs['access_date'], - 'domain_size': self.domain_min_size}, - 'output': self.output_path, - 'swarm_size': self.MD.n_floats} - - return data - - @property - def is_registered(self): - """Check is this simulation has laready been registered""" - return DB.from_dict(self._instance2rec()).checked - def setup(self, **kwargs): """Fulfill all requirements for the simulation""" + + # Load data in memory: self._setup_load_observed_profiles() self._setup_float_config(**kwargs) self._setup_load_velocity_data(**kwargs) - self.logger.info("Simulation data will be registered under: %s%s*%s*" % (self.output_path, - os.path.sep, - get_simulation_suffix(self.MD))) - self.run_file = os.path.join(self.output_path, 'results_%s.json' % get_simulation_suffix(self.MD)) + + # Possibly save setup files to proper final folders: + + # and save the final virtual float configuration on file: + self.CFG.to_json(self.output_path.joinpath("floats_configuration.json")) + + # move velocity file from temporary to final output path: + # self.logger.info("self.temp_path: %s" % self.temp_path) + # self.logger.info("self.velocity_file: %s" % self.velocity_file) + # self.logger.info("self.output_path: %s" % self.output_path) + + # + self.run_file = self.output_path.joinpath("results.json") + self.logger.info("Simulation results will be registered under:\n%s" % self.run_file) self.logger.info("Check if such a simulation has already been registered: %s" % self.is_registered) self.logger.debug("Setup terminated") return self + +class Simulation_execute(Simulation_setup): + def _execute_get_velocity(self): self.logger.info("Create a velocity object") self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, @@ -190,12 +222,21 @@ def _execute_get_velocity(self): logger=self.logger, ) - self.logger.info("Plot velocity") - for it in [0, -1]: - _, _, fname = self.VEL.plot(it=it, iz=0, save=True, workdir=self.output_path) - fname.rename( - str(fname).replace("velocity_%s" % self.VEL.name, Path(self.velocity_file).name.replace(".nc", "")) - ) + if self.figure: + self.logger.info("Plot velocity") + for it in [0, -1]: + _, _, fname = self.VEL.plot(it=it, + iz=0, + save=True, + workdir=self.velocity_path + ) + self.logger.info(fname) + self.logger.info(self.velocity_path.stem) + # fname.rename( + # str(fname).replace("velocity_%s" % self.VEL.name, + # Path(self.velocity_file).name.replace(".nc", "") + # ) + # ) def _execute_get_plan(self): # VirtualFleet, get a deployment plan: @@ -231,7 +272,8 @@ def execute(self): # 
if os.path.exists(output_path): # shutil.rmtree(output_path) - self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) + # self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) + self.traj_file = self.output_path.joinpath('trajectories.zarr') if os.path.exists(self.traj_file) and not self.overwrite: self.logger.warning("Using data from a previous similar run (no simulation executed)") else: @@ -240,13 +282,16 @@ def execute(self): record=timedelta(minutes=30), output=True, output_folder=self.output_path, - output_file='trajectories_%s.zarr' % get_simulation_suffix(self.MD), + output_file='trajectories.zarr', verbose_progress=True, ) self.logger.info("Simulation ended with success") self.logger.info(pp_obj(self.VFleet)) return self + +class Simulation_predict(Simulation_execute): + def _predict_read_trajectories(self): # Get simulated profiles index: @@ -256,14 +301,15 @@ def _predict_read_trajectories(self): self.traj.get_index().add_distances(origin=self.P_obs[0]) self.logger.debug(pp_obj(self.traj)) - self.logger.info("Plot swarm initial and final states") - self.traj.plot_positions(domain_scale=2., - vel=self.VEL, - vel_depth=self.CFG.mission['parking_depth'], - save=True, - workdir=self.output_path, - fname='swarm_states_%s' % get_simulation_suffix(self.MD) - ) + if self.figure: + self.logger.info("Plot swarm initial and final states") + self.traj.plot_positions(domain_scale=2., + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='swarm_states', + ) def _predict_positions(self): """Make predictions based on simulated profile density""" @@ -272,15 +318,16 @@ def _predict_positions(self): self.run.fit_predict() self.logger.debug(pp_obj(self.run)) - self.logger.info("Plot predictions") - self.run.plot_predictions( - vel=self.VEL, - vel_depth=self.CFG.mission['parking_depth'], - save=True, - workdir=self.output_path, - fname='predictions_%s' % get_simulation_suffix(self.MD), - orient='portrait' - ) + if self.figure: + self.logger.info("Plot predictions") + self.run.plot_predictions( + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='predictions', + orient='portrait', + ) def predict(self): """Make float profile predictions based on the swarm simulation""" @@ -288,6 +335,9 @@ def predict(self): self._predict_positions() return self + +class Simulation_postprocess(Simulation_predict): + def _postprocess_metrics(self): if self.run.has_ref: self.logger.info("Computing prediction metrics for past cycles with observed ground truth") @@ -298,7 +348,10 @@ def _postprocess_swarm_metrics(self): Plist_updated = [] for p in self.run.jsobj.predictions: this_cyc = p.virtual_cycle_number - swarm_metrics = self.traj.analyse_pairwise_distances(virtual_cycle_number=this_cyc, show_plot=False) + swarm_metrics = self.traj.analyse_pairwise_distances(virtual_cycle_number=this_cyc, + save_figure=False, + # save_figure=self.save_figure, + ) p.metrics.trajectory_lengths = swarm_metrics.trajectory_lengths p.metrics.pairwise_distances = swarm_metrics.pairwise_distances Plist_updated.append(p) @@ -309,6 +362,17 @@ def postprocess(self): self._postprocess_swarm_metrics() return self + +class Simulation(Simulation_postprocess): + """Base class to execute the simulation/prediction workflow + + >>> S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity, output_path=Path('.')) + >>> S.setup() + >>> 
S.execute() + >>> S.predict() + >>> S.postprocess() + >>> S.to_json() + """ def register(self): """Save simulation to the registry""" return DB.from_dict(self._instance2rec()).checkin() diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py index 7718e8e..cc78185 100644 --- a/vfrecovery/core/trajfile_handler.py +++ b/vfrecovery/core/trajfile_handler.py @@ -7,6 +7,7 @@ import matplotlib.pyplot as plt from virtualargofleet import VelocityField from pathlib import Path +import logging from vfrecovery.utils.misc import get_cfg_str from vfrecovery.plots.utils import map_add_features, save_figurefile @@ -14,6 +15,32 @@ from vfrecovery.json import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState +root_logger = logging.getLogger("vfrecovery_root_logger") + + +class default_logger: + + def __init__(self, txt, log_level): + """Log text to simulation and possibly root logger(s)""" + getattr(root_logger, log_level.lower())(txt) + + @staticmethod + def info(txt) -> 'default_logger': + return default_logger(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'default_logger': + return default_logger(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'default_logger': + return default_logger(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'default_logger': + return default_logger(txt, 'ERROR') + + class Trajectories: """Trajectory file manager for VFrecovery @@ -24,13 +51,14 @@ class Trajectories: T.sim_cycles df = T.to_index() df = T.get_index().add_distances() - jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, show_plot=True) + jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, save_figure=True) """ - def __init__(self, zfile): + def __init__(self, zfile, **kwargs): self.zarr_file = zfile self.obj = xr.open_zarr(zfile) self._index = None + self.logger = default_logger if 'logger' not in kwargs else kwargs['logger'] @property def n_floats(self): @@ -137,6 +165,14 @@ def worker(ds, cyc, x0, y0): mask = np.logical_and((ds['cycle_number'] == cyc).compute(), (ds['cycle_phase'] >= 3).compute()) this_cyc = ds.where(mask, drop=True) + + # Check if we didn't lose some particles: + if len(x0) > len(this_cyc.isel(obs=-1)['time'].values): + n = len(x0) - len(this_cyc.isel(obs=-1)['time'].values) + raise ValueError("%i virtual floats did not make it to the end of required cycles. " + "They probably reached the edge of the velocity field domain. You should " + "try to increase the domain size of the simulation." 
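The refactor above assembles `Simulation` as a chain of single-stage parents (`Simulation_core`, `Simulation_setup`, `Simulation_execute`, `Simulation_predict`, `Simulation_postprocess`), each adding one step of the workflow. The shape of that pattern in miniature (illustrative names, not the real classes):

```python
class Core:
    def __init__(self, wmo: int):
        self.wmo = wmo

class Setup(Core):
    def setup(self) -> "Setup":
        self.ready = True   # load inputs, configuration, velocity...
        return self         # return self so stages can be chained

class Execute(Setup):
    def execute(self) -> "Execute":
        assert self.ready   # relies on state prepared by the previous stage
        return self

class Pipeline(Execute):
    """Public entry point, inheriting every stage of the workflow."""

Pipeline(wmo=6903091).setup().execute()
```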
% n) + if len(this_cyc['time']) > 0: data = { 'date': this_cyc.isel(obs=-1)['time'].values, @@ -155,8 +191,9 @@ def worker(ds, cyc, x0, y0): cycles = np.unique(self.obj['cycle_number']) rows = [] for cyc in cycles: - df = worker(self.obj, cyc, deploy_lon, deploy_lat) - rows.append(df) + if ~ np.isnan(cyc): + df = worker(self.obj, cyc, deploy_lon, deploy_lat) + rows.append(df) rows = [r for r in rows if r is not None] df = pd.concat(rows).reset_index() df['wmo'] = df['wmo'].astype(int) @@ -230,12 +267,13 @@ def worker(row): def analyse_pairwise_distances(self, virtual_cycle_number: int = 1, - show_plot: bool = False, save_figure: bool = False, workdir: str = '.', sim_suffix=None, + mplbackend: str = 'Agg', this_cfg=None, - this_args: dict = None): + this_args: dict = None, + ): def pairs_pdf(longitude, latitude): Xi = np.array((longitude, latitude)).T @@ -344,10 +382,9 @@ def pairs_pdf(longitude, latitude): }) # Figure: - if show_plot: - backend = matplotlib.get_backend() - if this_args is not None and this_args.json: - matplotlib.use('Agg') + if save_figure: + initial_mplbackend = matplotlib.get_backend() + matplotlib.use(mplbackend) fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 10), dpi=90) ax, ix = ax.flatten(), -1 @@ -392,17 +429,17 @@ def pairs_pdf(longitude, latitude): fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) plt.tight_layout() - if save_figure: - if sim_suffix is not None: - filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, virtual_cycle_number) - else: - filename = 'vfrecov_metrics01_cyc%i' % (virtual_cycle_number) - save_figurefile(fig, filename, workdir) + if sim_suffix is not None: + filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, virtual_cycle_number) + else: + filename = 'vfrecov_metrics01_cyc%i' % (virtual_cycle_number) + save_figurefile(fig, filename, workdir) - if this_args is not None and this_args.json: - matplotlib.use(backend) + # Rewind mpl backend to initial position: + matplotlib.use(initial_mplbackend) - if show_plot: + # Exit + if save_figure: return M, fig, ax else: return M @@ -438,6 +475,7 @@ def plot_positions(self, save: bool = True, workdir: Path = Path('.'), fname: str = 'swarm_positions', + mplbackend: str = 'Agg', ): """ @@ -446,6 +484,9 @@ def plot_positions(self, """ import cartopy.crs as ccrs + initial_mplbackend = matplotlib.get_backend() + matplotlib.use(mplbackend) + ebox = self.HBOX(s=domain_scale) fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(25, 7), dpi=120, @@ -490,4 +531,8 @@ def plot_positions(self, plt.tight_layout() if save: save_figurefile(fig, fname, workdir) + + # Rewind mpl backend to initial position: + matplotlib.use(initial_mplbackend) + return fig, ax diff --git a/vfrecovery/downloaders/core.py b/vfrecovery/downloaders/core.py index fbf31e4..07a5408 100644 --- a/vfrecovery/downloaders/core.py +++ b/vfrecovery/downloaders/core.py @@ -6,7 +6,14 @@ from vfrecovery.core.utils import pp_obj -def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D', logger=None, lazy=True) -> tuple: +def get_velocity_field(a_box, + a_date, + n_days=1, + output='.', + dataset='ARMOR3D', + logger=None, + lazy=True, + ) -> tuple: """Return the velocity field as an :class:`xr.Dataset`, force download/save if not lazy Parameters diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py index b240d23..db5498f 100644 --- a/vfrecovery/python_interface/predict.py +++ b/vfrecovery/python_interface/predict.py @@ -18,6 +18,7 @@ def predict( domain_min_size: float = 5., overwrite: 
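`analyse_pairwise_distances` above reduces a swarm state to the distribution of pairwise distances between virtual floats. The core of that computation can be sketched as follows, with random positions standing in for real swarm states (the actual metric set is richer):

```python
import numpy as np
from sklearn.neighbors import KernelDensity

rng = np.random.default_rng(42)
pts = rng.normal(size=(100, 2))                      # (lon, lat) of 100 virtual floats

d = np.linalg.norm(pts[:, None, :] - pts[None, :, :], axis=-1)
d = d[np.triu_indices_from(d, k=1)]                  # unique pairwise distances

kde = KernelDensity(kernel="gaussian", bandwidth=0.2).fit(d[:, None])
x = np.linspace(0, d.max(), 50)[:, None]
pdf = np.exp(kde.score_samples(x))                   # estimated PDF of distances
```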
bool = False, lazy: bool = True, + figure: bool = True, log_level: str = 'INFO', ): """ @@ -38,6 +39,7 @@ def predict( domain_min_size overwrite lazy + figure log_level Returns @@ -58,6 +60,7 @@ def predict( domain_min_size=domain_min_size, overwrite=overwrite, lazy=lazy, + figure=figure, log_level=log_level, ) results = json.loads(results_json) diff --git a/vfrecovery/static/assets/simulations_registry.pkl b/vfrecovery/static/assets/simulations_registry.pkl index ae907d1584d4d2e64db347749f4e82f3a9d4929f..28806df718ef0f70b5c8adabcf2220c9d50b03ab 100644 GIT binary patch delta 1098 zcmb7DO-vI(6z+Dn6k7hG{FH!g!-14YVA?{<#tV7^$qb-DbLF zdUVvVZ#tl-42Jy==lTnV$~u<4RsC4d#|QX?3Q#osOpVmrsx}xfc2H?}!`P33`$i{a zgU^i)GgA6EilZS#J1KxIJmy>EHC^scjVs7dlMoPll< z;yHN(@p_#giIO6tZKN~#bV5TqNlwl0;-u`Hg|l)LNr^L-tFtQ2uE#cbSm$t6$Ay#Z z(n#TUOl|xd`9Zy-9VzW(wkODqToVkKd#%KFZSOixsT(@$yJ>V#WGKY#AUb3vfGe=`UT%E9JHFrV`KlKk@zy z2a^^joHcixsB~a)VQD$)otavd^6c7FEV~k1QxjrwMw$rw=iLckJd{iYBFT-R^ynDU z2O`t!;wp%-ZZ$9qJjKG+ly^BE4hNIz=#@*Giy-bxdHrkh=K0Xv7z&ksR_%59LGjdpfbQ&&JJfj&+L30-tGP7 z+o4$S^G9rCO2{1(EeDUu4l*$H`)3hB#bEt}et3`Gp;!DK-l#d$zuaoBngly0(%mrw znW%;xm-<7~x+Q8$;XQoXVe<26fA0txZ68~7N0vPulKQTrufqg{*Qu|j8S&=Bun(K21)cp`ywublBu0?!4TtPAebnYcj1Zi+`o0?ur?<;?o=&pLYSI`r7% zTR>?i<`&D9tt$Pm78o2u3-ogX?f;&#qEs1HWvw0QCjp9mE)I#6K#K#v64k&iL52-o z|IWVz`%=yL^UJ|>EWe|E9t_eqFgwhMg0MAI@EPbi+>&u%5G6BP1BjEgZ8_ZfE# z1Wil8(@%3R8Y5G`Tr4SfTC`{GeL8sdfc9DXyUjMO-2X>sv{vSb@(fK#~mgmAQ<*LN>b%~2Xmj|PIrWTZ5^&=OFOqG zl|(90sAY@lqBCDMv*Wo!uAW`3D2nCfq-I|7%C|?e)-~#1oh;nm8&~tobH(~nb9r;q zHuc0blIrEPiF$L+oXM7o;=+Xg4#9yJ{s>Eu}QIF8N^Z53PHo7m?hU+_%X)=$e$i>BWy4(EV&>Z`dtRX`t|trA8^vn1 zxVo+xX;a9}DqGrUUdhZajBc;2XEmJ!*fq`0O{cZe!tSOp+mxJ1oKa_1xyr=c!sNV= zt_h-&*ut8yB8S+wb31m%No~wlB`Kdz)HG|So~dXB#hlC=>&vridUAdvzm{EJxJKu` z_&Y3cbXZPv!>v^#l=11i?Ng3ihmy=2u*-9lh}c)dc=%LUK-69TjkFZjEw|21YVbZk4r3@a{X+9VG$2}V&)h=4PZuv zuLf3&{VxdKzyt-wxdqcFA_Tq-?f_t3)=@23UPQWX)q(vy8)j2@3Gq4-u Date: Thu, 11 Apr 2024 10:54:27 +0200 Subject: [PATCH 30/38] Refactor n_floats in swarm_size --- schemas/VFrecovery-schema-metadata.json | 4 +- .../command_line_interface/group_predict.py | 25 +++----- vfrecovery/core/deployment_plan.py | 10 ++-- vfrecovery/core/predict.py | 37 +++++++----- vfrecovery/core/simulation_handler.py | 28 ++++----- vfrecovery/core/trajfile_handler.py | 58 ++++++++++--------- vfrecovery/core/utils.py | 14 ++++- vfrecovery/json/VFRschema_meta.py | 8 +-- vfrecovery/python_interface/predict.py | 6 +- 9 files changed, 100 insertions(+), 90 deletions(-) diff --git a/schemas/VFrecovery-schema-metadata.json b/schemas/VFrecovery-schema-metadata.json index 9b47094..465f62c 100644 --- a/schemas/VFrecovery-schema-metadata.json +++ b/schemas/VFrecovery-schema-metadata.json @@ -6,10 +6,10 @@ "format_version": { "const": "0.1" }, - "required": ["n_floats", "velocity_field", "vfconfig"], + "required": ["swarm_size", "velocity_field", "vfconfig"], "type": "object", "properties": { - "n_floats": { + "swarm_size": { "description": "Number of virtual floats simulated", "type": "integer" }, diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py index f6a168f..d19f54f 100644 --- a/vfrecovery/command_line_interface/group_predict.py +++ b/vfrecovery/command_line_interface/group_predict.py @@ -20,7 +20,7 @@ def cli_group_predict() -> None: WMO is the float World Meteorological Organisation number. - CYC is the cycle number location to predict. If you want to simulate more than 1 cycle, use the `n_predictions` option (see below). 
+ CYC is the cycle number to predict. If you want to simulate more than 1 cycle, use the `n_predictions` option (see below). """, epilog=""" Examples: @@ -34,7 +34,7 @@ def cli_group_predict() -> None: required=False, default='GLORYS', show_default=True, - help="Velocity field to use. Possible values are: 'GLORYS', 'ARMOR3D'", + help="Velocity field to use. Velocity data are downloaded with the Copernicus Marine Toolbox. Possible values are: 'GLORYS', 'ARMOR3D'", ) @click.option( "--output_path", @@ -43,15 +43,6 @@ def cli_group_predict() -> None: default=None, help="Simulation root data output folder [default: './vfrecovery_simulations_data']", ) -# @click.option( -# "-v", "--verbose", -# type=bool, -# required=False, -# is_flag=True, -# default=True, -# show_default=True, -# help="Display verbose information along the execution", -# ) @click.option( "--cfg_parking_depth", type=float, @@ -85,7 +76,7 @@ def cli_group_predict() -> None: help="Virtual cycle number to start free surface drift, inclusive", ) @click.option( - "-np", "--n_predictions", + "-n", "--n_predictions", type=int, required=False, default=0, @@ -93,15 +84,15 @@ def cli_group_predict() -> None: help="Number of profiles to predict after cycle specified with argument 'CYC'", ) @click.option( - "-nf", "--n_floats", + "-s", "--swarm_size", type=int, required=False, default=100, show_default=True, - help="Swarm size, i.e. the number of virtual floats simulated to make predictions", + help="Swarm size, i.e. the number of virtual floats simulated to make predictions for 1 real float", ) @click.option( - "-s", "--domain_min_size", + "-d", "--domain_min_size", type=float, required=False, default=5, @@ -144,7 +135,7 @@ def predict( cfg_cycle_duration, cfg_profile_depth, cfg_free_surface_drift, - n_floats, + swarm_size, domain_min_size, overwrite, lazy, @@ -171,7 +162,7 @@ def predict( cfg_cycle_duration=cfg_cycle_duration, cfg_profile_depth=cfg_profile_depth, cfg_free_surface_drift=cfg_free_surface_drift, - n_floats=n_floats, + swarm_size=swarm_size, domain_min_size=domain_min_size, overwrite=overwrite, lazy=lazy, diff --git a/vfrecovery/core/deployment_plan.py b/vfrecovery/core/deployment_plan.py index f73c071..edb9a20 100644 --- a/vfrecovery/core/deployment_plan.py +++ b/vfrecovery/core/deployment_plan.py @@ -3,7 +3,7 @@ from vfrecovery.json import Profile -def setup_deployment_plan(P: Profile, nfloats: int = 120) -> pd.DataFrame: +def setup_deployment_plan(P: Profile, swarm_size: int = 120) -> pd.DataFrame: """Create a deployment plan as a :class:`pandas.DataFrame` We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time @@ -19,13 +19,13 @@ def setup_deployment_plan(P: Profile, nfloats: int = 120) -> pd.DataFrame: # box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] a, b = lonc - rx / 2, lonc + rx / 2 - lon = (b - a) * np.random.random_sample((nfloats,)) + a + lon = (b - a) * np.random.random_sample((swarm_size,)) + a a, b = latc - ry / 2, latc + ry / 2 - lat = (b - a) * np.random.random_sample((nfloats,)) + a + lat = (b - a) * np.random.random_sample((swarm_size,)) + a a, b = 0, rt - dtim = (b - a) * np.random.random_sample((nfloats,)) + a + dtim = (b - a) * np.random.random_sample((swarm_size,)) + a dtim = np.round(dtim).astype(int) tim = pd.to_datetime([P.location.time + np.timedelta64(dt, 'h') for dt in dtim]) # dtim = (b-a) * np.random.random_sample((nfloats, )) + a @@ -38,7 +38,7 @@ def setup_deployment_plan(P: Profile, nfloats: int = 
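`setup_deployment_plan`, continued just below, seeds the swarm by drawing uniform perturbations around the last known float position, in space and in deployment time. Stripped of the DataFrame packaging, the sampling reads roughly like this (center position and spreads are made up):

```python
import numpy as np
import pandas as pd

swarm_size = 100
lonc, latc = -25.0, 45.0    # hypothetical center: last known float position
rx, ry, rt = 0.5, 0.5, 6    # lon/lat box sizes (deg) and time window (hours)

a, b = lonc - rx / 2, lonc + rx / 2
lon = (b - a) * np.random.random_sample((swarm_size,)) + a
a, b = latc - ry / 2, latc + ry / 2
lat = (b - a) * np.random.random_sample((swarm_size,)) + a
dtim = np.round(rt * np.random.random_sample((swarm_size,))).astype(int)
tim = pd.to_datetime("2024-04-11") + pd.to_timedelta(dtim, unit="h")
```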
120) -> pd.DataFrame: # df = pd.DataFrame( - [tim, lat, lon, np.arange(0, nfloats) + 9000000, np.full_like(lon, 0), ['VF' for l in lon], ['?' for l in lon]], + [tim, lat, lon, np.arange(0, swarm_size) + 9000000, np.full_like(lon, 0), ['VF' for l in lon], ['?' for l in lon]], index=['date', 'latitude', 'longitude', 'wmo', 'cycle_number', 'institution_code', 'file']).T df['date'] = pd.to_datetime(df['date']) diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py index 0524959..644c09e 100644 --- a/vfrecovery/core/predict.py +++ b/vfrecovery/core/predict.py @@ -5,10 +5,10 @@ import os import logging import json - +import pandas as pd from .simulation_handler import Simulation -from .utils import pp_obj +from .utils import pp_obj, get_a_log_filename root_logger = logging.getLogger("vfrecovery_root_logger") sim_logger = logging.getLogger("vfrecovery_simulation") @@ -48,7 +48,7 @@ def predict_function( cfg_cycle_duration: float, cfg_profile_depth: float, cfg_free_surface_drift: int, - n_floats: int, + swarm_size: int, domain_min_size: float, overwrite: bool, lazy: bool, @@ -69,7 +69,7 @@ def predict_function( cfg_cycle_duration cfg_profile_depth cfg_free_surface_drift - n_floats + swarm_size domain_min_size overwrite lazy @@ -110,22 +110,24 @@ def predict_function( [cyc.append(cyc[1] + n + 1) for n in range(n_predictions)] if output_path is None: - # output_path = "vfrecovery_sims" % pd.to_datetime('now', utc=True).strftime("%Y%m%d%H%M%S") - # output_path = os.path.sep.join(["vfrecovery_simulations_data", str(wmo), str(cyc[1])]) - output_path = "vfrecovery_simulations_data" + output_path = Path(__file__).parents[2].joinpath("vfrecovery_simulations_data") output_path = Path(output_path) output_path.mkdir(parents=True, exist_ok=True) # Set-up simulation logger - simlogfile = logging.FileHandler(os.path.join(output_path, "vfrecovery_simulations.log"), mode='a') + templogfile = get_a_log_filename(output_path, name='simulation_') + simlogfile = logging.FileHandler(templogfile, mode='a') simlogfile.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(name)s:%(filename)s | %(message)s", datefmt='%Y/%m/%d %I:%M:%S')) sim_logger.handlers = [] sim_logger.addHandler(simlogfile) + # Redirect all warnings to log files + logging.captureWarnings(True) + # S = Simulation(wmo, cyc, - n_floats=n_floats, + swarm_size=swarm_size, velocity=velocity, output_path=output_path, overwrite=overwrite, @@ -143,14 +145,17 @@ def predict_function( S.execute() S.predict() S.postprocess() - S.finish(execution_start, process_start) - return S.to_json() + S.finish(execution_start, process_start) # Save on disk in json file else: - log_this.info("This simulation already exists, stopping here !") - # Loading json results from previous run - with open(S.run_file, 'r') as f: - jsdata = json.load(f) - return json.dumps(jsdata, indent=4) + log_this.info("This simulation already exists, stop here and return existing results") + + # Move log file to the appropriate final destination: + templogfile.rename(get_a_log_filename(S.output_path)) + + # Load json results to return + with open(S.run_file, 'r') as f: + jsdata = json.load(f) + return json.dumps(jsdata, indent=4) diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py index 3d216a7..443676e 100644 --- a/vfrecovery/core/simulation_handler.py +++ b/vfrecovery/core/simulation_handler.py @@ -12,7 +12,7 @@ from vfrecovery.json import MetaData, MetaDataSystem, MetaDataComputation from vfrecovery.downloaders import 
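`predict_function` above now gives each run its own numbered log file and funnels Python warnings into the logging machinery via `logging.captureWarnings`. A condensed, self-contained sketch of that setup (folder and file names are illustrative):

```python
import logging
from pathlib import Path

sim_logger = logging.getLogger("vfrecovery_simulation")

logdir = Path("vfrecovery_simulations_data")
logdir.mkdir(parents=True, exist_ok=True)

handler = logging.FileHandler(logdir / "simulation_001.log", mode="a")
handler.setFormatter(logging.Formatter(
    "%(asctime)s | %(levelname)s | %(name)s:%(filename)s | %(message)s",
    datefmt="%Y/%m/%d %I:%M:%S"))
sim_logger.handlers = [handler]

logging.captureWarnings(True)  # warnings.warn(...) messages now reach the handlers too
```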
get_velocity_field -from .utils import ArgoIndex2jsProfile, get_simulation_suffix, get_domain, pp_obj +from .utils import ArgoIndex2jsProfile, get_domain, pp_obj from .floats_config import setup_floats_config from .deployment_plan import setup_deployment_plan from .trajfile_handler import Trajectories @@ -72,7 +72,7 @@ def __init__(self, wmo, cyc, **kwargs): # Create Simulation Meta-data class holder self.MD = MetaData.from_dict({ - 'n_floats': kwargs['n_floats'], + 'swarm_size': kwargs['swarm_size'], 'velocity_field': kwargs['velocity'], 'system': MetaDataSystem.auto_load(), 'vfconfig': None, # will be filled later @@ -94,7 +94,7 @@ def _instance2rec(self): 'velocity': {'name': self.MD.velocity_field, 'download': pd.to_datetime(self.ds_vel.attrs['access_date']), 'domain_size': self.domain_min_size}, - 'swarm_size': self.MD.n_floats, + 'swarm_size': self.MD.swarm_size, 'path_root': self.path_root, } @@ -161,9 +161,11 @@ def _setup_load_velocity_data(self, **kwargs): cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) self.n_days = (len(self.cyc)-1) * cycle_period - self.logger.info("Velocity field should cover %i cycles of %i hours" % (len(self.cyc)-1, 24 * cycle_period)) - self.logger.info("Connecting to %i days of %s velocity starting on %s" % ( - self.n_days, self.MD.velocity_field, self.P_obs[0].location.time)) + self.logger.info("Velocity field should cover %i cycles of %i hours (%i days)" % (len(self.cyc)-1, + 24 * cycle_period, + self.n_days)) + self.logger.info("Retrieve info for %s velocity starting on %s" % ( + self.MD.velocity_field, self.P_obs[0].location.time)) self.ds_vel, velocity_file, new_file = get_velocity_field(domain, self.P_obs[0].location.time, n_days=self.n_days, @@ -206,7 +208,7 @@ def setup(self, **kwargs): # self.run_file = self.output_path.joinpath("results.json") - self.logger.info("Simulation results will be registered under:\n%s" % self.run_file) + # self.logger.info("Simulation results will be registered under:\n%s" % self.run_file) self.logger.info("Check if such a simulation has already been registered: %s" % self.is_registered) self.logger.debug("Setup terminated") @@ -216,7 +218,7 @@ def setup(self, **kwargs): class Simulation_execute(Simulation_setup): def _execute_get_velocity(self): - self.logger.info("Create a velocity object") + self.logger.info("Create a velocity object (this can take a while)") self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, src=self.ds_vel, logger=self.logger, @@ -230,8 +232,8 @@ def _execute_get_velocity(self): save=True, workdir=self.velocity_path ) - self.logger.info(fname) - self.logger.info(self.velocity_path.stem) + # self.logger.info(fname) + # self.logger.info(self.velocity_path.stem) # fname.rename( # str(fname).replace("velocity_%s" % self.VEL.name, # Path(self.velocity_file).name.replace(".nc", "") @@ -241,7 +243,7 @@ def _execute_get_velocity(self): def _execute_get_plan(self): # VirtualFleet, get a deployment plan: self.logger.info("Create a deployment plan") - df_plan = setup_deployment_plan(self.P_obs[0], nfloats=self.MD.n_floats) + df_plan = setup_deployment_plan(self.P_obs[0], swarm_size=self.MD.swarm_size) self.logger.info( "Set %i virtual floats to deploy (i.e. 
swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) @@ -366,7 +368,7 @@ def postprocess(self): class Simulation(Simulation_postprocess): """Base class to execute the simulation/prediction workflow - >>> S = Simulation(wmo, cyc, n_floats=n_floats, velocity=velocity, output_path=Path('.')) + >>> S = Simulation(wmo, cyc, swarm_size=swarm_size, velocity=velocity, output_path=Path('.')) >>> S.setup() >>> S.execute() >>> S.predict() @@ -388,7 +390,7 @@ def finish(self, execution_start: float, process_start: float): self.logger.debug(pp_obj(self.MD.computation)) self.to_json(fp=self.run_file) - self.logger.info("Simulation results and analysis saved in: %s" % self.run_file) + self.logger.info("Simulation results and analysis saved in:\n%s" % self.run_file) self.register() self.logger.debug("Simulation recorded in registry") diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py index cc78185..72a82da 100644 --- a/vfrecovery/core/trajfile_handler.py +++ b/vfrecovery/core/trajfile_handler.py @@ -47,7 +47,7 @@ class Trajectories: Examples: --------- T = Trajectories(traj_zarr_file) - T.n_floats + T.swarm_size T.sim_cycles df = T.to_index() df = T.get_index().add_distances() @@ -61,7 +61,7 @@ def __init__(self, zfile, **kwargs): self.logger = default_logger if 'logger' not in kwargs else kwargs['logger'] @property - def n_floats(self): + def swarm_size(self): # len(self.obj['trajectory']) return self.obj['trajectory'].shape[0] @@ -78,7 +78,7 @@ def sim_cycles(self): def __repr__(self): summary = [""] - summary.append("Swarm size: %i floats" % self.n_floats) + summary.append("Swarm size: %i floats" % self.swarm_size) start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) summary.append("Simulation length: %s, from %s to %s" % ( @@ -166,25 +166,26 @@ def worker(ds, cyc, x0, y0): (ds['cycle_phase'] >= 3).compute()) this_cyc = ds.where(mask, drop=True) - # Check if we didn't lose some particles: - if len(x0) > len(this_cyc.isel(obs=-1)['time'].values): - n = len(x0) - len(this_cyc.isel(obs=-1)['time'].values) - raise ValueError("%i virtual floats did not make it to the end of required cycles. " - "They probably reached the edge of the velocity field domain. You should " - "try to increase the domain size of the simulation." % n) - if len(this_cyc['time']) > 0: - data = { - 'date': this_cyc.isel(obs=-1)['time'].values, - 'latitude': this_cyc.isel(obs=-1)['lat'].values, - 'longitude': this_cyc.isel(obs=-1)['lon'].values, - 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, - 'cyc': cyc, - # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, - 'deploy_lon': x0, - 'deploy_lat': y0, - } - return pd.DataFrame(data) + + # Check if we didn't lose some particles: + n = len(x0) - len(this_cyc.isel(obs=-1)['time'].values) + if n > 0: + raise ValueError("%i virtual floats did not make all required cycles. They probably reached " + "the edge of the velocity field domain. You should try to increase the domain " + "size of the simulation." 
diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py
index cc78185..72a82da 100644
--- a/vfrecovery/core/trajfile_handler.py
+++ b/vfrecovery/core/trajfile_handler.py
@@ -47,7 +47,7 @@ class Trajectories:
     Examples:
     ---------
     T = Trajectories(traj_zarr_file)
-    T.n_floats
+    T.swarm_size
     T.sim_cycles
     df = T.to_index()
     df = T.get_index().add_distances()
@@ -61,7 +61,7 @@ def __init__(self, zfile, **kwargs):
         self.logger = default_logger if 'logger' not in kwargs else kwargs['logger']
 
     @property
-    def n_floats(self):
+    def swarm_size(self):
         # len(self.obj['trajectory'])
         return self.obj['trajectory'].shape[0]
 
@@ -78,7 +78,7 @@ def sim_cycles(self):
 
     def __repr__(self):
         summary = [""]
-        summary.append("Swarm size: %i floats" % self.n_floats)
+        summary.append("Swarm size: %i floats" % self.swarm_size)
         start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values)
         end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values)
         summary.append("Simulation length: %s, from %s to %s" % (
@@ -166,25 +166,26 @@ def worker(ds, cyc, x0, y0):
                                   (ds['cycle_phase'] >= 3).compute())
             this_cyc = ds.where(mask, drop=True)
 
-            # Check if we didn't lose some particles:
-            if len(x0) > len(this_cyc.isel(obs=-1)['time'].values):
-                n = len(x0) - len(this_cyc.isel(obs=-1)['time'].values)
-                raise ValueError("%i virtual floats did not make it to the end of required cycles. "
-                                 "They probably reached the edge of the velocity field domain. You should "
-                                 "try to increase the domain size of the simulation." % n)
-
             if len(this_cyc['time']) > 0:
-                data = {
-                    'date': this_cyc.isel(obs=-1)['time'].values,
-                    'latitude': this_cyc.isel(obs=-1)['lat'].values,
-                    'longitude': this_cyc.isel(obs=-1)['lon'].values,
-                    'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values,
-                    'cyc': cyc,
-                    # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values,
-                    'deploy_lon': x0,
-                    'deploy_lat': y0,
-                }
-                return pd.DataFrame(data)
+
+                # Check if we didn't lose some particles:
+                n = len(x0) - len(this_cyc.isel(obs=-1)['time'].values)
+                if n > 0:
+                    raise ValueError("%i virtual floats did not make all required cycles. They probably reached "
+                                     "the edge of the velocity field domain. You should try to increase the domain "
+                                     "size of the simulation." % n)
+                else:
+                    data = {
+                        'date': this_cyc.isel(obs=-1)['time'].values,
+                        'latitude': this_cyc.isel(obs=-1)['lat'].values,
+                        'longitude': this_cyc.isel(obs=-1)['lon'].values,
+                        'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values,
+                        'cyc': this_cyc.isel(obs=-1)['cycle_number'].values,
+                        # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values,
+                        'deploy_lon': x0,
+                        'deploy_lat': y0,
+                    }
+                    return pd.DataFrame(data)
             else:
                 return None
 
@@ -195,11 +196,14 @@ def worker(ds, cyc, x0, y0):
             df = worker(self.obj, cyc, deploy_lon, deploy_lat)
             rows.append(df)
         rows = [r for r in rows if r is not None]
-        df = pd.concat(rows).reset_index()
-        df['wmo'] = df['wmo'].astype(int)
-        df['cyc'] = df['cyc'].astype(int)
-        # df['cycle_phase'] = df['cycle_phase'].astype(int)
-        self._index = df
+        if len(rows) > 0:
+            df = pd.concat(rows).reset_index()
+            df['wmo'] = df['wmo'].astype(int)
+            df['cyc'] = df['cyc'].astype(int)
+            # df['cycle_phase'] = df['cycle_phase'].astype(int)
+            self._index = df
+        else:
+            raise ValueError("No virtual float completed the required cycles, no index to build")
 
         return self._index
 
@@ -424,7 +428,7 @@ def pairs_pdf(longitude, latitude):
             line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats)
         else:
             line0 = "VirtualFleet recovery swarm simulation for cycle %i" % virtual_cycle_number
-            line1 = "Simulation made with %i virtual floats" % (self.n_floats)
+            line1 = "Simulation made with %i virtual floats" % (self.swarm_size)
 
     fig.suptitle("%s\n%s" % (line0, line1), fontsize=15)
     plt.tight_layout()
diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py
index ca91eb2..9b92c90 100644
--- a/vfrecovery/core/utils.py
+++ b/vfrecovery/core/utils.py
@@ -64,8 +64,8 @@ def ArgoIndex2jsProfile(a_wmo, a_cyc, cache:bool=False, cachedir:str='.') -> Lis
 def get_simulation_suffix(md: MetaData) -> str:
     """Compose a simulation unique ID for output files"""
     # suf = '%s_%i' % (this_args.velocity, this_args.nfloats)
-    suf = 'VEL%s_NFL%i_CYT%i_PKD%i_PFD%i_FSD%i' % (md.velocity_field,
-                                                   md.n_floats,
+    suf = 'VEL%s_SWS%i_CYT%i_PKD%i_PFD%i_FSD%i' % (md.velocity_field,
+                                                   md.swarm_size,
                                                    int(md.vfconfig.mission['cycle_duration']),
                                                    int(md.vfconfig.mission['parking_depth']),
                                                    int(md.vfconfig.mission['profile_depth']),
@@ -99,4 +99,12 @@ def make_hashable(o):
     if isinstance(o, (set, frozenset)):
         return tuple(sorted(make_hashable(e) for e in o))
 
-    return o
\ No newline at end of file
+    return o
+
+
+def get_a_log_filename(op, name='simulation'):
+    fname = lambda i: "%s%0.3d.log" % (name, i)
+    i = 1
+    while op.joinpath(fname(i)).exists():
+        i += 1
+    return op.joinpath(fname(i))
\ No newline at end of file
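
The new `get_a_log_filename()` helper returns the first unused, zero-padded log file name in a folder. A quick sketch of its behaviour (import path assumed):

```python
# Sketch: get_a_log_filename() picks the next free, zero-padded name.
from pathlib import Path
from vfrecovery.core.utils import get_a_log_filename  # assumed import path

op = Path("/tmp/vfr_demo")
op.mkdir(parents=True, exist_ok=True)

print(get_a_log_filename(op).name)   # 'simulation001.log' ("%0.3d" pads to 3 digits)
op.joinpath("simulation001.log").touch()
print(get_a_log_filename(op).name)   # 'simulation002.log'
```
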
diff --git a/vfrecovery/json/VFRschema_meta.py b/vfrecovery/json/VFRschema_meta.py
index 46f9f88..f3dfbbe 100644
--- a/vfrecovery/json/VFRschema_meta.py
+++ b/vfrecovery/json/VFRschema_meta.py
@@ -86,7 +86,7 @@ def from_dict(obj: Dict) -> 'MetaDataComputation':
 
 
 class MetaData(VFvalidators):
-    n_floats: int = None
+    swarm_size: int = None
     velocity_field: str = None
     vfconfig: VFschema_configuration = None
     computation: MetaDataComputation = None
@@ -94,14 +94,14 @@ class MetaData(VFvalidators):
     schema: str = "VFrecovery-schema-metadata"
     description: str = "A set of meta-data to describe one simulation"
 
-    required: List = ["n_floats", "velocity_field", "vfconfig"]
+    required: List = ["swarm_size", "velocity_field", "vfconfig"]
     properties: List = ["description",
-                        "n_floats", "velocity_field",
+                        "swarm_size", "velocity_field",
                         "vfconfig", "computation", "system"]
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
-        self._is_integer(self.n_floats)
+        self._is_integer(self.swarm_size)
 
         if 'vfconfig' not in kwargs:
             self.vfconfig = None
diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py
index db5498f..d0c0055 100644
--- a/vfrecovery/python_interface/predict.py
+++ b/vfrecovery/python_interface/predict.py
@@ -14,7 +14,7 @@ def predict(
     cfg_cycle_duration: float = None,
     cfg_profile_depth: float = None,
     cfg_free_surface_drift: int = 9999,
-    n_floats: int = 100,
+    swarm_size: int = 100,
     domain_min_size: float = 5.,
     overwrite: bool = False,
     lazy: bool = True,
@@ -35,7 +35,7 @@ def predict(
     cfg_cycle_duration
     cfg_profile_depth
     cfg_free_surface_drift
-    n_floats
+    swarm_size
     domain_min_size
     overwrite
     lazy
@@ -56,7 +56,7 @@ def predict(
         cfg_cycle_duration=cfg_cycle_duration,
         cfg_profile_depth=cfg_profile_depth,
         cfg_free_surface_drift=cfg_free_surface_drift,
-        n_floats=n_floats,
+        swarm_size=swarm_size,
         domain_min_size=domain_min_size,
         overwrite=overwrite,
         lazy=lazy,

From 035d9e9e64b19cab0faa49196d971fd017a811a6 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Thu, 11 Apr 2024 10:54:42 +0200
Subject: [PATCH 31/38] Update .gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index c61f7bc..8a7b11a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -143,4 +143,5 @@ cli/build-pypi
 *.npi*
 cli/vfrecov/
 webapi/myapp/static/data
-vfrecovery_simulations_data/
\ No newline at end of file
+vfrecovery_simulations_data/
+vfrecovery/static/assets/simulations_registry.pkl

From f15f771bb1ebab92ea3ebae562576f7671cb61a9 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Thu, 11 Apr 2024 10:55:10 +0200
Subject: [PATCH 32/38] Default velocity field now loaded down to 6000 m

---
 vfrecovery/downloaders/core.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vfrecovery/downloaders/core.py b/vfrecovery/downloaders/core.py
index 07a5408..331912b 100644
--- a/vfrecovery/downloaders/core.py
+++ b/vfrecovery/downloaders/core.py
@@ -11,6 +11,7 @@ def get_velocity_field(a_box,
                        n_days=1,
                        output='.',
                        dataset='ARMOR3D',
+                       max_depth=6000,
                        logger=None,
                        lazy=True,
                        ) -> tuple:
@@ -23,6 +24,7 @@ def get_velocity_field(a_box,
     n_days
     output
     dataset
+    max_depth
     logger
     lazy
 
@@ -46,7 +48,7 @@ def get_velocity_filename(dataset, n_days):
 
     # Make an instance
     # (we add a 1-day security delay at the beginning to make sure that we have velocity at the deployment time)
-    loader = loader(a_box, a_date - pd.Timedelta(1, 'D'), n_days=n_days+1, logger=logger)
+    loader = loader(a_box, a_date - pd.Timedelta(1, 'D'), n_days=n_days+1, logger=logger, max_depth=max_depth)
 
     # Load data from the Copernicus Marine Data store:
     ds = loader.to_xarray()  # Lazy by default
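
With the new keyword in place, a call to the downloader entry point might look like the sketch below. The unpacking and the date-as-second-positional-argument follow the call site in `simulations.py`; the import path, box ordering and all concrete values are illustrative assumptions:

```python
# Sketch: request 10 days of GLORYS velocity down to 6000 m (values made up).
import pandas as pd
from vfrecovery.downloaders.core import get_velocity_field  # assumed import path

a_box = [-65., -55., 30., 40.]  # assumed [lon_min, lon_max, lat_min, lat_max] ordering
ds_vel, velocity_file, new_file = get_velocity_field(
    a_box,
    pd.to_datetime('2024-01-15', utc=True),  # deployment time
    n_days=10,
    output='.',
    dataset='GLORYS',
    max_depth=6000,  # the new default introduced by this patch
)
```
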
"cmems_mod_glo_phy_my_0.083deg_P1D-m" else: self._loader = self._get_forecast self.dataset_id = "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m" From 24c310c6acd69e39f8797154975468f3495c50b8 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Thu, 11 Apr 2024 10:55:33 +0200 Subject: [PATCH 34/38] misc --- README.md | 42 ++++++++++++++++--- vfrecovery/command_line_interface/group_db.py | 1 + 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index bdc3b18..24bb88e 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ More about Argo floats recovery in here: ## Command Line Interface -Primary groups of commands are ``predict`` and ``describe``. +Primary groups of commands are ``predict``, ``describe`` and ``db``. ### vfrecovery predict ``` @@ -66,11 +66,14 @@ Options: ### vfrecovery describe ``` -Usage: vfrecovery describe [OPTIONS] WMO [CYC]... +Usage: vfrecovery describe [OPTIONS] TARGET WMO [CYC]... - Returns data about an existing VirtualFleet-Recovery prediction + TARGET select what is to be described. A string in: ['obs', 'velocity', + 'run']. - Data could be a JSON file, specific metrics or images + WMO is the float World Meteorological Organisation number + + CYC is the cycle number location to restrict description to Options: --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET] @@ -81,11 +84,38 @@ Options: Examples: - vfrecovery describe 6903091 + vfrecovery describe velocity 6903091 - vfrecovery describe 6903091 112 + vfrecovery describe obs 6903091 112 ``` +### vfrecovery db + +``` +Usage: vfrecovery db [OPTIONS] ACTION + + Internal simulation database helper + +Options: + --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET] + Set the details printed to console by the + command (based on standard logging library). + [default: INFO] + -i, --index INTEGER Record index to work with + -h, --help Show this message and exit. 

From 24c310c6acd69e39f8797154975468f3495c50b8 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Thu, 11 Apr 2024 10:55:33 +0200
Subject: [PATCH 34/38] misc

---
 README.md                                     | 42 ++++++++++++++++---
 vfrecovery/command_line_interface/group_db.py |  1 +
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index bdc3b18..24bb88e 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ More about Argo floats recovery in here:
 
 ## Command Line Interface
 
-Primary groups of commands are ``predict`` and ``describe``.
+Primary groups of commands are ``predict``, ``describe`` and ``db``.
 
 ### vfrecovery predict
@@ -66,11 +66,14 @@ Options:
 
 ### vfrecovery describe
 ```
-Usage: vfrecovery describe [OPTIONS] WMO [CYC]...
+Usage: vfrecovery describe [OPTIONS] TARGET WMO [CYC]...
 
-  Returns data about an existing VirtualFleet-Recovery prediction
+  TARGET select what is to be described. A string in: ['obs', 'velocity',
+  'run'].
 
-  Data could be a JSON file, specific metrics or images
+  WMO is the float World Meteorological Organisation number
+
+  CYC is the cycle number to restrict the description to
 
 Options:
   --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]
@@ -81,11 +84,38 @@ Options:
 
   Examples:
 
-  vfrecovery describe 6903091
+  vfrecovery describe velocity 6903091
 
-  vfrecovery describe 6903091 112
+  vfrecovery describe obs 6903091 112
 ```
 
+### vfrecovery db
+
+```
+Usage: vfrecovery db [OPTIONS] ACTION
+
+  Internal simulation database helper
+
+Options:
+  --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]
+                                  Set the details printed to console by the
+                                  command (based on standard logging library).
+                                  [default: INFO]
+  -i, --index INTEGER             Record index to work with
+  -h, --help                      Show this message and exit.
+
+  Examples:
+
+  vfrecovery db info
+
+  vfrecovery db read
+
+  vfrecovery db read --index 3
+
+  vfrecovery db drop
+```
+
+
 ## Python interface
 
diff --git a/vfrecovery/command_line_interface/group_db.py b/vfrecovery/command_line_interface/group_db.py
index 1ca1cc2..21cb38c 100644
--- a/vfrecovery/command_line_interface/group_db.py
+++ b/vfrecovery/command_line_interface/group_db.py
@@ -20,6 +20,7 @@ def cli_group_db() -> None:
     "db",
     short_help="Helper for VirtualFleet-Recovery simulations database",
     help="""
+    Internal simulation database helper
 
     """,
     epilog="""

From 9f3d6a9214da1568360b10dcc2a8c080badd9c9e Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Fri, 12 Apr 2024 10:58:23 +0200
Subject: [PATCH 35/38] misc

---
 schemas/VFrecovery-schema-location.json       |  2 +-
 schemas/VFrecovery-schema-profile.json        |  3 +-
 schemas/VFrecovery-schema-trajectory.json     | 20 +++++++
 vfrecovery/command_line_interface/group_db.py | 11 +++-
 .../command_line_interface/group_describe.py  | 15 +++--
 vfrecovery/core/db.py                         | 60 +++++++++++++++----
 vfrecovery/core/predict.py                    |  2 +-
 vfrecovery/core/trajfile_handler.py           | 19 +++---
 vfrecovery/core/utils.py                      |  2 +-
 vfrecovery/json/VFRschema.py                  |  4 +-
 vfrecovery/json/VFRschema_profile.py          | 50 +++++++++++++++-
 vfrecovery/json/__init__.py                   |  2 +-
 12 files changed, 155 insertions(+), 35 deletions(-)
 create mode 100644 schemas/VFrecovery-schema-trajectory.json

diff --git a/schemas/VFrecovery-schema-location.json b/schemas/VFrecovery-schema-location.json
index a6d6663..ac898dd 100644
--- a/schemas/VFrecovery-schema-location.json
+++ b/schemas/VFrecovery-schema-location.json
@@ -13,7 +13,7 @@
       "type": "number",
       "minimum": -180,
       "maximum": 180,
-      "description": "Longitude of the geo-location, [-180-180] convention"
+      "description": "Longitude of the geo-location, [-180/180] convention"
     },
     "latitude": {
       "type": "number",
diff --git a/schemas/VFrecovery-schema-profile.json b/schemas/VFrecovery-schema-profile.json
index 072eba4..fcdd2cb 100644
--- a/schemas/VFrecovery-schema-profile.json
+++ b/schemas/VFrecovery-schema-profile.json
@@ -39,6 +39,7 @@
       "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metrics.json"
     },
     "dependencies": {
-      "virtual_cycle_number": ["metrics"]}
+      "virtual_cycle_number": ["metrics"]
+    }
   }
 }
diff --git a/schemas/VFrecovery-schema-trajectory.json b/schemas/VFrecovery-schema-trajectory.json
new file mode 100644
index 0000000..ab2cdca
--- /dev/null
+++ b/schemas/VFrecovery-schema-trajectory.json
@@ -0,0 +1,20 @@
+{
+  "$schema": "https://json-schema.org/draft/2019-09/schema",
+  "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-trajectory.json",
+  "title": "VirtualFleet-Recovery trajectory",
+  "description": "Represents two or more VirtualFleet-Recovery locations that share a relationship",
+  "format_version": {
+    "const": "0.1"
+  },
+  "required": [ "locations" ],
+  "type": "object",
+  "properties": {
+    "locations": {
+      "type": "array",
+      "items": {
+        "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-location.json"
+      },
+      "uniqueItems": false
+    }
+  }
+}
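
A document intended to validate against this new trajectory schema could look like the following Python literal; only `longitude` and `latitude` are confirmed location keys from the schemas shown here, and all values are made up:

```python
# Illustrative instance of the new trajectory schema (values made up):
trajectory_doc = {
    "locations": [
        {"longitude": -58.3, "latitude": 35.1},
        {"longitude": -58.1, "latitude": 35.4},
    ],
    "description": "An observed profile linked to its predicted next surfacing",
}
```
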
diff --git a/vfrecovery/command_line_interface/group_db.py b/vfrecovery/command_line_interface/group_db.py
index 21cb38c..051aa6c 100644
--- a/vfrecovery/command_line_interface/group_db.py
+++ b/vfrecovery/command_line_interface/group_db.py
@@ -71,6 +71,10 @@ def db(
     if root_logger.isEnabledFor(logging.DEBUG):
         root_logger.debug("DEBUG mode activated")
 
+    # Validate arguments:
+    if action.lower() not in ["read", "info", "drop"]:
+        raise ValueError("The first argument ACTION must be one in ['read', 'info', 'drop']")
+
     if action == 'read':
         df = DB.read_data()
         if index is not None:
@@ -82,8 +86,11 @@ def db(
             click.secho("Row index #%i:" % irow, fg='green')
             click.echo(row.T.to_string())
 
-    if action == 'drop':
+    elif action == 'drop':
         DB.clear()
 
-    if action == 'info':
+    elif action == 'info':
         click.echo(DB.info())
+
+    else:
+        raise click.BadParameter("Unknown DB action '%s'" % action)
diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py
index caf9652..b62d505 100644
--- a/vfrecovery/command_line_interface/group_describe.py
+++ b/vfrecovery/command_line_interface/group_describe.py
@@ -3,6 +3,7 @@
 from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo
 import argopy.plot as argoplot
 from argopy import ArgoIndex
+from pathlib import Path
 
 from vfrecovery.utils.misc import list_float_simulation_folders
 from vfrecovery.core.db import DB
@@ -85,6 +86,9 @@ def describe(
     elif target == 'run':
         describe_run(wmo, cyc)
 
+    else:
+        raise click.BadParameter("Unknown describe target '%s'" % target)
+
 
 def describe_run(wmo, cyc):
     partial_data = {'wmo': wmo}
@@ -94,14 +98,13 @@ def describe_run(wmo, cyc):
 
 
 def describe_velocity(wmo, cyc):
+    cyc = cyc[0] if len(cyc) > 0 else None
 
-    # List folders to examine:
-    plist = list_float_simulation_folders(wmo, cyc)
+    for ii, item in DB.from_dict({'wmo': wmo, 'cyc': cyc}).items:
+        p = Path(item.path_root).joinpath(item.path_obj.velocity)
 
-    # List all available velocity files:
-    for c in plist.keys():
-        p = plist[c]
-        click.secho("Velocity data for WMO=%s / CYC=%s:" % (wmo, c), fg='blue')
+        click.secho("Velocity data for WMO=%s / CYC=%s / DOMAIN-SIZE=%0.2f / DOWNLOAD-DATE=%s"
+                    % (item.wmo, item.cyc, item.velocity['domain_size'], item.velocity['download']), fg='blue')
 
         click.secho("\tNetcdf files:")
         vlist = sorted(p.glob("velocity_*.nc"))
diff --git a/vfrecovery/core/db.py b/vfrecovery/core/db.py
index accfe13..6adc9c7 100644
--- a/vfrecovery/core/db.py
+++ b/vfrecovery/core/db.py
@@ -21,7 +21,7 @@
 This first implementation relies on a simple local pickle file with a panda dataframe
 """
-from typing import List, Dict
+from typing import List, Dict, Iterable, Hashable
 from virtualargofleet import FloatConfiguration
 from pathlib import Path
 import pandas as pd
@@ -167,7 +167,14 @@ class DB:
     >>> DB.isconnected()
     >>> DB.read_data()  # Return db content as :class:`pd.DataFrame`
 
-    >>> data = {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0, 'cfg': FloatConfiguration('recovery'), 'velocity': {'name': 'GLORYS', 'download': pd.to_datetime('now', utc=True), 'domain_size': 5}, 'path_root': Path('.'), 'swarm_size': 1000}
+    >>> data = {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0,
+    >>>         'cfg': FloatConfiguration('recovery'),
+    >>>         'velocity': {'name': 'GLORYS',
+    >>>                      'download': pd.to_datetime('now', utc=True),
+    >>>                      'domain_size': 5},
+    >>>         'path_root': Path('.'),
+    >>>         'swarm_size': 1000}
+    >>>
     >>> DB.from_dict(data).checkin()  # save to db
     >>> DB.from_dict(data).checkout()  # delete from db
     >>> DB.from_dict(data).checked
@@ -198,9 +205,6 @@ class DB:
                                        "simulations_registry.pkl")
 
     def __init__(self, **kwargs):
-        # for key in self.required:
-        #     if key not in kwargs:
-        #         raise ValueError("Missing '%s' property" % key)
         for key in kwargs:
             if key in self.properties:
                 setattr(self, key, kwargs[key])
@@ -252,7 +256,7 @@ def init(cls):
         return cls
 
     @classmethod
-    def connect(cls):
+    def connect(cls) -> "DB":
         """Connect to database and refresh data holder"""
         if not cls.isconnected():
             cls.init()
@@ -281,13 +285,14 @@ def connect(cls):
     #     df.apply(has_result_file, axis=1)
 
     @classmethod
-    def read_data(cls):
+    def read_data(cls) -> pd.DataFrame:
         """Return database content as a :class:`pd.DataFrame`"""
         cls.connect()
         return cls._data
 
     @classmethod
-    def exists(cls, dict_of_values):
+    def exists(cls, dict_of_values) -> bool:
+        """Return True if an exact match on all properties is found"""
         df = cls.read_data()
         v = df.iloc[:, 0] == df.iloc[:, 0]
         for key, value in dict_of_values.items():
@@ -319,7 +324,8 @@ def del_data(cls, row):
         df.to_pickle(cls.dbfile)
 
     @classmethod
-    def get_data(cls, row):
+    def get_data(cls, row) -> pd.DataFrame:
+        """Return records matching no-None properties"""
         df = cls.read_data()
         mask = df.iloc[:, 0] == df.iloc[:, 0]
         for key in row:
@@ -327,10 +333,6 @@ def get_data(cls, row):
                 mask &= df[key] == row[key]
         return df[mask]
 
-    @classmethod
-    def info(cls) -> str:
-        return cls.__repr__(cls)
-
     def __repr__(self):
         self.connect()
         summary = [""]
@@ -341,6 +343,10 @@ def __repr__(self):
 
         return "\n".join(summary)
 
+    @classmethod
+    def info(cls) -> str:
+        return cls.__repr__(cls)
+
     @staticmethod
     def from_dict(obj: Dict) -> "DB":
         return DB(**obj)
@@ -367,6 +373,29 @@ def _instance2row(self):
 
         return row
 
+    @classmethod
+    def _row2dict(cls, row) -> dict:
+        """Convert a db row to a dictionary input"""
+        data = {}
+        data.update({'wmo': row['wmo']})
+        data.update({'cyc': row['cyc']})
+        data.update({'n_predictions': row['n_predictions']})
+
+        cfg = FloatConfiguration('recovery')
+        for key in cfg.mission:
+            cfg.update(key, row["cfg_%s" % key])
+        data.update({'cfg': cfg})
+
+        vel = {'name': None, 'download': None, 'domain_size': None}
+        for key in vel:
+            vel.update({key: row["velocity_%s" % key]})
+        data.update({'velocity': vel})
+
+        data.update({'swarm_size': row['swarm_size']})
+        data.update({'path_root': row['path_root']})
+
+        return data
+
     def checkin(self):
         """Add one new record to the database"""
         new_row = self._instance2row()
@@ -411,3 +440,8 @@ def path_obj(self):
     def record(self) -> pd.DataFrame:
         row = self._instance2row()
         return self.get_data(row)
+
+    @property
+    def items(self) -> Iterable[tuple[Hashable, "DB"]]:
+        for irow, df_row in self.record.iterrows():
+            yield irow, DB.from_dict(self._row2dict(df_row))
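
With `_row2dict()` and the `items` generator above, partial queries can be walked record by record, which is the pattern `describe_velocity()` now uses. A sketch, assuming (per the `get_data()` docstring) that properties left to `None` are ignored when matching; the WMO value is illustrative:

```python
# Sketch: iterate simulation-registry records matching a partial query.
from vfrecovery.core.db import DB

for irow, item in DB.from_dict({'wmo': 6903091, 'cyc': None}).items:
    print(irow, item.wmo, item.cyc, item.velocity['name'], item.swarm_size)
```
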
diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py
index 644c09e..b604a05 100644
--- a/vfrecovery/core/predict.py
+++ b/vfrecovery/core/predict.py
@@ -115,7 +115,7 @@ def predict_function(
     output_path.mkdir(parents=True, exist_ok=True)
 
     # Set-up simulation logger
-    templogfile = get_a_log_filename(output_path, name='simulation_')
+    templogfile = get_a_log_filename(output_path)
     simlogfile = logging.FileHandler(templogfile, mode='a')
     simlogfile.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(name)s:%(filename)s | %(message)s",
                                               datefmt='%Y/%m/%d %I:%M:%S'))
diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py
index 72a82da..199e8d1 100644
--- a/vfrecovery/core/trajfile_handler.py
+++ b/vfrecovery/core/trajfile_handler.py
@@ -11,7 +11,7 @@
 
 from vfrecovery.utils.misc import get_cfg_str
 from vfrecovery.plots.utils import map_add_features, save_figurefile
-from vfrecovery.json import Profile
+from vfrecovery.json import Profile, Location
 from vfrecovery.json import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState
 
@@ -162,8 +162,9 @@ def to_index(self) -> pd.DataFrame:
         deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values
 
         def worker(ds, cyc, x0, y0):
-            mask = np.logical_and((ds['cycle_number'] == cyc).compute(),
-                                  (ds['cycle_phase'] >= 3).compute())
+            mask_end_of_cycle = np.logical_or((ds['cycle_phase'] == 3).compute(), (ds['cycle_phase'] == 4).compute())
+
+            mask = np.logical_and((ds['cycle_number'] == cyc).compute(), mask_end_of_cycle)
             this_cyc = ds.where(mask, drop=True)
 
             if len(this_cyc['time']) > 0:
@@ -217,9 +218,13 @@ def index(self):
             self.get_index()
         return self._index
 
-    def add_distances(self, origin: Profile = None) -> pd.DataFrame:
+    def add_distances(self, origin: Location = None) -> pd.DataFrame:
         """Compute profiles distance to some origin
 
+        Parameters
+        ----------
+        origin: :class:`Location`
+
         Returns
         -------
         :class:`pandas.dataframe`
@@ -235,9 +240,9 @@ def add_distances(self, origin: Profile = None) -> pd.DataFrame:
 
         # Simulated cycles:
         # sim_cyc = np.unique(this_df['cyc'])
 
-        df = self._index
+        df = self.index
 
-        x2, y2 = origin.location.longitude, origin.location.latitude  # real float initial position
+        x2, y2 = origin.longitude, origin.latitude  # real float initial position
         df['distance'] = np.nan
         df['rel_lon'] = np.nan
         df['rel_lat'] = np.nan
@@ -267,7 +272,7 @@ def worker(row):
         df = df.apply(worker, axis=1)
         self._index = df
 
-        return self._index
+        return self.index
 
     def analyse_pairwise_distances(self,
                                    virtual_cycle_number: int = 1,
diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py
index 9b92c90..8760ea4 100644
--- a/vfrecovery/core/utils.py
+++ b/vfrecovery/core/utils.py
@@ -102,7 +102,7 @@ def make_hashable(o):
     return o
 
 
-def get_a_log_filename(op, name='simulation'):
+def get_a_log_filename(op, name='simulation_'):
     fname = lambda i: "%s%0.3d.log" % (name, i)
     i = 1
     while op.joinpath(fname(i)).exists():
diff --git a/vfrecovery/json/VFRschema.py b/vfrecovery/json/VFRschema.py
index 0a4b0ca..c60e373 100644
--- a/vfrecovery/json/VFRschema.py
+++ b/vfrecovery/json/VFRschema.py
@@ -70,7 +70,9 @@ def default(self, obj):
             return obj.isoformat()
         if isinstance(obj, np.float32):
             return float(obj)
-        if getattr(type(obj), '__name__') in ['Location', 'Profile',
+        if isinstance(obj, np.int64):
+            return int(obj)
+        if getattr(type(obj), '__name__') in ['Location', 'Profile', 'Trajectory',
                                               'Metrics', 'TrajectoryLengths', 'PairwiseDistances', 'PairwiseDistancesState',
                                               'SurfaceDrift', 'Transit', 'Location_error',
                                               'MetaDataSystem', 'MetaDataComputation', 'MetaData']:
diff --git a/vfrecovery/json/VFRschema_profile.py b/vfrecovery/json/VFRschema_profile.py
index 262b3ca..18651f5 100644
--- a/vfrecovery/json/VFRschema_profile.py
+++ b/vfrecovery/json/VFRschema_profile.py
@@ -1,6 +1,6 @@
 import pandas as pd
 import numpy as np
-from typing import List, Dict
+from typing import List, Dict, Iterable
 import argopy.plot as argoplot
 
 from .VFRschema import VFvalidators
@@ -42,6 +42,18 @@ def __init__(self, **kwargs):
     def from_dict(obj: Dict) -> 'Location':
         return Location(**obj)
 
+    @staticmethod
+    def from_tuple(obj: tuple) -> 'Location':
+        if len(obj) == 2:
+            d = {'longitude': obj[0], 'latitude': obj[1]}
+        elif len(obj) == 3:
+            d = {'longitude': obj[0], 'latitude': obj[1], 'time': obj[2]}
+        elif len(obj) == 4:
+            d = {'longitude': obj[0], 'latitude': obj[1], 'time': obj[2], 'description': obj[3]}
+        else:
+            raise ValueError("Expects a tuple with 2 to 4 elements: (longitude, latitude[, time[, description]])")
+        return Location(**d)
+
 
 class Profile(VFvalidators):
     location: Location
@@ -99,3 +109,41 @@ def from_ArgoIndex(df: pd.DataFrame) -> List['Profile']:
             })
             Plist.append(p)
         return Plist
+
+
+class Trajectory(VFvalidators):
+    locations: List[Location]
+
+    schema: str = "VFrecovery-schema-trajectory"
+    description: str = "Represents two or more VirtualFleet-Recovery locations that share a relationship"
+    required: List = ["locations"]
+    properties: List = ["locations", "description"]
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        if len(kwargs['locations']) < 2:
+            raise ValueError("'locations' must be a list with at least 2 elements")
+        L = []
+        for location in kwargs['locations']:
+            if isinstance(location, dict):
+                loc = Location.from_dict(location)
+            elif isinstance(location, Iterable):
+                loc = Location.from_tuple(location)
+            else:
+                raise ValueError("'locations' item must be a dictionary or an Iterable")
+            L.append(loc)
+        self.locations = L
+
+    @staticmethod
+    def from_dict(obj: Dict) -> 'Trajectory':
+        """Create a Trajectory from a dictionary
+
+        Parameters
+        ----------
+        obj: Dict
+            Must have a 'locations' key mapping to a List[Location]
+        """
+        return Trajectory(**obj)
+
+    @staticmethod
+    def from_tuple(obj: tuple) -> 'Trajectory':
+        return Trajectory(**{'locations': obj})
diff --git a/vfrecovery/json/__init__.py b/vfrecovery/json/__init__.py
index d7153c1..f697ee7 100644
--- a/vfrecovery/json/__init__.py
+++ b/vfrecovery/json/__init__.py
@@ -1,4 +1,4 @@
-from .VFRschema_profile import Profile, Location
+from .VFRschema_profile import Profile, Location, Trajectory
 from .VFRschema_simulation import Simulation
 from .VFRschema_meta import MetaData, MetaDataSystem, MetaDataComputation
 from .VFRschema_metrics import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState, Transit, SurfaceDrift, Location_error
\ No newline at end of file
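
Combined with `Location.from_tuple()`, the new `Trajectory.from_tuple()` helper accepts plain tuples. A sketch with made-up coordinates, assuming the `Location` validators accept ISO time strings:

```python
# Sketch: build a Trajectory from plain (lon, lat[, time[, description]]) tuples.
from vfrecovery.json import Trajectory

T = Trajectory.from_tuple((
    (-58.3, 35.1, "2024-01-15T00:00:00"),
    (-58.1, 35.4, "2024-01-25T00:00:00", "predicted surfacing"),
))
```
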

From 74281508ae777c2c0af09fb99515751e0d2da161 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Fri, 12 Apr 2024 10:58:38 +0200
Subject: [PATCH 36/38] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 8a7b11a..a2f6a82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -145,3 +145,4 @@ cli/vfrecov/
 webapi/myapp/static/data
 vfrecovery_simulations_data/
 vfrecovery/static/assets/simulations_registry.pkl
+vfrecovery/static/assets/simulations_registry.pkl

From 5b4096d5b45e474238a17fa3836f4a5111a774b7 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Fri, 12 Apr 2024 11:01:55 +0200
Subject: [PATCH 37/38] Update README.md

---
 README.md | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/README.md b/README.md
index 24bb88e..22716d3 100644
--- a/README.md
+++ b/README.md
@@ -152,38 +152,6 @@ vfrecovery.predict(
 
 # API Design
 
-## Making predictions
-
-```bash
-vfrecovery predict WMO CYC
-vfrecovery predict WMO CYC1 CYC2 CYC3
-```
-
-Options:
-```bash
-vfrecovery predict --n_predictions 3 WMO CYC0
-vfrecovery predict -np 3 WMO CYC0
-
-vfrecovery predict --n_floats 2000 WMO CYC
-vfrecovery predict -nf 2000 WMO CYC
-
-vfrecovery predict --velocity GLORYS WMO CYC
-vfrecovery predict -v GLORYS WMO CYC
-
-vfrecovery predict --cfg_parking_depth 200 WMO CYC
-
-vfrecovery predict --cfg_cycle_duration 60 WMO CYC
-
-vfrecovery predict --cfg_profile_depth 1000 WMO CYC
-```
-
-## Describe results
-
-```bash
-vfrecovery describe velocity WMO CYC
-vfrecovery describe obs WMO CYC1 CYC2 CYC3
-```
-
 ## Other possible commands
 
 ```bash

From 35c96171e37aeea4c77c08adaee395515aae6e35 Mon Sep 17 00:00:00 2001
From: Guillaume Maze
Date: Fri, 12 Apr 2024 11:02:44 +0200
Subject: [PATCH 38/38] Delete simulations_registry.pkl

---
 .../static/assets/simulations_registry.pkl | Bin 2562 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 vfrecovery/static/assets/simulations_registry.pkl

diff --git a/vfrecovery/static/assets/simulations_registry.pkl b/vfrecovery/static/assets/simulations_registry.pkl
deleted file mode 100644
index 28806df718ef0f70b5c8adabcf2220c9d50b03ab..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001