diff --git a/.gitignore b/.gitignore
index dbce53c..a2f6a82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -142,4 +142,6 @@ cli/build-pypi
 *.nc
 *.npi*
 cli/vfrecov/
-webapi/myapp/static/data
\ No newline at end of file
+webapi/myapp/static/data
+vfrecovery_simulations_data/
+vfrecovery/static/assets/simulations_registry.pkl
diff --git a/README.md b/README.md
index 9c04712..22716d3 100644
--- a/README.md
+++ b/README.md
@@ -2,112 +2,194 @@
 |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
 | [![DOI](https://zenodo.org/badge/543618989.svg)](https://zenodo.org/badge/latestdoi/543618989) |
 
-The goal of this repository is to provide a library to make Argo floats trajectory predictions easy, in order to facilitate recovery.
-The library produces a prediction _patch_ or _cone_ that could be displayed on a map like here: https://floatrecovery.euro-argo.eu
-More about Argo floats recovery in here: https://github.com/euroargodev/recovery/issues
+The goal of this repository is to provide a CLI and Python library to make Argo float trajectory predictions easy, in order to facilitate recovery.
 
-New version compatible with [VirtualFleet 0.4.0](https://virtualfleet.readthedocs.io/en/latest/whats-new.html#v0-4-0-2-feb-2024) and using the [Copernicus Marine Toolbox](https://help.marine.copernicus.eu/en/collections/5821001-python-library-api) to retrieve GLORYS or ARMOR3D velocity fields.
+More about Argo float recovery here:
+- https://floatrecovery.euro-argo.eu
+- https://github.com/euroargodev/recovery/issues
 
-# Documentation (preliminary)
-## Overall prediction procedure
-1. Given a specific float cycle to predict ``C``, we extract:
-   - space/time position of the previous cycle ``C-1``,
-   - configuration parameters of the previous cycle ``C-1``, such as parking depth, profiling depth and cycling period using the EA API (but these can be overwritten if necessary).
+# Documentation
 
-2. We download the daily CMEMS velocity fields for a region around the previous cycle ``C-1`` coordinates
+## Command Line Interface
 
-3. We run a VirtualFleet simulation:
-    - where we use a large number of virtual floats located with a random perturbations around the float cycle ``C-1`` position in space/time
-    - for the cycle ``C-1`` duration
+The primary command groups are ``predict``, ``describe`` and ``db``.
 
-4. We compute the most probable position of the float cycle ``C`` and prediction metrics and figures.
+### vfrecovery predict
+```
+Usage: vfrecovery predict [OPTIONS] WMO CYC
+
+  Execute the VirtualFleet-Recovery predictor
+
+  WMO is the float World Meteorological Organisation number.
+
+  CYC is the cycle number location to predict. If you want to simulate more
+  than 1 cycle, use the `n_predictions` option (see below).
+
+Options:
+  -v, --velocity TEXT            Velocity field to use. Possible values are:
+                                 'GLORYS', 'ARMOR3D'  [default: GLORYS]
+  --output_path TEXT             Simulation data output folder [default:
+                                 './vfrecovery_simulations_data//']
+  --cfg_parking_depth FLOAT      Virtual floats parking depth in db [default:
+                                 previous cycle value]
+  --cfg_cycle_duration FLOAT     Virtual floats cycle duration in hours
+                                 [default: previous cycle value]
+  --cfg_profile_depth FLOAT      Virtual floats profile depth in db [default:
+                                 previous cycle value]
+  --cfg_free_surface_drift INTEGER
+                                 Virtual cycle number to start free surface
+                                 drift, inclusive [default: 9999]
+  -np, --n_predictions INTEGER   Number of profiles to predict after cycle
+                                 specified with argument 'CYC' [default: 0]
+  -nf, --n_floats INTEGER        Swarm size, i.e. the number of virtual
+                                 floats simulated to make predictions
+                                 [default: 100]
+  -s, --domain_min_size FLOAT    Minimal size (deg) of the simulation domain
+                                 around the initial float position [default:
+                                 5]
+  --overwrite                    Should past simulation data be overwritten
+                                 or not, for a similar set of arguments
+  --lazy / --no-lazy             Load velocity data in lazy mode (not saved
+                                 on file). [default: lazy]
+  --log_level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]
+                                 Set the details printed to console by the
+                                 command (based on standard logging library).
+                                 [default: INFO]
+  -h, --help                     Show this message and exit.
+
+  Examples:
+
+  vfrecovery predict 6903091 112
+  ```
+
+### vfrecovery describe
-The reason why we make random perturbations of the float cycle ``C-1`` position is not because the float position is uncertain (with GPS it is fairly accurate most of the time), but because it is a cheap way to account for errors in the velocity field. Indeed, we assume that the _phase_ of the velocity field used to advect floats is the primary source of uncertainties to predict the final position. We do not account for velocity shear/strain errors at this point.
+```
+Usage: vfrecovery describe [OPTIONS] TARGET WMO [CYC]...
-## Installation
+  TARGET selects what is to be described. A string in: ['obs', 'velocity',
+  'run'].
-Our goal is to distribute VFrecovery as a standalone pipy package. In the meantime, one need to work with this repo only.
+  WMO is the float World Meteorological Organisation number.
-- Download his repository:
-```bash
-git clone git@github.com:euroargodev/VirtualFleet_recovery.git
-```
-- Add the ``cli`` folder to your path, eg:
-```bash
-export PATH="/Users/gmaze/git/github/euroargodev/VirtualFleet_recovery/cli:$PATH"
-```
-- Make sure to get the appropriate Python 3.9 environment ([using this conda file](environment.yml)):
-```bash
-mamba env create -f environment.yml
-```
-- Install the last VirtualFleet version:
-```bash
-git clone git@github.com:euroargodev/VirtualFleet.git
-```
+  CYC is the cycle number location to restrict the description to.
-## Command line instructions
+Options:
+  --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET]
+                                  Set the details printed to console by the
+                                  command (based on standard logging library).
+                                  [default: INFO]
+  -h, --help                      Show this message and exit.
-### Usage
-The ``recovery_prediction.py`` script allows making predictions, i.e. at this point to produce:
-- a json data files with predictions information for machine/machine applications,
-- and a few figures to indicate where the float will make surface contact and how the probability patch was created.
+ Examples: + + vfrecovery describe velocity 6903091 + + vfrecovery describe obs 6903091 112 + ``` + +### vfrecovery db -For a simple help, you can type: -``` -recovery_prediction.py -h ``` +Usage: vfrecovery db [OPTIONS] ACTION + + Internal simulation database helper + +Options: + --log-level [DEBUG|INFO|WARN|ERROR|CRITICAL|QUIET] + Set the details printed to console by the + command (based on standard logging library). + [default: INFO] + -i, --index INTEGER Record index to work with + -h, --help Show this message and exit. + + Examples: -To make prediction of where the 99th cycle of the 6902919 float will be, just type: + vfrecovery db info + + vfrecovery db read + + vfrecovery db read --index 3 + + vfrecovery db drop ``` -recovery_prediction.py 6902919 99 + + +## Python interface + + +### vfrecovery.predict + +```python +import vfrecovery + +wmo, cyc = 6903091, 126 +results = vfrecovery.predict(wmo, cyc) ``` -A few options are available: +Signature: ``` -usage: recovery_prediction.py [-h] [--nfloats NFLOATS] [--output OUTPUT] [--velocity VELOCITY] [--save_figure SAVE_FIGURE] [--save_sim SAVE_SIM] [--vf VF] [--json] - [--cfg_parking_depth CFG_PARKING_DEPTH] [--cfg_cycle_duration CFG_CYCLE_DURATION] - wmo cyc - -VirtualFleet recovery predictor - -positional arguments: - wmo Float WMO number - cyc Cycle number to predict - -optional arguments: - -h, --help show this help message and exit - --nfloats NFLOATS Number of virtual floats used to make the prediction, default: 2000 - --output OUTPUT Output folder, default: webAPI internal folder - --velocity VELOCITY Velocity field to use. Possible values are: 'ARMOR3D' (default), 'GLORYS' - --save_figure SAVE_FIGURE - Should we save figure on file or not ? Default: True - --save_sim SAVE_SIM Should we save the simulation on file or not ? Default: False - --vf VF Parent folder to the VirtualFleet repository clone - --json Use to only return a json file and stay quiet - --cfg_parking_depth CFG_PARKING_DEPTH - Virtual floats parking depth in [db], default: use previous cycle value - --cfg_cycle_duration CFG_CYCLE_DURATION - Virtual floats cycle duration in [hours], default: use previous cycle value - -This script can be used to make prediction of a specific float cycle position. - This script can be used on past or unknown float cycles. - Note that in order to download online velocity fields from the Copernicus Marine Data Store, you need to have the - appropriate credentials file setup. - -(c) Argo-France/Ifremer/LOPS, 2022-2024 +vfrecovery.predict( + wmo: int, + cyc: int, + velocity: str = 'GLORYS', + output_path: Union[str, pathlib.Path] = None, + n_predictions: int = 0, + cfg_parking_depth: float = None, + cfg_cycle_duration: float = None, + cfg_profile_depth: float = None, + cfg_free_surface_drift: int = 9999, + n_floats: int = 100, + domain_min_size: float = 5.0, + overwrite: bool = False, + lazy: bool = True, + log_level: str = 'INFO', +) ``` -So, don't forget to: -- set up your environment to be able to download velocity fields from the Copernicus Marine Toolbox -- use the option ``vf`` to specify where the VirtualFleet software has been cloned (this is temporary and will change once VirtualFleet will be available on Pypi). 
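+
+The structure of the returned ``results`` object is not documented here. As a sketch (using only keyword arguments listed in the signature above, with hypothetical values), a call overriding a few defaults could look like:
+
+```python
+import vfrecovery
+
+# Hypothetical example: predict two more profiles after cycle 126 of float 6903091,
+# using the ARMOR3D velocity field and a larger swarm of virtual floats:
+results = vfrecovery.predict(
+    6903091, 126,
+    velocity='ARMOR3D',
+    n_predictions=2,
+    n_floats=500,
+    overwrite=True,
+)
+```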
-### Example
+
+# API Design
+
+## Other possible commands
+
+```bash
+vfrecovery meetwith "cruise_track.csv" WMO CYC0
+```
+
+## Data storage
+Simulation data are stored on disk under the following architecture:
 ```
-recovery_prediction.py 6902915 116
+./vfrecovery_simulations_data
+    |- vfrecovery_simulations.log
+    |- WMO
+    |----CYC
+       |----VELOCITY(NAME + DOWNLOAD_DATE + DOMAIN_SIZE)
+            |- velocity_file.nc
+            |- figure.png
+       |---- RUN_PARAMS(NP + CFG + NF)
+            |- float_configuration.json
+            |- trajectories.zarr
+            |- results.json
+            |- figure.png
 ```
-Below is an example of this prediction for the 99th cycle of the 6902919 float.
-The really observed 99th cycle is shown at the tip of the arrow (red point) starting from the previous 98th cycle.
-The VirtualFleet Recovery prediction is in the probabilistic red shading: the most probable position predicted is in the redder region.
-![Figure](docs/img/vfrecov_predictions_recap_VELARMOR3D_NF2000_CYCDUR240_PDPTH1000.png)
\ No newline at end of file
+
+This ensures that, for a given velocity field, all possible simulations can be found unambiguously under a single folder.
\ No newline at end of file
diff --git a/cli/launch_webapi b/cli/launch_webapi
deleted file mode 100755
index a05b90c..0000000
--- a/cli/launch_webapi
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env bash
-
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-APIDIR="$SCRIPT_DIR/../webapi"
-APIDIR=$(python -c "import os,sys; print(os.path.realpath(sys.argv[1]))" $APIDIR)
-#echo $APIDIR
-
-POSITIONAL_ARGS=()
-WHERETO="local"
-while [[ $# -gt 0 ]]; do
-  case $1 in
-    -l|--local)
-      WHERETO="local"
-      shift # past argument
-      shift # past value
-      ;;
-    -p|--pacnet)
-      WHERETO="pacnet"
-      shift # past argument
-      shift # past value
-      ;;
-    -*|--*)
-      echo "Unknown option $1"
-      exit 1
-      ;;
-  esac
-done
-#echo $WHERETO
-
-IP=$(ifconfig en12 | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1')
-#echo $IP
-case $WHERETO in
-  "pacnet")
-    IP="134.246.146.54"
-    ;;
-
-esac
-
-echo "Launching VirtualFleet-Recovery webapi with Flask on $WHERETO at $IP ..."
-
-export FLASK_DEBUG=True
-export FLASK_APP=myapp
-cd $APIDIR
-flask -A myapp routes
-flask -A myapp run --host=$IP
-
-exit 0
diff --git a/cli/recovery_prediction.py b/cli/recovery_prediction.py
index 50079a6..859710b 100755
--- a/cli/recovery_prediction.py
+++ b/cli/recovery_prediction.py
@@ -48,1679 +48,6 @@ log = logging.getLogger("virtualfleet.recovery")
 
-PREF = "\033["
-RESET = f"{PREF}0m"
-class COLORS:
-    black = "30m"
-    red = "31m"
-    green = "32m"
-    yellow = "33m"
-    blue = "34m"
-    magenta = "35m"
-    cyan = "36m"
-    white = "37m"
-
-
-def get_package_dir():
-    fpath = Path(__file__)
-    return str(fpath.parent.parent)
-
-
-def puts(text, color=None, bold=False, file=sys.stdout):
-    """Alternative to print, uses no color by default but accepts any color from the COLORS class.
- - Parameters - ---------- - text - color=None - bold=False - file=sys.stdout - """ - if color is None: - txt = f'{PREF}{1 if bold else 0}m' + text + RESET - print(txt, file=file) - else: - txt = f'{PREF}{1 if bold else 0};{color}' + text + RESET - print(txt, file=file) - log.info(text) - - -def haversine(lon1, lat1, lon2, lat2): - """ - Calculate the great circle distance (in [km]) between two points - on the earth (specified in decimal degrees) - - see: https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points - - Parameters - ---------- - lon1 - lat1 - lon2 - lat2 - - Returns - ------- - km - """ - from math import radians, cos, sin, asin, sqrt - # convert decimal degrees to radians - lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) - - # haversine formula - dlon = lon2 - lon1 - dlat = lat2 - lat1 - a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2 - c = 2 * asin(sqrt(a)) - r = 6371 # Radius of earth in kilometers. - return c * r - - -def bearing(lon1, lat1, lon2, lat2): - """ - - Parameters - ---------- - lon1 - lat1 - lon2 - lat2 - - Returns - ------- - - """ - # from math import cos, sin, atan2, degrees - # b = atan2(cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(lon2 - lon1), sin(lon2 - lon1) * cos(lat2)) - # b = degrees(b) - # return b - - import pyproj - geodesic = pyproj.Geod(ellps='WGS84') - fwd_azimuth, back_azimuth, distance = geodesic.inv(lon1, lat1, lon2, lat2) - return fwd_azimuth - - -def strfdelta(tdelta, fmt): - """ - - Parameters - ---------- - tdelta - fmt - - Returns - ------- - - """ - d = {"days": tdelta.days} - d["hours"], rem = divmod(tdelta.seconds, 3600) - d["minutes"], d["seconds"] = divmod(rem, 60) - return fmt.format(**d) - - -def fixLON(x): - """Ensure a 0-360 longitude""" - if x < 0: - x = 360 + x - return x - - -def getSystemInfo(): - """Return system information as a dict""" - try: - info = {} - info['platform']=platform.system() - info['platform-release']=platform.release() - info['platform-version']=platform.version() - info['architecture']=platform.machine() - info['hostname']=socket.gethostname() - info['ip-address']=socket.gethostbyname(socket.gethostname()) - # info['mac-address']=':'.join(re.findall('..', '%012x' % uuid.getnode())) - info['processor']=platform.processor() - info['ram']=str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB" - return info - except Exception as e: - logging.exception(e) - - -def get_glorys_forecast_from_datarmor(a_box, a_start_date, n_days=1): - """Load Datarmor Global Ocean 1/12° Physics Analysis and Forecast updated Daily - - Fields: daily, from 2020-11-25T12:00 to 'now' + 5 days - Src: /home/ref-ocean-model-public/multiparameter/physic/global/cmems/global-analysis-forecast-phy-001-024 - Info: https://resources.marine.copernicus.eu/product-detail/GLOBAL_ANALYSISFORECAST_PHY_001_024/INFORMATION - - Parameters - ---------- - a_box - a_start_date - n_days - """ - def get_forecast_files(a_date, n_days=1): - file_list = [] - for n in range(0, n_days): - t = a_date + pd.Timedelta(n, 'D') - p = os.path.join(src, "%i" % t.year, "%0.3d" % t.day_of_year) - # print(p, os.path.exists(p)) - if os.path.exists(p): - file_list.append(sorted(glob.glob(os.path.join(p, "*.nc")))[0]) - return file_list - - def preprocess(this_ds): - idpt = np.argwhere(this_ds['depth'].values > 2000)[0][0] - ilon = np.argwhere(this_ds['longitude'].values >= a_box[0])[0][0], \ - np.argwhere(this_ds['longitude'].values >= a_box[1])[0][0] - ilat = 
np.argwhere(this_ds['latitude'].values >= a_box[2])[0][0], \ - np.argwhere(this_ds['latitude'].values >= a_box[3])[0][0] - this_ds = this_ds.isel({'depth': range(0, idpt), - 'longitude': range(ilon[0], ilon[1]), - 'latitude': range(ilat[0], ilat[1])}) - return this_ds - - root = "/home/ref-ocean-model-public" if not os.uname()[0] == 'Darwin' else "/Volumes/MODEL-PUBLIC/" - src = os.path.join(root, "multiparameter/physic/global/cmems/global-analysis-forecast-phy-001-024") - # puts("\t%s" % src, color=COLORS.green) - flist = get_forecast_files(a_start_date, n_days=n_days) - if len(flist) == 0: - raise ValueError("This float cycle is too old for this velocity field.") - glorys = xr.open_mfdataset(flist, preprocess=preprocess, combine='nested', concat_dim='time', parallel=True) - # - return glorys - - -class Armor3d: - """Global Ocean 1/4° Multi Observation Product ARMOR3D - - Product description: - https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_TSUV_3D_MYNRT_015_012 - - If start_date + n_days <= 2022-12-28: - Delivers the multi-year reprocessed (REP) weekly data - - otherwise: - Delivers the near-real-time (NRT) weekly data - - Examples - -------- - >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() - >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() - - """ - - def __init__(self, box, start_date, n_days=1, max_depth=2500): - """ - Parameters - ---------- - box: list(float) - Define domain to load: [lon_min, lon_max, lat_min, lat_max] - start_date: :class:`pandas.Timestamp` - Starting date of the time series to load. Since ARMOR3D is weekly, the effective starting - date will be the first weekly period including the user-defined ``start_date`` - n_days: int (default=1) - Number of days to load data for. - max_depth: float (default=2500) - Maximum depth levels to load data for. 
- """ - self.box = box - self.start_date = start_date - self.n_days = n_days - self.max_depth = max_depth - - dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') - if start_date + dt <= pd.to_datetime('2022-12-28', utc=True): - self._loader = self._get_rep - self.dataset_id = "dataset-armor-3d-rep-weekly" - self.time_axis = pd.Series(pd.date_range('19930106', '20221228', freq='7D').tz_localize("UTC")) - else: - self._loader = self._get_nrt - self.dataset_id = "dataset-armor-3d-nrt-weekly" - self.time_axis = pd.Series( - pd.date_range('20190102', pd.to_datetime('now', utc=True).strftime("%Y%m%d"), freq='7D').tz_localize( - "UTC")[0:-1]) - - if start_date < self.time_axis.iloc[0]: - raise ValueError('Date out of bounds') - elif start_date + dt > self.time_axis.iloc[-1]: - raise ValueError('Date out of bounds, %s > %s' % ( - start_date + dt, self.time_axis.iloc[-1])) - - def _get_this(self, dataset_id): - start_date = self.time_axis[self.time_axis <= self.start_date].iloc[-1] - if self.n_days == 1: - end_date = start_date - else: - end_date = \ - self.time_axis[self.time_axis <= self.start_date + (self.n_days + 1) * pd.Timedelta(1, 'D')].iloc[-1] - - ds = copernicusmarine.open_dataset( - dataset_id=dataset_id, - minimum_longitude=self.box[0], - maximum_longitude=self.box[1], - minimum_latitude=self.box[2], - maximum_latitude=self.box[3], - maximum_depth=self.max_depth, - start_datetime=start_date.strftime("%Y-%m-%dT%H:%M:%S"), - end_datetime=end_date.strftime("%Y-%m-%dT%H:%M:%S"), - variables=['ugo', 'vgo'] - ) - return ds - - def _get_rep(self): - """multi-year reprocessed (REP) weekly data - - Returns - ------- - :class:xarray.dataset - """ - return self._get_this(self.dataset_id) - - def _get_nrt(self): - """near-real-time (NRT) weekly data - - Returns - ------- - :class:xarray.dataset - """ - return self._get_this(self.dataset_id) - - def to_xarray(self): - """Load and return data as a :class:`xarray.dataset` - - Returns - ------- - :class:xarray.dataset - """ - return self._loader() - - def __repr__(self): - summary = [""] - summary.append("dataset_id: %s" % self.dataset_id) - summary.append("First day: %s" % self.start_date) - summary.append("N days: %s" % self.n_days) - summary.append("Domain: %s" % self.box) - summary.append("Max depth (m): %s" % self.max_depth) - return "\n".join(summary) - - -class Glorys: - """Global Ocean 1/12° Physics Re-Analysis or Forecast - - If start_date + n_days <= 2021-01-09: - Delivers the multi-year reprocessed (REP) daily data - https://resources.marine.copernicus.eu/product-detail/GLOBAL_MULTIYEAR_PHY_001_030 - - otherwise: - Delivers the near-real-time (NRT) Analysis and Forecast daily data - https://resources.marine.copernicus.eu/product-detail/GLOBAL_ANALYSISFORECAST_PHY_001_024 - - Examples - -------- - >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() - >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() - - """ - - def __init__(self, box, start_date, n_days=1, max_depth=2500): - """ - Parameters - ---------- - box: list(float) - Define domain to load: [lon_min, lon_max, lat_min, lat_max] - start_date: :class:`pandas.Timestamp` - Starting date of the time series to load. - n_days: int (default=1) - Number of days to load data for. - max_depth: float (default=2500) - Maximum depth levels to load data for. 
- """ - self.box = box - self.start_date = start_date - self.n_days = n_days - self.max_depth = max_depth - - dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') - if start_date + dt <= pd.to_datetime('2021-01-09', utc=True): - self._loader = self._get_reanalysis - self.dataset_id = "cmems_mod_glo_phy_my_0.083_P1D-m" - else: - self._loader = self._get_forecast - self.dataset_id = "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m" - - def _get_this(self, dataset_id, dates): - ds = copernicusmarine.open_dataset( - dataset_id=dataset_id, - minimum_longitude=self.box[0], - maximum_longitude=self.box[1], - minimum_latitude=self.box[2], - maximum_latitude=self.box[3], - maximum_depth=self.max_depth, - start_datetime=dates[0].strftime("%Y-%m-%dT%H:%M:%S"), - end_datetime=dates[1].strftime("%Y-%m-%dT%H:%M:%S"), - variables=['uo', 'vo'] - ) - return ds - - def _get_forecast(self): - """ - Returns - ------- - :class:`xarray.dataset` - """ - start_date = self.start_date - if self.n_days == 1: - end_date = start_date - else: - end_date = start_date + pd.Timedelta(self.n_days - 1, 'D') - return self._get_this(self.dataset_id, [start_date, end_date]) - - def _get_reanalysis(self): - """ - Returns - ------- - :class:`xarray.dataset` - """ - start_date = self.start_date - if self.n_days == 1: - end_date = start_date - else: - end_date = self.start_date + pd.Timedelta(self.n_days - 1, 'D') - return self._get_this(self.dataset_id, [start_date, end_date]) - - def to_xarray(self): - """ Load and return data as a :class:`xarray.dataset` - Returns - ------- - :class:`xarray.dataset` - """ - return self._loader() - - def __repr__(self): - summary = [""] - summary.append("dataset_id: %s" % self.dataset_id) - summary.append("First day: %s" % self.start_date) - summary.append("N days: %s" % self.n_days) - summary.append("Domain: %s" % self.box) - summary.append("Max depth (m): %s" % self.max_depth) - return "\n".join(summary) - - -def get_velocity_field(a_box, a_date, n_days=1, output='.', dataset='ARMOR3D'): - """Return the velocity field as an :class:xr.Dataset, download if needed - - Parameters - ---------- - a_box - a_date - n_days - output - dataset - """ - def get_velocity_filename(dataset, n_days): - download_date = pd.to_datetime('now', utc='now').strftime("%Y%m%d") - fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, download_date)) - return fname - - velocity_file = get_velocity_filename(dataset, n_days) - if not os.path.exists(velocity_file): - # Define Data loader: - loader = Armor3d if dataset == 'ARMOR3D' else Glorys - loader = loader(a_box, a_date, n_days=n_days) - puts(str(loader), color=COLORS.magenta) - - # Load data from Copernicus Marine Data store: - ds = loader.to_xarray() - - # Save on file for later re-used: - ds.to_netcdf(velocity_file) - else: - ds = xr.open_dataset(velocity_file) - - return ds, velocity_file - - -def get_HBOX(df_sim, dd=1): - """ - - Parameters - ---------- - dd: how much to extend maps outward the deployment 'box' - - Returns - ------- - list - """ - rx = df_sim['deploy_lon'].max() - df_sim['deploy_lon'].min() - ry = df_sim['deploy_lat'].max() - df_sim['deploy_lat'].min() - lonc, latc = df_sim['deploy_lon'].mean(), df_sim['deploy_lat'].mean() - box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] - ebox = [box[i] + [-dd, dd, -dd, dd][i] for i in range(0, 4)] # Extended 'box' - - return ebox - - -def get_EBOX(df_sim, df_plan, this_profile, s=1): - """Get a box for maps - - Use all data positions from DF_SIM to make sure 
all points are visible - Extend the domain by a 's' scaling factor of the deployment plan domain - - Parameters - ---------- - s: float, default:1 - - Returns - ------- - list - """ - box = [np.min([df_sim['deploy_lon'].min(), df_sim['longitude'].min(), df_sim['rel_lon'].min(), this_profile['longitude'].min()]), - np.max([df_sim['deploy_lon'].max(), df_sim['longitude'].max(), df_sim['rel_lon'].max(), this_profile['longitude'].max()]), - np.min([df_sim['deploy_lat'].min(), df_sim['latitude'].min(), df_sim['rel_lat'].min(), this_profile['latitude'].min()]), - np.max([df_sim['deploy_lat'].max(), df_sim['latitude'].max(), df_sim['rel_lat'].max(), this_profile['latitude'].max()])] - rx, ry = df_plan['longitude'].max() - df_plan['longitude'].min(), df_plan['latitude'].max() - df_plan['latitude'].min() - r = np.min([rx, ry]) - ebox = [box[0]-s*r, box[1]+s*r, box[2]-s*r, box[3]+s*r] - - return ebox - - -def get_cfg_str(a_cfg): - txt = "VFloat configuration: (Parking depth: %i [db], Cycle duration: %i [hours], Profile depth: %i [db])" % ( - a_cfg.mission['parking_depth'], - a_cfg.mission['cycle_duration'], - a_cfg.mission['profile_depth'], - ) - return txt - - -def save_figurefile(this_fig, a_name, folder='.'): - """ - - Parameters - ---------- - this_fig - a_name - - Returns - ------- - path - """ - figname = os.path.join(folder, "%s.png" % a_name) - log.debug("Saving %s ..." % figname) - this_fig.savefig(figname) - return figname - - -def map_add_profiles(this_ax, this_profile): - """ - - Parameters - ---------- - this_ax - - Returns - ------- - this_ax - """ - this_ax.plot(this_profile['longitude'][0], this_profile['latitude'][0], 'k.', markersize=10, markeredgecolor='w') - if this_profile.shape[0] > 1: - this_ax.plot(this_profile['longitude'][1], this_profile['latitude'][1], 'r.', markersize=10, markeredgecolor='w') - this_ax.arrow(this_profile['longitude'][0], - this_profile['latitude'][0], - this_profile['longitude'][1] - this_profile['longitude'][0], - this_profile['latitude'][1] - this_profile['latitude'][0], - length_includes_head=True, fc='k', ec='k', head_width=0.025, zorder=10) - - return this_ax - - -def map_add_features(this_ax): - """ - - Parameters - ---------- - this_ax - - Returns - ------- - this_ax - """ - argoplot.utils.latlongrid(this_ax) - this_ax.add_feature(argoplot.utils.land_feature, edgecolor="black") - return this_ax - - -def map_add_cyc_nb(this_ax, this_df, lon='lon', lat='lat', cyc='cyc', pos='bt', fs=6, color='black'): - """ Add cycle number labels next to axis - - Parameters - ---------- - ax - df - - Returns - ------- - list of text label - """ - t = [] - if pos == 'bt': - ha, va, label = 'center', 'top', "\n{}".format - if pos == 'tp': - ha, va, label = 'center', 'bottom', "{}\n".format - for irow, row in this_df.iterrows(): - this_t = this_ax.text(row[lon], row[lat], label(int(row[cyc])), ha=ha, va=va, fontsize=fs, color=color) - t.append(this_t) - return t - - -def figure_velocity(box, - vel, vel_name, this_profile, wmo, cyc, - save_figure=False, workdir='.'): - """ - - Parameters - ---------- - box - - Returns - ------- - None - """ - fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 20), dpi=100, subplot_kw={'projection': ccrs.PlateCarree()}) - ax.set_extent(box) - ax = map_add_features(ax) - ax = map_add_profiles(ax, this_profile) - - vel.field.isel(time=0, depth=0).plot.quiver(x="longitude", y="latitude", - u=vel.var['U'], v=vel.var['V'], ax=ax, color='grey', alpha=0.5, - add_guide=False) - - txt = "starting from cycle %i, predicting cycle %i" % (cyc[0], 
cyc[1]) - ax.set_title( - "VirtualFleet recovery system for WMO %i: %s\n" - "%s velocity snapshot to illustrate the simulation domain\n" - "Vectors: Velocity field at z=%0.2fm, t=%s" % - (wmo, txt, vel_name, vel.field['depth'][0].values[np.newaxis][0], - pd.to_datetime(vel.field['time'][0].values).strftime("%Y/%m/%d %H:%M")), fontsize=15) - - plt.tight_layout() - if save_figure: - save_figurefile(fig, 'vfrecov_velocity_%s' % vel_name, workdir) - return fig, ax - - -def figure_positions(this_args, vel, df_sim, df_plan, this_profile, cfg, wmo, cyc, vel_name, - dd=1, save_figure=False, workdir='.'): - log.debug("Starts figure_positions") - ebox = get_HBOX(df_sim, dd=dd) - nfloats = df_plan.shape[0] - - fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(25, 7), dpi=120, - subplot_kw={'projection': ccrs.PlateCarree()}, - sharex=True, sharey=True) - ax = ax.flatten() - - for ix in [0, 1, 2]: - ax[ix].set_extent(ebox) - ax[ix] = map_add_features(ax[ix]) - - v = vel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']).plot.quiver(x="longitude", - y="latitude", - u=vel.var['U'], - v=vel.var['V'], - ax=ax[ix], - color='grey', - alpha=0.5, - add_guide=False) - - ax[ix].plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', - markersize=3, color='grey', alpha=0.1, markeredgecolor=None, zorder=0) - if ix == 0: - title = 'Velocity field at %0.2fm and deployment plan' % cfg.mission['parking_depth'] - v.set_alpha(1) - # v.set_color('black') - elif ix == 1: - x, y, c = df_sim['longitude'], df_sim['latitude'], df_sim['cyc'] - title = 'Final float positions' - # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) - sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) - elif ix == 2: - x, y, c = df_sim['rel_lon'], df_sim['rel_lat'], df_sim['cyc'] - title = 'Final floats position relative to last float position' - # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) - sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) - - ax[ix] = map_add_profiles(ax[ix], this_profile) - ax[ix].set_title(title) - - fig.suptitle("VirtualFleet recovery prediction for WMO %i: starting from cycle %i, predicting cycle %s\n%s" % - (wmo, cyc[0], cyc[1:], get_cfg_str(cfg)), fontsize=15) - plt.tight_layout() - if save_figure: - save_figurefile(fig, "vfrecov_positions_%s" % get_sim_suffix(this_args, cfg), workdir) - return fig, ax - - -def setup_deployment_plan(a_profile, a_date, nfloats=15000): - # We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time - - # Amplitude of the profile position perturbations in the zonal (deg), meridional (deg), and temporal (hours) directions: - rx = 0.5 - ry = 0.5 - rt = 0 - - # - lonc, latc = a_profile - # box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] - - a, b = lonc - rx / 2, lonc + rx / 2 - lon = (b - a) * np.random.random_sample((nfloats,)) + a - - a, b = latc - ry / 2, latc + ry / 2 - lat = (b - a) * np.random.random_sample((nfloats,)) + a - - a, b = 0, rt - dtim = (b - a) * np.random.random_sample((nfloats,)) + a - dtim = np.round(dtim).astype(int) - tim = pd.to_datetime([a_date + np.timedelta64(dt, 'h') for dt in dtim]) - # dtim = (b-a) * np.random.random_sample((nfloats, )) + a - # dtim = np.round(dtim).astype(int) - # tim2 = pd.to_datetime([this_date - np.timedelta64(dt, 'h') for dt in dtim]) - # tim = np.sort(np.concatenate([tim2, tim1])) - - # Round time to the o(5mins), same as 
step=timedelta(minutes=5) in the simulation params - tim = tim.round(freq='5min') - - # - df = pd.DataFrame( - [tim, lat, lon, np.arange(0, nfloats) + 9000000, np.full_like(lon, 0), ['VF' for l in lon], ['?' for l in lon]], - index=['date', 'latitude', 'longitude', 'wmo', 'cycle_number', 'institution_code', 'file']).T - df['date'] = pd.to_datetime(df['date']) - - return df - - -class Trajectories: - """Trajectory file manager for VFrecovery - - Examples: - --------- - T = Trajectories(traj_zarr_file) - T.n_floats - T.sim_cycles - df = T.to_index() - df = T.get_index().add_distances() - jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, show_plot=True) - """ - - def __init__(self, zfile): - self.zarr_file = zfile - self.obj = xr.open_zarr(zfile) - self._index = None - - @property - def n_floats(self): - # len(self.obj['trajectory']) - return self.obj['trajectory'].shape[0] - - @property - def sim_cycles(self): - """Return list of cycles simulated""" - cycs = np.unique(self.obj['cycle_number']) - last_obs_phase = \ - self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ - np.newaxis][0] - if last_obs_phase < 3: - cycs = cycs[0:-1] - return cycs - - def __repr__(self): - summary = [""] - summary.append("Swarm size: %i floats" % self.n_floats) - start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) - end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) - summary.append("Simulation length: %s, from %s to %s" % ( - pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) - return "\n".join(summary) - - def to_index_par(self) -> pd.DataFrame: - # Deployment loc: - deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values - - def worker(ds, cyc, x0, y0): - mask = np.logical_and((ds['cycle_number'] == cyc).compute(), - (ds['cycle_phase'] >= 3).compute()) - this_cyc = ds.where(mask, drop=True) - if len(this_cyc['time']) > 0: - data = { - 'date': this_cyc.isel(obs=-1)['time'].values, - 'latitude': this_cyc.isel(obs=-1)['lat'].values, - 'longitude': this_cyc.isel(obs=-1)['lon'].values, - 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, - 'cyc': cyc, - # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, - 'deploy_lon': x0, - 'deploy_lat': y0, - } - return pd.DataFrame(data) - else: - return None - - cycles = np.unique(self.obj['cycle_number']) - rows = [] - with concurrent.futures.ThreadPoolExecutor() as executor: - future_to_url = { - executor.submit( - worker, - self.obj, - cyc, - deploy_lon, - deploy_lat - ): cyc - for cyc in cycles - } - futures = concurrent.futures.as_completed(future_to_url) - for future in futures: - data = None - try: - data = future.result() - except Exception: - raise - finally: - rows.append(data) - - rows = [r for r in rows if r is not None] - df = pd.concat(rows).reset_index() - df['wmo'] = df['wmo'].astype(int) - df['cyc'] = df['cyc'].astype(int) - # df['cycle_phase'] = df['cycle_phase'].astype(int) - self._index = df - - return self._index - - def to_index(self) -> pd.DataFrame: - """Compute and return index (profile dataframe from trajectory dataset) - - Create a Profile index :class:`pandas.dataframe` with columns: [data, latitude ,longitude, wmo, cyc, deploy_lon, deploy_lat] - from a trajectory :class:`xarray.dataset`. - - There is one dataframe row for each dataset trajectory cycle. 
- - We use the last trajectory point of given cycle number (with cycle phase >= 3) to identify a profile location. - - If they are N trajectories simulating C cycles, there will be about a maximum of N*C rows in the dataframe. - - Returns - ------- - :class:`pandas.dataframe` - """ - if self._index is None: - - # Deployment loc: - deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values - - def worker(ds, cyc, x0, y0): - mask = np.logical_and((ds['cycle_number'] == cyc).compute(), - (ds['cycle_phase'] >= 3).compute()) - this_cyc = ds.where(mask, drop=True) - if len(this_cyc['time']) > 0: - data = { - 'date': this_cyc.isel(obs=-1)['time'].values, - 'latitude': this_cyc.isel(obs=-1)['lat'].values, - 'longitude': this_cyc.isel(obs=-1)['lon'].values, - 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, - 'cyc': cyc, - # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, - 'deploy_lon': x0, - 'deploy_lat': y0, - } - return pd.DataFrame(data) - else: - return None - - cycles = np.unique(self.obj['cycle_number']) - rows = [] - for cyc in cycles: - df = worker(self.obj, cyc, deploy_lon, deploy_lat) - rows.append(df) - rows = [r for r in rows if r is not None] - df = pd.concat(rows).reset_index() - df['wmo'] = df['wmo'].astype(int) - df['cyc'] = df['cyc'].astype(int) - # df['cycle_phase'] = df['cycle_phase'].astype(int) - self._index = df - - return self._index - - def get_index(self): - """Compute index and return self""" - self.to_index() - return self - - def add_distances(self, origin: None) -> pd.DataFrame: - """Compute profiles distance to some origin - - Returns - ------- - :class:`pandas.dataframe` - """ - - # Compute distance between the predicted profile and the initial profile location from the deployment plan - # We assume that virtual floats are sequentially taken from the deployment plan - # Since distances are very short, we compute a simple rectangular distance - - # Observed cycles: - # obs_cyc = np.unique(this_profile['cyc']) - - # Simulated cycles: - # sim_cyc = np.unique(this_df['cyc']) - - df = self._index - - x2, y2 = origin # real float initial position - df['distance'] = np.nan - df['rel_lon'] = np.nan - df['rel_lat'] = np.nan - df['distance_origin'] = np.nan - - def worker(row): - # Simulation profile coordinates: - x0, y0 = row['deploy_lon'], row['deploy_lat'] # virtual float initial position - x1, y1 = row['longitude'], row['latitude'] # virtual float position - - # Distance between each pair of cycles of virtual floats: - dist = np.sqrt((y1 - y0) ** 2 + (x1 - x0) ** 2) - row['distance'] = dist - - # Shift between each pair of cycles: - dx, dy = x1 - x0, y1 - y0 - # Get a relative displacement from real float initial position: - row['rel_lon'] = x2 + dx - row['rel_lat'] = y2 + dy - - # Distance between the predicted profile and the observed initial profile - dist = np.sqrt((y2 - y0) ** 2 + (x2 - x0) ** 2) - row['distance_origin'] = dist - - return row - - df = df.apply(worker, axis=1) - self._index = df - - return self._index - - def analyse_pairwise_distances(self, - cycle: int = 1, - show_plot: bool = True, - save_figure: bool = False, - workdir: str = '.', - sim_suffix = None, - this_cfg = None, - this_args: dict = None): - - def get_hist_and_peaks(this_d): - x = this_d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - hist, bin_edges = np.histogram(x, bins=100, density=1) - # dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) - return {'pdf': hist, 'bins': 
bin_edges[0:-1], 'Npeaks': len(peaks)} - - # Squeeze traj file to the first predicted cycle (sim can have more than 1 cycle) - ds = self.obj.where((self.obj['cycle_number'] == cycle).compute(), drop=True) - ds = ds.compute() - - # Compute trajectories relative to the single/only real float initial position: - lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[0] - lon, lat = ds['lon'].values, ds['lat'].values - ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, - dims=['trajectory', 'obs']) - ds['latc'] = xr.DataArray(lat - np.broadcast_to(lat[:, 0][:, np.newaxis], lat.shape) + lat0, - dims=['trajectory', 'obs']) - - # Compute trajectory lengths: - ds['length'] = np.sqrt(ds.diff(dim='obs')['lon'] ** 2 + ds.diff(dim='obs')['lat'] ** 2).sum(dim='obs') - ds['lengthc'] = np.sqrt(ds.diff(dim='obs')['lonc'] ** 2 + ds.diff(dim='obs')['latc'] ** 2).sum(dim='obs') - - # Compute initial points pairwise distances, PDF and nb of peaks: - X = ds.isel(obs=0) - X = X.isel(trajectory=~np.isnan(X['lon'])) - X0 = np.array((X['lon'].values, X['lat'].values)).T - d0 = pairwise_distances(X0, n_jobs=-1) - d0 = np.triu(d0) - d0[d0 == 0] = np.nan - - x0 = d0.flatten() - x0 = x0[~np.isnan(x0)] - x0 = x0[:, np.newaxis] - - hist0, bin_edges0 = np.histogram(x0, bins=100, density=1) - dh0 = np.diff(bin_edges0[0:2]) - peaks0, _ = find_peaks(hist0 / np.max(hist0), height=.4, distance=20) - - # Compute final points pairwise distances, PDF and nb of peaks: - X = ds.isel(obs=-1) - X = X.isel(trajectory=~np.isnan(X['lon'])) - dsf = X - X = np.array((X['lon'].values, X['lat'].values)).T - d = pairwise_distances(X, n_jobs=-1) - d = np.triu(d) - d[d == 0] = np.nan - - x = d.flatten() - x = x[~np.isnan(x)] - x = x[:, np.newaxis] - - hist, bin_edges = np.histogram(x, bins=100, density=1) - dh = np.diff(bin_edges[0:2]) - peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) - - # Compute final points pairwise distances (relative traj), PDF and nb of peaks: - X1 = ds.isel(obs=-1) - X1 = X1.isel(trajectory=~np.isnan(X1['lonc'])) - dsfc = X1 - X1 = np.array((X1['lonc'].values, X1['latc'].values)).T - d1 = pairwise_distances(X1, n_jobs=-1) - d1 = np.triu(d1) - d1[d1 == 0] = np.nan - - x1 = d1.flatten() - x1 = x1[~np.isnan(x1)] - x1 = x1[:, np.newaxis] - - hist1, bin_edges1 = np.histogram(x1, bins=100, density=1) - dh1 = np.diff(bin_edges1[0:2]) - peaks1, _ = find_peaks(hist1 / np.max(hist1), height=.4, distance=20) - - # Compute the overlapping between the initial and relative state PDFs: - bin_unif = np.arange(0, np.max([bin_edges0, bin_edges1]), np.min([dh0, dh1])) - dh_unif = np.diff(bin_unif[0:2]) - hist0_unif = np.interp(bin_unif, bin_edges0[0:-1], hist0) - hist_unif = np.interp(bin_unif, bin_edges[0:-1], hist) - hist1_unif = np.interp(bin_unif, bin_edges1[0:-1], hist1) - - # Area under hist1 AND hist0: - # overlapping = np.sum(hist1_unif[hist0_unif >= hist1_unif]*dh_unif) - overlapping = np.sum(hist_unif[hist0_unif >= hist_unif] * dh_unif) - - # Ratio of the max PDF ranges: - # staggering = np.max(bin_edges1)/np.max(bin_edges0) - staggering = np.max(bin_edges) / np.max(bin_edges0) - - # Store metrics in a dict: - prediction_metrics = {} - - prediction_metrics['trajectory_lengths'] = {'median': np.nanmedian(ds['length'].values), - 'std': np.nanstd(ds['length'].values)} - - prediction_metrics['pairwise_distances'] = { - 'initial_state': {'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0)}, - 'final_state': {'median': 
np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks)}, - 'relative_state': {'median': np.nanmedian(d1), 'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1)}, - 'overlapping': {'value': overlapping, - 'comment': 'Overlapping area between PDF(initial_state) and PDF(final_state)'}, - 'staggering': {'value': staggering, 'comment': 'Ratio of PDF(initial_state) vs PDF(final_state) ranges'}, - 'score': {'value': overlapping / len(peaks), 'comment': 'overlapping/nPDFpeaks(final_state)'}} - - if np.isinf(overlapping / len(peaks)): - raise ValueError("Can't compute the prediction score, infinity !") - - ratio = prediction_metrics['pairwise_distances']['final_state']['std'] / \ - prediction_metrics['pairwise_distances']['initial_state']['std'] - prediction_metrics['pairwise_distances']['std_ratio'] = ratio - - # Figure: - if show_plot: - backend = matplotlib.get_backend() - if this_args is not None and this_args.json: - matplotlib.use('Agg') - - fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 10), dpi=90) - ax, ix = ax.flatten(), -1 - cmap = plt.cm.coolwarm - - ix += 1 - dd = dsf['length'].values - ax[ix].plot(X0[:, 0], X0[:, 1], '.', markersize=3, color='grey', alpha=0.5, markeredgecolor=None, zorder=0) - ax[ix].scatter(X[:, 0], X[:, 1], c=dd, zorder=10, s=3, cmap=cmap) - ax[ix].grid() - this_traj = int(dsf.isel(trajectory=np.argmax(dd))['trajectory'].values[np.newaxis][0]) - ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], - ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'r', - zorder=13, label='Longest traj.') - this_traj = int(dsf.isel(trajectory=np.argmin(dd))['trajectory'].values[np.newaxis][0]) - ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], - ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'b', - zorder=13, label='Shortest traj.') - ax[ix].legend() - ax[ix].set_title('Trajectory lengths') - - ix += 1 - ax[ix].plot(bin_edges0[0:-1], hist0, label='Initial (%i peak)' % len(peaks0), color='gray') - ax[ix].plot(bin_edges[0:-1], hist, label='Final (%i peak)' % len(peaks), color='lightblue') - ax[ix].plot(bin_edges[peaks], hist[peaks], "x", label='Peaks') - ax[ix].legend() - ax[ix].grid() - ax[ix].set_xlabel('Pairwise distance [degree]') - line1 = "Staggering: %0.4f" % staggering - line2 = "Overlapping: %0.4f" % overlapping - line3 = "Score: %0.4f" % (overlapping / len(peaks)) - ax[ix].set_title("Pairwise distances PDF: [%s / %s / %s]" % (line1, line2, line3)) - - if this_args is not None: - line0 = "VirtualFleet recovery swarm simulation for WMO %i, starting from cycle %i, predicting cycle %i\n%s" % \ - (this_args.wmo, this_args.cyc[0] - 1, this_args.cyc[0], get_cfg_str(this_cfg)) - line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats) - else: - line0 = "VirtualFleet recovery swarm simulation for cycle %i" % cycle - line1 = "Simulation made with %i virtual floats" % (self.n_floats) - - fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) - plt.tight_layout() - - if save_figure: - if sim_suffix is not None: - filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, cycle) - else: - filename = 'vfrecov_metrics01_cyc%i' % (cycle) - save_figurefile(fig, filename, workdir) - - if this_args is not None and this_args.json: - matplotlib.use(backend) - - if show_plot: - return prediction_metrics, fig, ax - else: - return prediction_metrics - - -class SimPredictor_0: - """ - - Examples - -------- - T = 
Trajectories(traj_zarr_file) - df = T.get_index().add_distances() - - SP = SimPredictor(df) - SP.fit_predict() - SP.add_metrics(VFvelocity) - SP.bbox() - SP.plot_predictions(VFvelocity) - SP.plan - SP.n_cycles - SP.trajectory - SP.prediction - """ - - def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): - self.swarm = df_sim - self.obs = df_obs - # self.set_weights() - self.WMO = np.unique(df_obs['wmo'])[0] - self._json = None - - def __repr__(self): - summary = [""] - summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) - summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) - summary.append("Number of simulated cycles: %i profile(s) for cycle number(s): [%s]" % ( - self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) - summary.append("Observed reference: %i profile(s) for cycle number(s): [%s]" % ( - self.obs.shape[0], ",".join([str(c) for c in self.obs_cycles]))) - return "\n".join(summary) - - @property - def n_cycles(self): - """Number of simulated cycles""" - return len(np.unique(self.swarm['cyc'])) - # return len(self.sim_cycles) - - @property - def obs_cycles(self): - """Observed cycle numbers""" - return np.unique(self.obs['cyc']) - - @property - def sim_cycles(self): - """Simulated cycle numbers""" - return self.obs_cycles[0] + 1 + range(self.n_cycles) - - @property - def plan(self) -> pd.DataFrame: - if not hasattr(self, '_plan'): - df_plan = self.swarm[self.swarm['cyc'] == 1][['date', 'deploy_lon', 'deploy_lat']] - df_plan = df_plan.rename(columns={'deploy_lon': 'longitude', 'deploy_lat': 'latitude'}) - self._plan = df_plan - return self._plan - - @property - def trajectory(self): - """Return the predicted trajectory as a simple :class:`np.array` - - First row is longitude, 2nd is latitude and 3rd is date of simulated profiles - - Return - ------ - :class:`np.array` - - """ - if self._json is None: - raise ValueError("Please call `fit_predict` first") - - traj_prediction = np.array([self.obs['longitude'].values[0], - self.obs['latitude'].values[0], - self.obs['date'].values[0]])[ - np.newaxis] # Starting point where swarm was deployed - for cyc in self._json['predictions'].keys(): - xpred = self._json['predictions'][cyc]['location']['longitude'] - ypred = self._json['predictions'][cyc]['location']['latitude'] - tpred = pd.to_datetime(self._json['predictions'][cyc]['location']['time']) - traj_prediction = np.concatenate((traj_prediction, - np.array([xpred, ypred, tpred])[np.newaxis]), - axis=0) - return traj_prediction - - @property - def predictions(self): - if self._json is None: - raise ValueError("Please call `fit_predict` first") - return self._json - - def bbox(self, s: float = 1) -> list: - """Get a bounding box for maps - - Parameters - ---------- - s: float, default:1 - - Returns - ------- - list - """ - df_sim = self.swarm - df_obs = self.obs - - box = [np.min([df_sim['deploy_lon'].min(), - df_sim['longitude'].min(), - df_sim['rel_lon'].min(), - df_obs['longitude'].min()]), - np.max([df_sim['deploy_lon'].max(), - df_sim['longitude'].max(), - df_sim['rel_lon'].max(), - df_obs['longitude'].max()]), - np.min([df_sim['deploy_lat'].min(), - df_sim['latitude'].min(), - df_sim['rel_lat'].min(), - df_obs['latitude'].min()]), - np.max([df_sim['deploy_lat'].max(), - df_sim['latitude'].max(), - df_sim['rel_lat'].max(), - df_obs['latitude'].max()])] - rx, ry = box[1] - box[0], box[3] - box[2] - r = np.min([rx, ry]) - ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, box[3] + s * r] - - return ebox - -class 
SimPredictor_1(SimPredictor_0): - - def set_weights(self, scale: float = 20): - """Compute weights for predictions - - Add weights column to swarm :class:`pandas.DataFrame` as a gaussian distance - with a std based on the size of the deployment domain - - Parameters - ---------- - scale: float (default=20.) - """ - rx, ry = self.plan['longitude'].max() - self.plan['longitude'].min(), \ - self.plan['latitude'].max() - self.plan['latitude'].min() - r = np.min([rx, ry]) # Minimal size of the deployment domain - weights = np.exp(-(self.swarm['distance_origin'] ** 2) / (r / scale)) - weights[np.isnan(weights)] = 0 - self.swarm['weights'] = weights - return self - - def fit_predict(self, weights_scale: float = 20.) -> dict: - """Predict profile positions from simulated float swarm - - Prediction is based on a :class:`klearn.neighbors._kde.KernelDensity` estimate of the N_FLOATS - simulated, weighted by their deployment distance to the observed previous cycle position. - - Parameters - ---------- - weights_scale: float (default=20) - Scale (in deg) to use to weight the deployment distance to the observed previous cycle position - - Returns - ------- - dict - """ - - def blank_prediction() -> dict: - return {'location': { - 'longitude': None, - 'latitude': None, - 'time': None}, - 'cycle_number': None, - 'wmo': int(self.WMO), - } - - # Compute weights of the swarm float profiles locations - self.set_weights(scale=weights_scale) - - self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}} - - cycles = np.unique(self.swarm['cyc']).astype(int) # 1, 2, ... - recovery_predictions = {} - for icyc, this_sim_cyc in enumerate(cycles): - this_cyc_df = self.swarm[self.swarm['cyc'] == this_sim_cyc] - weights = this_cyc_df['weights'] - x, y = this_cyc_df['rel_lon'], this_cyc_df['rel_lat'] - - w = weights / np.max(np.abs(weights), axis=0) - X = np.array([x, y]).T - kde = KernelDensity(kernel='gaussian', bandwidth=0.15).fit(X, sample_weight=w) - - xg, yg = (np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100), - np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)) - xg, yg = np.meshgrid(xg, yg) - Xg = np.array([xg.flatten(), yg.flatten(), ]).T - llh = kde.score_samples(Xg) - xpred = Xg[np.argmax(llh), 0] - ypred = Xg[np.argmax(llh), 1] - tpred = this_cyc_df['date'].mean() - - # Store results - recovery = blank_prediction() - recovery['location']['longitude'] = xpred - recovery['location']['latitude'] = ypred - recovery['location']['time'] = tpred.isoformat() - recovery['cycle_number'] = int(self.sim_cycles[icyc]) - recovery['virtual_cycle_number'] = int(self.sim_cycles[icyc]) - recovery_predictions.update({int(this_sim_cyc): recovery}) - - # - self._prediction_data['cyc'].update({this_sim_cyc: {'weights': this_cyc_df['weights']}}) - - # Store results internally - self._json = {'predictions': recovery_predictions} - - # Add more stuff to internal storage: - self._predict_errors() - self._add_ref() - self.add_metrics() - - # - return self - - -class SimPredictor_2(SimPredictor_1): - - def _predict_errors(self) -> dict: - """Compute error metrics for the predicted positions - - This is for past cycles, for which we have observed positions of the predicted profiles - - This adds more keys to self._json['predictions'] created by the fit_predict method - - Returns - ------- - dict - """ - - def blank_error(): - return {'distance': {'value': None, - 'unit': 'km'}, - 'bearing': {'value': None, - 'unit': 'degree'}, - 'time': {'value': None, - 'unit': 'hour'} - } - - cyc0 = self.obs_cycles[0] - if self._json is 
None: - raise ValueError("Please call `fit_predict` first") - recovery_predictions = self._json['predictions'] - - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in self.obs_cycles: - error = blank_error() - - this_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0] - xobs = this_obs_profile['longitude'].iloc[0] - yobs = this_obs_profile['latitude'].iloc[0] - tobs = this_obs_profile['date'].iloc[0] - - prev_obs_profile = self.obs[self.obs['cyc'] == sim_c + cyc0 - 1] - xobs0 = prev_obs_profile['longitude'].iloc[0] - yobs0 = prev_obs_profile['latitude'].iloc[0] - - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = pd.to_datetime(this_prediction['location']['time']) - - dd = haversine(xobs, yobs, xpred, ypred) - error['distance']['value'] = dd - - observed_bearing = bearing(xobs0, yobs0, xobs, yobs) - sim_bearing = bearing(xobs0, yobs0, xpred, ypred) - error['bearing']['value'] = sim_bearing - observed_bearing - - dt = pd.Timedelta(tpred - tobs) / np.timedelta64(1, 's') - # print(tpred, tobs, pd.Timedelta(tpred - tobs)) - error['time']['value'] = dt / 3600 # From seconds to hours - - this_prediction['location_error'] = error - recovery_predictions.update({sim_c: this_prediction}) - - self._json.update({'predictions': recovery_predictions}) - return self - - def _add_ref(self): - """Add observations data to internal data structure - - This adds more keys to self._json['predictions'] created by the fit_predict method - - """ - if self._json is None: - raise ValueError("Please call `predict` first") - - # Observed profiles that were simulated: - profiles_to_predict = [] - for cyc in self.sim_cycles: - this = {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': "", - 'location': {'longitude': None, - 'latitude': None, - 'time': None} - } - if cyc in self.obs_cycles: - this['url_profile'] = get_ea_profile_page_url(self.WMO, cyc) - this_df = self.obs[self.obs['cyc'] == cyc] - this['location']['longitude'] = this_df['longitude'].iloc[0] - this['location']['latitude'] = this_df['latitude'].iloc[0] - this['location']['time'] = this_df['date'].iloc[0].isoformat() - profiles_to_predict.append(this) - - self._json.update({'observations': profiles_to_predict}) - - # Observed profile used as initial conditions to the simulation: - cyc = self.obs_cycles[0] - this_df = self.obs[self.obs['cyc'] == cyc] - self._json.update({'initial_profile': {'wmo': int(self.WMO), - 'cycle_number': int(cyc), - 'url_float': argoplot.dashboard(self.WMO, url_only=True), - 'url_profile': get_ea_profile_page_url(self.WMO, cyc), - 'location': {'longitude': this_df['longitude'].iloc[0], - 'latitude': this_df['latitude'].iloc[0], - 'time': this_df['date'].iloc[0].isoformat() - } - }}) - - # - return self - - def add_metrics(self, VFvel=None): - """Compute more metrics to understand the prediction error - - 1. Compute a transit time to cover the distance error - (assume a 12 kts boat speed with 1 kt = 1.852 km/h) - - 1. 
Compute the possible drift due to the time lag between the predicted profile timing and the expected one - - This adds more keys to self._json['predictions'] created by the fit_predict method - - """ - cyc0 = self.obs_cycles[0] - if self._json is None: - raise ValueError("Please call `predict` first") - recovery_predictions = self._json['predictions'] - - for sim_c in recovery_predictions.keys(): - this_prediction = recovery_predictions[sim_c] - if sim_c + cyc0 in self.obs_cycles and 'location_error' in this_prediction.keys(): - - error = this_prediction['location_error'] - metrics = {} - - # Compute a transit time to cover the distance error: - metrics['transit'] = {'value': None, - 'unit': 'hour', - 'comment': 'Transit time to cover the distance error ' - '(assume a 12 kts boat speed with 1 kt = 1.852 km/h)'} - - if error['distance']['value'] is not None: - metrics['transit']['value'] = pd.Timedelta(error['distance']['value'] / (12 * 1.852), - 'h').seconds / 3600. - - # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: - if VFvel is not None: - xpred = this_prediction['location']['longitude'] - ypred = this_prediction['location']['latitude'] - tpred = this_prediction['location']['time'] - dsc = VFvel.field.interp( - {VFvel.dim['lon']: xpred, - VFvel.dim['lat']: ypred, - VFvel.dim['time']: tpred, - VFvel.dim['depth']: - VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} - ) - velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] - metrics['surface_drift'] = {'value': None, - 'unit': 'km', - 'surface_currents_speed': None, - 'surface_currents_speed_unit': 'm/s', - 'comment': 'Drift by surface currents due to the float ascent time error ' - '(difference between simulated profile time and the observed one).'} - if error['time']['value'] is not None: - metrics['surface_drift']['value'] = (error['time']['value'] * 3600 * velc / 1e3) - metrics['surface_drift']['surface_currents_speed'] = velc - - # - this_prediction['metrics'] = metrics - recovery_predictions.update({sim_c: this_prediction}) - - self._json.update({"predictions": recovery_predictions}) - return self - - -class SimPredictor_3(SimPredictor_2): - - def plot_predictions(self, - VFvel, - cfg, - sim_suffix='', # get_sim_suffix(this_args, cfg) - s=0.2, - alpha=False, - save_figure=False, - workdir='.', - figsize=None, - dpi=120, - orient='portrait'): - ebox = self.bbox(s=s) - pred_traj = self.trajectory - - if orient == 'portrait': - if self.n_cycles == 1: - nrows, ncols = 2, 1 - if figsize is None: - figsize = (5, 5) - else: - nrows, ncols = self.n_cycles, 2 - if figsize is None: - figsize = (5, (self.n_cycles-1)*5) - else: - if self.n_cycles == 1: - nrows, ncols = 1, 2 - else: - nrows, ncols = 2, self.n_cycles - if figsize is None: - figsize = (ncols*5, 5) - - def plot_this(this_ax, i_cycle, ip): - df_sim = self.swarm[self.swarm['cyc'] == i_cycle + 1] - weights = self._prediction_data['cyc'][i_cycle + 1]['weights'].values - if self.sim_cycles[i_cycle] in self.obs_cycles: - this_profile = self.obs[self.obs['cyc'] == self.sim_cycles[i_cycle]] - else: - this_profile = None - - xpred = self.predictions['predictions'][i_cycle + 1]['location']['longitude'] - ypred = self.predictions['predictions'][i_cycle + 1]['location']['latitude'] - - this_ax.set_extent(ebox) - this_ax = map_add_features(ax[ix]) - - v = VFvel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']) - v.plot.quiver(x="longitude", - y="latitude", - 
u=VFvel.var['U'], - v=VFvel.var['V'], - ax=this_ax, - color='grey', - alpha=0.5, - scale=5, - add_guide=False) - - this_ax.plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', - markersize=3, - color='grey', - alpha=0.1, - markeredgecolor=None, - zorder=0) - - this_ax.plot(pred_traj[:, 0], pred_traj[:, 1], color='k', linewidth=1, marker='+') - this_ax.plot(xpred, ypred, color='g', marker='+') - - w = weights / np.max(np.abs(weights), axis=0) - ii = np.argsort(w) - cmap = plt.cm.cool - # cmap = plt.cm.Reds - - if ip == 0: - x, y = df_sim['deploy_lon'], df_sim['deploy_lat'] - title = 'Initial virtual float positions' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - alpha=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - elif ip == 1: - x, y = df_sim['longitude'], df_sim['latitude'] - title = 'Final virtual float positions' - if not alpha: - this_ax.scatter(x, y, c=w, marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x, y, c=w, marker='o', s=4, alpha=w, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - elif ip == 2: - x, y = df_sim['rel_lon'], df_sim['rel_lat'] - title = 'Final virtual floats positions relative to observed float' - if not alpha: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap) - else: - this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii], - marker='o', s=4, alpha=w[ii], edgecolor=None, vmin=0, vmax=1, cmap=cmap) - - # Display full trajectory prediction: - if ip != 0 and this_profile is not None: - this_ax.arrow(this_profile['longitude'].iloc[0], - this_profile['latitude'].iloc[0], - xpred - this_profile['longitude'].iloc[0], - ypred - this_profile['latitude'].iloc[0], - length_includes_head=True, fc='k', ec='c', head_width=0.025, zorder=10) - this_ax.plot(xpred, ypred, 'k+', zorder=10) - - this_ax.set_title("") - # this_ax.set_ylabel("Cycle %i predictions" % (i_cycle+1)) - this_ax.set_title("%s\nCycle %i predictions" % (title, self.sim_cycles[i_cycle]), fontsize=6) - - fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi, - subplot_kw={'projection': ccrs.PlateCarree()}, - sharex=True, sharey=True) - ax, ix = ax.flatten(), -1 - - if orient == 'portrait': - rows = range(self.n_cycles) - cols = [1, 2] - else: - rows = [1, 2] - cols = range(self.n_cycles) - - if orient == 'portrait': - for i_cycle in rows: - for ip in cols: - ix += 1 - plot_this(ax[ix], i_cycle, ip) - else: - for ip in rows: - for i_cycle in cols: - ix += 1 - plot_this(ax[ix], i_cycle, ip) - - # log.debug("Start to write metrics string") - # - # xpred = SP.prediction[i_cycle + 1]['location']['longitude']['value'] - # - # err = recovery['prediction_location_error'] - # met = recovery['prediction_metrics'] - # if this_profile.shape[0] > 1: - # # err_str = "Prediction vs Truth: [%0.2fkm, $%0.2f^o$]" % (err['distance'], err['bearing']) - # err_str = "Prediction errors: [dist=%0.2f%s, bearing=$%0.2f^o$, time=%s]\n" \ - # "Distance error represents %s of transit at 12kt" % (err['distance']['value'], - # err['distance']['unit'], - # err['bearing']['value'], - # strfdelta(pd.Timedelta(err['time']['value'], 'h'), - # "{hours}H{minutes:02d}"), - # strfdelta(pd.Timedelta(met['transit']['value'], 'h'), - # "{hours}H{minutes:02d}")) - # else: - # err_str = "" - # - # fig.suptitle("VirtualFleet recovery prediction for WMO %i: \ - # starting from cycle %i, 
predicting cycle %i\n%s\n%s\n%s" % - # (wmo, cyc[0], cyc[1], get_cfg_str(cfg), err_str, "Prediction based on %s" % vel_name), fontsize=15) - - plt.tight_layout() - if save_figure: - save_figurefile(fig, 'vfrecov_predictions_%s' % sim_suffix, workdir) - - return fig, ax - - -class SimPredictor(SimPredictor_3): - - def to_json(self, fp=None): - kw = {'indent': 4, 'sort_keys': True, 'default': str} - if fp is not None: - if hasattr(fp, 'write'): - json.dump(self._json, fp, **kw) - else: - with open(fp, 'w') as f: - json.dump(self._json, f, **kw) - else: - results_js = json.dumps(self._json, **kw) - return results_js - - -def get_ea_profile_page_url(wmo, cyc): - try: - url = argoplot.dashboard(wmo, cyc, url_only=True) - except: - log.info("EA dashboard page not available for this profile: %i/%i" % (wmo, cyc)) - url = "404" - return url - - def setup_args(): icons_help_string = """This script can be used to make prediction of a specific float cycle position. This script can be used on past or unknown float cycles. @@ -1755,272 +82,6 @@ def setup_args(): return parser -def get_sim_suffix(this_args, this_cfg): - """Compose a string suffix for output files""" - # suf = '%s_%i' % (this_args.velocity, this_args.nfloats) - suf = 'VEL%s_NF%i_CYCDUR%i_PARKD%i_PROFD%i_SFD%i' % (this_args.velocity, - this_args.nfloats, - int(this_cfg.mission['cycle_duration']), - int(this_cfg.mission['parking_depth']), - int(this_cfg.mission['profile_depth']), - int(this_cfg.mission['reco_free_surface_drift'])) - return suf - - -def predictor(args): - """Prediction manager""" - execution_start = time.time() - process_start = time.process_time() - - if is_wmo(args.wmo): - WMO = args.wmo - if is_cyc(args.cyc): - CYC = [check_cyc(args.cyc)[0]-1] - [CYC.append(c) for c in check_cyc(args.cyc)] - if args.velocity not in ['ARMOR3D', 'GLORYS']: - raise ValueError("Velocity field must be one in: ['ARMOR3D', 'GLORYS']") - else: - VEL_NAME = args.velocity.upper() - - puts('CYC = %s' % CYC, color=COLORS.magenta) - # raise ValueError('stophere') - - if args.save_figure: - mplbackend = matplotlib.get_backend() - matplotlib.use('Agg') - - # Where do we find the VirtualFleet repository ? 
- if not args.vf: - if os.uname()[1] == 'data-app-virtualfleet-recovery': - euroargodev = os.path.expanduser('/home/ubuntu') - else: - euroargodev = os.path.expanduser('~/git/github/euroargodev') - else: - euroargodev = os.path.abspath(args.vf) - if not os.path.exists(os.path.join(euroargodev, "VirtualFleet")): - raise ValueError("VirtualFleet can't be found at '%s'" % euroargodev) - - # Import the VirtualFleet library - sys.path.insert(0, os.path.join(euroargodev, "VirtualFleet")) - from virtualargofleet import Velocity, VirtualFleet, FloatConfiguration, ConfigParam - # from virtualargofleet.app_parcels import ArgoParticle - - # Set up the working directory: - if not args.output: - WORKDIR = os.path.sep.join([get_package_dir(), "webapi", "myapp", "static", "data", str(WMO), str(CYC[1])]) - else: - WORKDIR = os.path.sep.join([args.output, str(WMO), str(CYC[1])]) - WORKDIR = os.path.abspath(WORKDIR) - if not os.path.exists(WORKDIR): - os.makedirs(WORKDIR) - args.output = WORKDIR - - if not args.json: - puts("\nData will be saved in:") - puts("\t%s" % WORKDIR, color=COLORS.green) - - # Set-up logger - logging.basicConfig( - level=logging.DEBUG, - format=DEBUGFORMATTER, - datefmt='%m/%d/%Y %I:%M:%S %p', - handlers=[logging.FileHandler(os.path.join(WORKDIR, "vfpred.log"), mode='a')] - ) - - # Load these profiles' information: - if not args.json: - puts("\nYou can check this float dashboard while we prepare the prediction:") - puts("\t%s" % argoplot.dashboard(WMO, url_only=True), color=COLORS.green) - puts("\nLoading float profiles index ...") - host = "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" - # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" - THIS_PROFILE = store(host=host).search_wmo_cyc(WMO, CYC).to_dataframe() - THIS_DATE = pd.to_datetime(THIS_PROFILE['date'].values[0], utc=True) - CENTER = [THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]] - if not args.json: - puts("\nProfiles to work with:") - puts(THIS_PROFILE.to_string(max_colwidth=15), color=COLORS.green) - if THIS_PROFILE.shape[0] == 1: - puts('\nReal-case scenario: True position unknown !', color=COLORS.yellow) - else: - puts('\nEvaluation scenario: historical position known', color=COLORS.yellow) - - # Load real float configuration at the previous cycle: - if not args.json: - puts("\nLoading float configuration...") - try: - CFG = FloatConfiguration([WMO, CYC[0]]) - except: - if not args.json: - puts("Can't load this profile config, falling back on default values", color=COLORS.red) - CFG = FloatConfiguration('default') - - if args.cfg_parking_depth is not None: - puts("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], - float(args.cfg_parking_depth))) - CFG.update('parking_depth', float(args.cfg_parking_depth)) - - if args.cfg_cycle_duration is not None: - puts("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], - float(args.cfg_cycle_duration))) - CFG.update('cycle_duration', float(args.cfg_cycle_duration)) - - if args.cfg_profile_depth is not None: - puts("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], - float(args.cfg_profile_depth))) - CFG.update('profile_depth', float(args.cfg_profile_depth)) - - CFG.params = ConfigParam(key='reco_free_surface_drift', - value=int(args.cfg_free_surface_drift), - unit='cycle', - description='First cycle with free surface drift', - dtype=int) - - # Save virtual float 
configuration on file: - CFG.to_json(os.path.join(WORKDIR, "floats_configuration_%s.json" % get_sim_suffix(args, CFG))) - - if not args.json: - puts("\n".join(["\t%s" % line for line in CFG.__repr__().split("\n")]), color=COLORS.green) - - # Get the cycling frequency (in days, this is more a period then...): - CYCLING_FREQUENCY = int(np.round(CFG.mission['cycle_duration']/24)) - - # Define domain to load velocity for, and get it: - width = args.domain_size + np.abs(np.ceil(THIS_PROFILE['longitude'].values[-1] - CENTER[0])) - height = args.domain_size + np.abs(np.ceil(THIS_PROFILE['latitude'].values[-1] - CENTER[1])) - VBOX = [CENTER[0] - width / 2, CENTER[0] + width / 2, CENTER[1] - height / 2, CENTER[1] + height / 2] - N_DAYS = (len(CYC)-1)*CYCLING_FREQUENCY+1 - if not args.json: - puts("\nLoading %s velocity field to cover %i days..." % (VEL_NAME, N_DAYS)) - ds_vel, velocity_file = get_velocity_field(VBOX, THIS_DATE, - n_days=N_DAYS, - output=WORKDIR, - dataset=VEL_NAME) - VEL = Velocity(model='GLORYS12V1' if VEL_NAME == 'GLORYS' else VEL_NAME, src=ds_vel) - if not args.json: - puts("\n\t%s" % str(ds_vel), color=COLORS.green) - puts("\n\tLoaded velocity field from %s to %s" % - (pd.to_datetime(ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), - pd.to_datetime(ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")), color=COLORS.green) - figure_velocity(VBOX, VEL, VEL_NAME, THIS_PROFILE, WMO, CYC, save_figure=args.save_figure, workdir=WORKDIR) - - # raise ValueError('stophere') - - # VirtualFleet, get a deployment plan: - if not args.json: - puts("\nVirtualFleet, get a deployment plan...") - DF_PLAN = setup_deployment_plan(CENTER, THIS_DATE, nfloats=args.nfloats) - PLAN = {'lon': DF_PLAN['longitude'], - 'lat': DF_PLAN['latitude'], - 'time': np.array([np.datetime64(t) for t in DF_PLAN['date'].dt.strftime('%Y-%m-%d %H:%M').array]), - } - if not args.json: - puts("\t%i virtual floats to deploy" % DF_PLAN.shape[0], color=COLORS.green) - - # Set up VirtualFleet: - if not args.json: - puts("\nVirtualFleet, set-up the fleet...") - VFleet = VirtualFleet(plan=PLAN, - fieldset=VEL, - mission=CFG) - - # VirtualFleet, execute the simulation: - if not args.json: - puts("\nVirtualFleet, execute the simulation...") - - # Remove traj file if exists: - output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) - # if os.path.exists(output_path): - # shutil.rmtree(output_path) - # - # VFleet.simulate(duration=timedelta(hours=N_DAYS*24+1), - # step=timedelta(minutes=5), - # record=timedelta(minutes=30), - # output=True, - # output_folder=WORKDIR, - # output_file='trajectories_%s.zarr' % get_sim_suffix(args, CFG), - # verbose_progress=not args.json, - # ) - - # VirtualFleet, get simulated profiles index: - if not args.json: - puts("\nExtract swarm profiles index...") - - T = Trajectories(WORKDIR + "/" + 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) - DF_SIM = T.get_index().add_distances(origin=[THIS_PROFILE['longitude'].values[0], THIS_PROFILE['latitude'].values[0]]) - if not args.json: - puts(str(T), color=COLORS.magenta) - puts(DF_SIM.head().to_string(), color=COLORS.green) - figure_positions(args, VEL, DF_SIM, DF_PLAN, THIS_PROFILE, CFG, WMO, CYC, VEL_NAME, - dd=1, save_figure=args.save_figure, workdir=WORKDIR) - - # Recovery, make predictions based on simulated profile density: - SP = SimPredictor(DF_SIM, THIS_PROFILE) - if not args.json: - puts("\nPredict float cycle position(s) from swarm simulation...", color=COLORS.white) - puts(str(SP), 
color=COLORS.magenta) - SP.fit_predict() - SP.add_metrics(VEL) - SP.plot_predictions(VEL, - CFG, - sim_suffix=get_sim_suffix(args, CFG), - save_figure=args.save_figure, - workdir=WORKDIR, - orient='portrait') - results = SP.predictions - - # Recovery, compute more swarm metrics: - for this_cyc in T.sim_cycles: - jsmetrics, fig, ax = T.analyse_pairwise_distances(cycle=this_cyc, - save_figure=True, - this_args=args, - this_cfg=CFG, - sim_suffix=get_sim_suffix(args, CFG), - workdir=WORKDIR, - ) - if 'metrics' in results['predictions'][this_cyc]: - for key in jsmetrics.keys(): - results['predictions'][this_cyc]['metrics'].update({key: jsmetrics[key]}) - else: - results['predictions'][this_cyc].update({'metrics': jsmetrics}) - - # Recovery, finalize JSON output: - execution_end = time.time() - process_end = time.process_time() - computation = { - 'Date': pd.to_datetime('now', utc=True), - 'Wall-time': pd.Timedelta(execution_end - execution_start, 's'), - 'CPU-time': pd.Timedelta(process_end - process_start, 's'), - 'system': getSystemInfo() - } - results['meta'] = {'Velocity field': VEL_NAME, - 'Nfloats': args.nfloats, - 'Computation': computation, - 'VFloats_config': CFG.to_json(), - } - - if not args.json: - puts("\nPredictions:") - results_js = json.dumps(results, indent=4, sort_keys=True, default=str) - - with open(os.path.join(WORKDIR, 'prediction_%s.json' % get_sim_suffix(args, CFG)), 'w', encoding='utf-8') as f: - json.dump(results, f, ensure_ascii=False, indent=4, default=str, sort_keys=True) - - if not args.json: - puts(results_js, color=COLORS.green) - puts("\nCheck results at:") - puts("\t%s" % WORKDIR, color=COLORS.green) - - if args.save_figure: - plt.close('all') - # Restore Matplotlib backend - matplotlib.use(mplbackend) - - if not args.save_sim: - shutil.rmtree(output_path) - - return results_js if __name__ == '__main__': diff --git a/environment.yml b/environment.yml index fd404e0..1b38c7a 100644 --- a/environment.yml +++ b/environment.yml @@ -7,7 +7,7 @@ dependencies: - parcels>=3.0.0 - dask - distributed - - dask-kubernetes +# - dask-kubernetes - bottleneck - gcsfs - zarr @@ -57,4 +57,4 @@ dependencies: - geojson - dominate - copernicusmarine>=1.0<=2.0 - - git+https://github.com/euroargodev/VirtualFleet.git@master +# - git+https://github.com/euroargodev/VirtualFleet.git@master # better with `pip install --editable` diff --git a/schemas/VFrecovery-schema-computation.json b/schemas/VFrecovery-schema-computation.json index 5405e00..714d083 100644 --- a/schemas/VFrecovery-schema-computation.json +++ b/schemas/VFrecovery-schema-computation.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-computation.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-computation.json", "title": "VirtualFleet-Recovery Simulation Computation", "description": "A set of meta-data documenting one computation run", "format_version": { @@ -14,10 +14,6 @@ "type": ["string", "null"], "format": "date-time" }, - "system": { - "description": "System the computation was executed on", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-system.json" - }, "cpu_time": { "description": "CPU time used by the computation", "type": ["string", "null"], @@ -28,6 +24,5 @@ "type": ["string", "null"], "format": 
"time-delta" } - }, - "maxProperties": 4 + } } diff --git a/schemas/VFrecovery-schema-location.json b/schemas/VFrecovery-schema-location.json index c4a3e2c..ac898dd 100644 --- a/schemas/VFrecovery-schema-location.json +++ b/schemas/VFrecovery-schema-location.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-location.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-location.json", "title": "VirtualFleet-Recovery location", "description": "A set of longitude/latitude/time coordinates on Earth", "format_version": { @@ -13,7 +13,7 @@ "type": "number", "minimum": -180, "maximum": 180, - "description": "Longitude of the geo-location, [-180-180] convention" + "description": "Longitude of the geo-location, [-180/180] convention" }, "latitude": { "type": "number", @@ -26,6 +26,5 @@ "format": "date-time", "description": "Date and time of the geo-location" } - }, - "maxProperties": 3 + } } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-metadata.json b/schemas/VFrecovery-schema-metadata.json index f7cd5f0..465f62c 100644 --- a/schemas/VFrecovery-schema-metadata.json +++ b/schemas/VFrecovery-schema-metadata.json @@ -1,15 +1,15 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metadata.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metadata.json", "title": "VirtualFleet-Recovery Simulation Meta-data", "description": "A set of meta-data documenting one simulation", "format_version": { "const": "0.1" }, - "required": ["nfloats", "velocity_field", "vfconfig"], + "required": ["swarm_size", "velocity_field", "vfconfig"], "type": "object", "properties": { - "nfloats": { + "swarm_size": { "description": "Number of virtual floats simulated", "type": "integer" }, @@ -19,12 +19,14 @@ "enum": ["ARMOR3D", "GLORYS"] }, "vfconfig": { - "description": "Configuration of the virtual floats", + "description": "Configuration of virtual floats", "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet/json-schemas-FloatConfiguration/schemas/VF-ArgoFloat-Configuration.json" }, "computation": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-computation.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-computation.json" + }, + "system": { + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-system.json" } - }, - "maxProperties": 4 + } } diff --git a/schemas/VFrecovery-schema-metrics.json b/schemas/VFrecovery-schema-metrics.json index 16be612..05d9aa2 100644 --- a/schemas/VFrecovery-schema-metrics.json +++ b/schemas/VFrecovery-schema-metrics.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metrics.json", + "$id": 
"https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metrics.json", "title": "VirtualFleet-Recovery Predicted profile metrics", "description": "A set of metrics to describe/interpret one VFrecovery predicted profile location", "format_version": { @@ -71,7 +71,7 @@ }, "surface_drift": { "description": "Drift by surface currents due to the float ascent time error (difference between simulated profile time and the observed one)", - "type": "object", + "type": ["object", "null"], "properties": { "surface_currents_speed": {"type": "number"}, "surface_currents_speed_unit": {"type": "string"}, @@ -81,12 +81,20 @@ }, "transit": { "description": "Transit time to cover the distance error (assume a 12 kts boat speed with 1 kt = 1.852 km/h)", - "type": "object", + "type": ["object", "null"], "properties": { "unit": {"type": "string"}, "value": {"type": "number"} } + }, + "error": { + "description": "Error amplitude in space/time", + "type": ["object", "null"], + "properties": { + "distance": {"type": "number", "unit": "km"}, + "bearing": {"type": "number", "unit": "degree"}, + "time": {"type": ["string", "null"], "format": "time-delta"} + } } - }, - "maxProperties": 4 + } } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-profile.json b/schemas/VFrecovery-schema-profile.json index 7afa9d9..fcdd2cb 100644 --- a/schemas/VFrecovery-schema-profile.json +++ b/schemas/VFrecovery-schema-profile.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json", "title": "VirtualFleet-Recovery Argo float profile location", "description": "A set of meta-data and longitude/latitude/time coordinates on Earth, for an Argo float vertical profile location", "format_version": { @@ -11,7 +11,7 @@ "properties": { "location": { "description": "Space/time coordinates of the profile", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-location.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-location.json" }, "cycle_number":{ "description": "Cycle number of the profile", @@ -36,10 +36,10 @@ "minimum": 0 }, "metrics": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metrics.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metrics.json" }, "dependencies": { - "virtual_cycle_number": ["metrics"]} - }, - "maxProperties": 8 + "virtual_cycle_number": ["metrics"] + } + } } diff --git a/schemas/VFrecovery-schema-simulation.json b/schemas/VFrecovery-schema-simulation.json index 3f38a2b..a51ea80 100644 --- a/schemas/VFrecovery-schema-simulation.json +++ b/schemas/VFrecovery-schema-simulation.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-simulation.json", + "$id": 
"https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-simulation.json", "title": "VirtualFleet-Recovery Simulation", "description": "This document records details of one VirtualFleet-Recovery simulation and Argo float profile predictions", "format_version": { @@ -14,32 +14,32 @@ ], "type": "object", "properties": { - "initial_profile": { - "description": "Argo float profile used as initial conditions to the simulation", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json" - }, "meta_data": { "description": "Meta-data of the simulation", - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-metadata.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-metadata.json" + }, + "initial_profile": { + "description": "Argo float profile used as initial conditions to the simulation", + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json", + "minItems": 1, + "uniqueItems": true }, "observations": { "description": "Data from observed Argo float profiles relevant to the simulation predictions", "type": "array", "items": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" }, - "minItems": 1, "uniqueItems": true }, "predictions": { "description": "Data from the simulated virtual float profiles", "type": "array", "items": { - "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-profile.json" + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-profile.json" }, "minItems": 1, "uniqueItems": true } - }, - "maxProperties": 4 + } } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-system.json b/schemas/VFrecovery-schema-system.json index d266254..e1b4b7c 100644 --- a/schemas/VFrecovery-schema-system.json +++ b/schemas/VFrecovery-schema-system.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas/VFrecovery-schema-system.json", + "$id": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-system.json", "title": "VirtualFleet-Recovery Simulation System", "description": "A set of meta-data documenting a system where a simulation is executed", "format_version": { @@ -41,6 +41,5 @@ "type":["string", "null"], "description": "" } - }, - "maxProperties": 8 + } } \ No newline at end of file diff --git a/schemas/VFrecovery-schema-trajectory.json b/schemas/VFrecovery-schema-trajectory.json new file mode 100644 index 0000000..ab2cdca --- /dev/null +++ b/schemas/VFrecovery-schema-trajectory.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": 
"https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-trajectory.json", + "title": "VirtualFleet-Recovery trajectory", + "description": "Represents two or more VirtualFleet-Recovery locations that share a relationship", + "format_version": { + "const": "0.1" + }, + "required": [ "locations" ], + "type": "object", + "properties": { + "locations": { + "type": "array", + "items": { + "$ref": "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas/VFrecovery-schema-location.json" + }, + "uniqueItems": false + } + } +} diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0cd8f35 --- /dev/null +++ b/setup.py @@ -0,0 +1,16 @@ +from setuptools import setup, find_packages + +setup( + name='vfrecovery', + version='2.0', + packages=find_packages(), + include_package_data=True, + install_requires=[ + 'Click', + ], + entry_points={ + 'console_scripts': [ + 'vfrecovery = vfrecovery.command_line_interface.virtualfleet_recovery:base_command_line_interface', + ], + }, +) \ No newline at end of file diff --git a/vfrecovery/__init__.py b/vfrecovery/__init__.py index 1f36d3d..ea6c165 100644 --- a/vfrecovery/__init__.py +++ b/vfrecovery/__init__.py @@ -1,2 +1,22 @@ -# from importlib.metadata import version -# __version__ = version("vfrecovery") +import json +import logging.config +import time +import pathlib +from importlib.metadata import version + + +__version__ = version("vfrecovery") + +log_configuration_dict = json.load( + open( + pathlib.Path( + pathlib.Path(__file__).parent, "logging_conf.json" + ) + ) +) +logging.config.dictConfig(log_configuration_dict) +logging.Formatter.converter = time.gmtime + +from vfrecovery.python_interface.predict import predict +from vfrecovery.downloaders import Armor3d, Glorys +from vfrecovery.core.db import DB diff --git a/vfrecovery/command_line_interface/__init__.py b/vfrecovery/command_line_interface/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/command_line_interface/group_db.py b/vfrecovery/command_line_interface/group_db.py new file mode 100644 index 0000000..051aa6c --- /dev/null +++ b/vfrecovery/command_line_interface/group_db.py @@ -0,0 +1,96 @@ +import click +import logging +from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo +import argopy.plot as argoplot +from argopy import ArgoIndex + +from vfrecovery.utils.misc import list_float_simulation_folders +from vfrecovery.core.db import DB + +root_logger = logging.getLogger("vfrecovery_root_logger") +blank_logger = logging.getLogger("vfrecovery_blank_logger") + + +@click.group() +def cli_group_db() -> None: + pass + + +@cli_group_db.command( + "db", + short_help="Helper for VirtualFleet-Recovery simulations database", + help=""" + Internal simulation database helper + + """, + epilog=""" + Examples: + + \b + vfrecovery db info + + \b + vfrecovery db read + + \b + vfrecovery db read --index 3 + + \b + vfrecovery db drop + """, # noqa +) +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), + default="INFO", + show_default=True, + help=( + "Set the details printed to console by the command " + "(based on standard logging library)." 
+ ), +) +@click.option( + "-i", "--index", + type=int, + required=False, + default=None, + show_default=False, + help="Record index to work with", +) +@click.argument('ACTION', nargs=1, type=str) +def db( + action, + index, + log_level, +) -> None: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if root_logger.isEnabledFor(logging.DEBUG): + root_logger.debug("DEBUG mode activated") + + # Validate arguments: + if action.lower() not in ["read", "info", "drop"]: + raise ValueError("The first argument ACTION must be one in ['read', 'info', 'drop']") + + if action == 'read': + df = DB.read_data() + if index is not None: + row = df.loc[index] + click.secho("Row index #%i:" % index, fg='green') + click.echo(row.T.to_string()) + else: + for irow, row in df.iterrows(): + click.secho("Row index #%i:" % irow, fg='green') + click.echo(row.T.to_string()) + + elif action == 'drop': + DB.clear() + + elif action == 'info': + click.echo(DB.info()) + + else: + raise click.BadParameter("Unknown DB action '%s'" % action) diff --git a/vfrecovery/command_line_interface/group_describe.py b/vfrecovery/command_line_interface/group_describe.py new file mode 100644 index 0000000..b62d505 --- /dev/null +++ b/vfrecovery/command_line_interface/group_describe.py @@ -0,0 +1,151 @@ +import click +import logging +from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo +import argopy.plot as argoplot +from argopy import ArgoIndex +from pathlib import Path + +from vfrecovery.utils.misc import list_float_simulation_folders +from vfrecovery.core.db import DB + + +root_logger = logging.getLogger("vfrecovery_root_logger") +blank_logger = logging.getLogger("vfrecovery_blank_logger") + + +@click.group() +def cli_group_describe() -> None: + pass + + +@cli_group_describe.command( + "describe", + short_help="Describe VirtualFleet-Recovery data and simulation results", + help=""" + + TARGET select what is to be described. A string in: ['obs', 'velocity', 'run']. + + WMO is the float World Meteorological Organisation number + + CYC is the cycle number location to restrict description to + """, + epilog=""" + Examples: + + \b + vfrecovery describe velocity 6903091 + + \b + vfrecovery describe obs 6903091 112 + """, # noqa +) +@click.option( + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), + default="INFO", + show_default=True, + help=( + "Set the details printed to console by the command " + "(based on standard logging library)." 
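+        # Caveat for the dispatch below: TARGET is validated case-insensitively
+        # (via `target.lower()`) but then compared exactly, so e.g. 'OBS' passes
+        # validation and still falls through to BadParameter. A sketch of a dispatch
+        # that keeps the two consistent:
+        #
+        #   target = target.lower()
+        #   dispatch = {'obs': describe_obs, 'velocity': describe_velocity, 'run': describe_run}
+        #   dispatch[target](wmo, cyc)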
+ ), +) +@click.argument('TARGET', nargs=1, type=str) +@click.argument('WMO', nargs=1, type=int) +@click.argument("CYC", nargs=-1, type=int) +def describe( + target, + wmo, + cyc, + log_level, +) -> None: + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if root_logger.isEnabledFor(logging.DEBUG): + root_logger.debug("DEBUG mode activated") + + # Validate arguments: + if target.lower() not in ["run", "obs", "velocity"]: + raise ValueError("The first argument TARGET must be one in ['run', 'obs', 'velocity']") + + assert is_wmo(wmo) + wmo = check_wmo(wmo)[0] + cyc = list(cyc) + if len(cyc) > 0: + assert is_cyc(cyc) + cyc = check_cyc(cyc) + + if target == 'obs': + describe_obs(wmo, cyc) + + elif target == 'velocity': + describe_velocity(wmo, cyc) + + elif target == 'run': + describe_run(wmo, cyc) + + else: + raise click.BadParameter("Unknown describe target '%s'" % target) + + +def describe_run(wmo, cyc): + partial_data = {'wmo': wmo} + if len(cyc) > 0: + partial_data.update({'cyc': cyc[0]}) + click.echo(DB.from_dict(partial_data).record.T.to_string(max_colwidth=15)) + + +def describe_velocity(wmo, cyc): + cyc = cyc[0] if len(cyc) > 0 else None + + for ii, item in DB.from_dict({'wmo': wmo, 'cyc': cyc}).items: + p = Path(item.path_root).joinpath(item.path_obj.velocity) + + click.secho("Velocity data for WMO=%s / CYC=%s / DOMAIN-SIZE=%0.2f / DOWNLOAD-DATE=%s" + % (item.wmo, item.cyc, item.velocity['domain_size'], item.velocity['download']), fg='blue') + + click.secho("\tNetcdf files:") + vlist = sorted(p.glob("velocity_*.nc")) + if len(vlist) > 0: + [click.secho("\t\t- %s" % v, fg='green') for v in vlist] + else: + click.secho("\tNo velocity file", fg='red') + + click.secho("\tFigures:") + vlist = sorted(p.glob("velocity_*.png")) + if len(vlist) > 0: + [click.secho("\t\t- %s" % v, fg='green') for v in vlist] + else: + click.secho("\tNo velocity figures", fg='red') + + +def describe_obs(wmo, cyc): + url = argoplot.dashboard(wmo, url_only=True) + # txt = "You can check this float dashboard while we search for float profiles in the index: %s" % url + click.secho("\nYou can check this float dashboard while we search for float profile(s) in the index:") + click.secho("\t%s" % url, fg='green') + + # Load observed float profiles index: + host = "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" + idx = ArgoIndex(host=host) + if len(cyc) > 0: + idx.search_wmo_cyc(wmo, cyc) + else: + idx.search_wmo(wmo) + + df = idx.to_dataframe() + df = df.sort_values(by='date').reset_index(drop=True) + if df.shape[0] == 1: + click.secho("\nFloat profile data from the index:") + # df_str = "\t%s" % (df.T).to_string() + df_str = "\n".join(["\t%s" % l for l in (df.T).to_string().split("\n")[1:]]) + click.secho(df_str, fg="green") + else: + click.secho("\nFloat profile(s):") + # click.secho(df.to_string(max_colwidth=15), fg="green") + click.secho(df.to_string(), fg="green") + # click.echo_via_pager("\n%s" % df.to_string(max_colwidth=15)) diff --git a/vfrecovery/command_line_interface/group_predict.py b/vfrecovery/command_line_interface/group_predict.py new file mode 100644 index 0000000..d19f54f --- /dev/null +++ b/vfrecovery/command_line_interface/group_predict.py @@ -0,0 +1,171 @@ +import click +import logging + +from vfrecovery.core.predict import 
predict_function + +root_logger = logging.getLogger("vfrecovery_root_logger") +blank_logger = logging.getLogger("vfrecovery_blank_logger") + + +@click.group() +def cli_group_predict() -> None: + pass + + +@cli_group_predict.command( + "predict", + short_help="Execute VirtualFleet-Recovery predictions", + help=""" + Execute the VirtualFleet-Recovery predictor + + WMO is the float World Meteorological Organisation number. + + CYC is the cycle number to predict. If you want to simulate more than 1 cycle, use the `n_predictions` option (see below). + """, + epilog=""" +Examples: +\b +\n\tvfrecovery predict 6903091 112 + """, # noqa +) +@click.option( + "-v", "--velocity", + type=str, + required=False, + default='GLORYS', + show_default=True, + help="Velocity field to use. Velocity data are downloaded with the Copernicus Marine Toolbox. Possible values are: 'GLORYS', 'ARMOR3D'", +) +@click.option( + "--output_path", + type=str, + required=False, + default=None, + help="Simulation root data output folder [default: './vfrecovery_simulations_data']", +) +@click.option( + "--cfg_parking_depth", + type=float, + required=False, + default=None, + show_default=False, + help="Virtual floats parking depth in db [default: previous cycle value]", +) +@click.option( + "--cfg_cycle_duration", + type=float, + required=False, + default=None, + show_default=False, + help="Virtual floats cycle duration in hours [default: previous cycle value]", +) +@click.option( + "--cfg_profile_depth", + type=float, + required=False, + default=None, + show_default=False, + help="Virtual floats profile depth in db [default: previous cycle value]", +) +@click.option( + "--cfg_free_surface_drift", + type=int, + required=False, + default=9999, + show_default=True, + help="Virtual cycle number to start free surface drift, inclusive", +) +@click.option( + "-n", "--n_predictions", + type=int, + required=False, + default=0, + show_default=True, + help="Number of profiles to predict after cycle specified with argument 'CYC'", +) +@click.option( + "-s", "--swarm_size", + type=int, + required=False, + default=100, + show_default=True, + help="Swarm size, i.e. the number of virtual floats simulated to make predictions for 1 real float", +) +@click.option( + "-d", "--domain_min_size", + type=float, + required=False, + default=5, + show_default=True, + help="Minimal size (deg) of the simulation domain around the initial float position", +) +@click.option('--overwrite', + is_flag=True, + help="Should past simulation data be overwritten or not, for a similar set of arguments" + ) +@click.option('--lazy/--no-lazy', + default=True, + show_default=True, + help="Load velocity data in lazy mode (not saved on file)." + ) +@click.option('--figure/--no-figure', + default=True, + show_default=True, + help="Display and save figures on file (png format)", + ) +@click.option( + "--log_level", + type=click.Choice(["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL", "QUIET"]), + default="INFO", + show_default=True, + help=( + "Set the details printed to console by the command " + "(based on standard logging library)." 
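+        # The same prediction can be run from Python; a usage sketch (assuming the
+        # `predict` wrapper re-exported in vfrecovery/__init__.py forwards the same
+        # keyword arguments as the `predict_function` call below):
+        #
+        #   import vfrecovery
+        #   json_dump = vfrecovery.predict(6903091, 112, velocity='GLORYS',
+        #                                  swarm_size=100, domain_min_size=5)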
+ ), +) +@click.argument('WMO', nargs=1, type=int) +@click.argument('CYC', nargs=1, type=int) +def predict( + wmo, + cyc, + velocity, + output_path, + n_predictions, + cfg_parking_depth, + cfg_cycle_duration, + cfg_profile_depth, + cfg_free_surface_drift, + swarm_size, + domain_min_size, + overwrite, + lazy, + figure, + log_level, +) -> None: + """ + + """ + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + if root_logger.isEnabledFor(logging.DEBUG): + root_logger.debug("DEBUG mode activated") + + # + json_dump = predict_function(wmo, cyc, + velocity=velocity, + output_path=output_path, + n_predictions=n_predictions, + cfg_parking_depth=cfg_parking_depth, + cfg_cycle_duration=cfg_cycle_duration, + cfg_profile_depth=cfg_profile_depth, + cfg_free_surface_drift=cfg_free_surface_drift, + swarm_size=swarm_size, + domain_min_size=domain_min_size, + overwrite=overwrite, + lazy=lazy, + figure=figure, + log_level=log_level) + blank_logger.info(json_dump) diff --git a/vfrecovery/command_line_interface/utils.py b/vfrecovery/command_line_interface/utils.py new file mode 100644 index 0000000..34a2c16 --- /dev/null +++ b/vfrecovery/command_line_interface/utils.py @@ -0,0 +1,8 @@ +import sys +import logging + +log = logging.getLogger("vfrecovery.cli") + + +PREF = "\033[" +RESET = f"{PREF}0m" diff --git a/vfrecovery/command_line_interface/virtualfleet_recovery.py b/vfrecovery/command_line_interface/virtualfleet_recovery.py new file mode 100644 index 0000000..4b34779 --- /dev/null +++ b/vfrecovery/command_line_interface/virtualfleet_recovery.py @@ -0,0 +1,26 @@ +import click + +from vfrecovery.command_line_interface.group_describe import cli_group_describe +from vfrecovery.command_line_interface.group_predict import cli_group_predict +from vfrecovery.command_line_interface.group_db import cli_group_db + +@click.command( + cls=click.CommandCollection, + sources=[ + cli_group_describe, + cli_group_predict, + cli_group_db, + ], + context_settings=dict(help_option_names=["-h", "--help"]), +) +@click.version_option(None, "-V", "--version", package_name="vfrecovery") +def base_command_line_interface(): + pass + + +def command_line_interface(): + base_command_line_interface(windows_expand_args=False) + + +if __name__ == "__main__": + command_line_interface() diff --git a/vfrecovery/core/__init__.py b/vfrecovery/core/__init__.py new file mode 100644 index 0000000..b02c95e --- /dev/null +++ b/vfrecovery/core/__init__.py @@ -0,0 +1,5 @@ +# from deployment_plan import setup_deployment_plan +from .trajfile_handler import Trajectories +from .analysis_handler import RunAnalyser +from .simulation_handler import Simulation +from .db import DB diff --git a/vfrecovery/core/analysis_handler.py b/vfrecovery/core/analysis_handler.py new file mode 100644 index 0000000..d0efc6b --- /dev/null +++ b/vfrecovery/core/analysis_handler.py @@ -0,0 +1,537 @@ +import pandas as pd +import numpy as np +from typing import List +from pathlib import Path + +from sklearn.neighbors import KernelDensity +from virtualargofleet import VelocityField + +import matplotlib as mpl +import matplotlib.pyplot as plt +import argopy.plot as argoplot +import cartopy.crs as ccrs + +from vfrecovery.plots.utils import save_figurefile, map_add_features +from vfrecovery.utils.geo import haversine, bearing +from vfrecovery.json import Simulation, Profile, Location, Metrics, Transit, SurfaceDrift, Location_error + + +class RunAnalyserCore: + """ + + Examples + 
-------- + T = Trajectories(traj_zarr_file) + df = T.get_index().add_distances() + + SP = RunAnalyser(df) + SP.fit_predict() + SP.add_metrics(VFvelocity) + SP.bbox() + SP.plot_predictions(VFvelocity) + SP.plan + SP.n_cycles + SP.trajectory + """ + + def __init__(self, df_sim: pd.DataFrame, df_obs: pd.DataFrame): + self.swarm = df_sim + self.obs = df_obs + # self.set_weights() + self.WMO = np.unique(df_obs['wmo'])[0] + self.jsobj = [] + + if 'distance_origin' not in df_sim: + raise ValueError("Invalid simulation dataframe ! You probably forget to compute distances") + + def __repr__(self): + summary = [""] + summary.append("Simulation target: %i / %i" % (self.WMO, self.sim_cycles[0])) + summary.append("Swarm size: %i floats" % len(np.unique(self.swarm['wmo']))) + summary.append("Number of simulated cycles: %i profile(s) for cycle number(s): [%s]" % ( + self.n_cycles, ",".join([str(c) for c in self.sim_cycles]))) + summary.append("Observed reference: %i profile(s) for cycle number(s): [%s]" % ( + self.obs.shape[0], ",".join([str(c) for c in self.obs_cycles]))) + return "\n".join(summary) + + @property + def n_cycles(self): + """Number of simulated cycles""" + return len(np.unique(self.swarm['cyc'])) + # return len(self.sim_cycles) + + @property + def obs_cycles(self): + """Observed cycle numbers""" + return np.unique(self.obs['cyc']) + + @property + def has_ref(self): + return len(self.obs_cycles) > 1 + + @property + def sim_cycles(self): + """Simulated cycle numbers""" + return self.obs_cycles[0] + 1 + range(self.n_cycles) + + @property + def plan(self) -> pd.DataFrame: + if not hasattr(self, '_plan'): + df_plan = self.swarm[self.swarm['cyc'] == 1][['date', 'deploy_lon', 'deploy_lat']] + df_plan = df_plan.rename(columns={'deploy_lon': 'longitude', 'deploy_lat': 'latitude'}) + self._plan = df_plan + return self._plan + + @property + def trajectory(self): + """Return the predicted trajectory as a simple :class:`np.array` + + First row is longitude, 2nd is latitude and 3rd is date of simulated profiles + + Return + ------ + :class:`np.array` + """ + if len(self.jsobj.predictions) == 0: + raise ValueError("Please call `fit_predict` first") + + traj_prediction = np.array([self.obs['longitude'].values[0], + self.obs['latitude'].values[0], + self.obs['date'].values[0]])[ + np.newaxis] # Starting point where swarm was deployed + for p in self.jsobj.predictions: + xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time + traj_prediction = np.concatenate((traj_prediction, + np.array([xpred, ypred, tpred])[np.newaxis]), + axis=0) + return traj_prediction + + + def bbox(self, s: float = 1) -> list: + """Get a simulation bounding box + + Parameters + ---------- + s: float, default:1 + + Returns + ------- + list + """ + if not isinstance(self.jsobj, Simulation): + raise ValueError("Please call `fit_predict` first") + + df_sim = self.swarm + df_obs = self.obs + + box = [np.min([df_sim['deploy_lon'].min(), + df_sim['longitude'].min(), + df_sim['rel_lon'].min(), + df_obs['longitude'].min()]), + np.max([df_sim['deploy_lon'].max(), + df_sim['longitude'].max(), + df_sim['rel_lon'].max(), + df_obs['longitude'].max()]), + np.min([df_sim['deploy_lat'].min(), + df_sim['latitude'].min(), + df_sim['rel_lat'].min(), + df_obs['latitude'].min()]), + np.max([df_sim['deploy_lat'].max(), + df_sim['latitude'].max(), + df_sim['rel_lat'].max(), + df_obs['latitude'].max()])] + rx, ry = box[1] - box[0], box[3] - box[2] + r = np.min([rx, ry]) + ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, 
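+                # pad the raw box by `s` times its smallest extent r = min(rx, ry) on each side: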
box[3] + s * r]
+
+        return ebox
+
+
+class RunAnalyserPredictor(RunAnalyserCore):
+
+    def set_weights(self, scale: float = 20):
+        """Compute weights for predictions
+
+        Add a 'weights' column to the swarm :class:`pandas.DataFrame`, computed as a Gaussian
+        of the deployment distance, with a std scaled to the size of the deployment domain
+
+        Parameters
+        ----------
+        scale: float (default=20.)
+        """
+        rx, ry = self.plan['longitude'].max() - self.plan['longitude'].min(), \
+                 self.plan['latitude'].max() - self.plan['latitude'].min()
+        r = np.min([rx, ry])  # Minimal size of the deployment domain
+        weights = np.exp(-(self.swarm['distance_origin'] ** 2) / (r / scale))
+        weights[np.isnan(weights)] = 0
+        self.swarm['weights'] = weights
+        return self
+
+    def fit_predict(self, weights_scale: float = 20.) -> List[Profile]:
+        """Predict profile positions from simulated float swarm
+
+        Prediction is based on a :class:`sklearn.neighbors.KernelDensity` estimate of the
+        simulated swarm positions, weighted by their deployment distance to the observed
+        previous cycle position.
+
+        Parameters
+        ----------
+        weights_scale: float (default=20)
+            Scale (in deg) to use to weight the deployment distance to the observed previous cycle position
+
+        Returns
+        -------
+        List[Profile]
+        """
+
+        # Compute weights of the swarm float profiles locations
+        self.set_weights(scale=weights_scale)
+
+        # self._prediction_data = {'weights_scale': weights_scale, 'cyc': {}}
+
+        cycles = np.unique(self.swarm['cyc']).astype(int)  # 1, 2, ...
+        Plist = []
+        for icyc, this_sim_cyc in enumerate(cycles):
+            this_cyc_df = self.swarm[self.swarm['cyc'] == this_sim_cyc]
+            weights = this_cyc_df['weights']
+            x, y = this_cyc_df['rel_lon'], this_cyc_df['rel_lat']
+
+            w = weights / np.max(np.abs(weights), axis=0)
+            X = np.array([x, y]).T
+            kde = KernelDensity(kernel='gaussian', bandwidth=0.15).fit(X, sample_weight=w)
+
+            xg, yg = (np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100),
+                      np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100))
+            xg, yg = np.meshgrid(xg, yg)
+            Xg = np.array([xg.flatten(), yg.flatten(), ]).T
+            llh = kde.score_samples(Xg)
+            xpred = Xg[np.argmax(llh), 0]
+            ypred = Xg[np.argmax(llh), 1]
+            tpred = this_cyc_df['date'].mean()
+
+            # Store results in a Profile instance:
+            p = Profile.from_dict({
+                'location': Location.from_dict({
+                    'longitude': xpred,
+                    'latitude': ypred,
+                    'time': tpred,
+                    'description': None,
+                }),
+                'wmo': int(self.WMO),
+                'cycle_number': int(self.sim_cycles[icyc]),
+                'virtual_cycle_number': int(this_sim_cyc),
+                'description': "Simulated profile #%i" % this_sim_cyc,
+                'metrics': Metrics.from_dict({'description': None}),
+                'url_float': argoplot.dashboard(self.WMO, url_only=True),
+            })
+            Plist.append(p)
+
+        # Store results internally
+        obs_cyc = self.obs_cycles[0]
+        this_df = self.obs[self.obs['cyc'] == obs_cyc]
+
+        self.jsobj = Simulation.from_dict({
+            "initial_profile": Profile.from_dict({
+                'location': Location.from_dict({
+                    'longitude': this_df['longitude'].iloc[0],
+                    'latitude': this_df['latitude'].iloc[0],
+                    'time': this_df['date'].iloc[0],
+                    'description': None,
+                }),
+                'wmo': int(self.WMO),
+                'cycle_number': int(obs_cyc),
+                'description': "Initial profile (observed)",
+                'url_float': argoplot.dashboard(self.WMO, url_only=True),
+                'url_profile': argoplot.dashboard(self.WMO, obs_cyc, url_only=True),
+            }),
+            "predictions": Plist,
+            "observations": None,
+            "meta_data": None,
+        })
+
+        # Add more stuff to internal storage:
+        self._add_ref()  # Fill: self.jsobj.observations
+        self._predict_errors()  # Fill: self.jsobj.predictions.Metrics.error and
self.jsobj.predictions.Metrics.transit + # + return self + + +class RunAnalyserDiagnostics(RunAnalyserPredictor): + + def _add_ref(self): + """Possibly add observations data to internal data structure + + This is for past cycles, for which we have observed positions of the predicted profiles + + This populates the ``self.jsobj.observations`` property (``self.jsobj`` was created by the ``fit_predict`` method) + + """ + if not isinstance(self.jsobj, Simulation): + raise ValueError("Please call `fit_predict` first") + + # Observed profiles that were simulated: + Plist = [] + for cyc in self.sim_cycles: + if cyc in self.obs_cycles: + this_df = self.obs[self.obs['cyc'] == cyc] + p = Profile.from_dict({ + 'wmo': int(self.WMO), + 'cycle_number': int(cyc), + 'url_float': argoplot.dashboard(self.WMO, url_only=True), + 'url_profile': argoplot.dashboard(self.WMO, cyc, url_only=True), + 'location': Location.from_dict({'longitude': this_df['longitude'].iloc[0], + 'latitude': this_df['latitude'].iloc[0], + 'time': this_df['date'].iloc[0]}) + }) + Plist.append(p) + + self.jsobj.observations = Plist + + return self + + def _predict_errors(self): + """Possibly compute error metrics for the predicted positions + + This is for past cycles, for which we have observed positions of the predicted profiles + + This populates the ``self.jsobj.predictions.Metrics.error`` and ``self.jsobj.predictions.Metrics.transit`` properties (``self.jsobj`` was created by the ``fit_predict`` method) + + A transit time to cover the distance error is also calculated + (assume a 12 kts boat speed with 1 kt = 1.852 km/h) + + """ + if not isinstance(self.jsobj, Simulation): + raise ValueError("Please call `fit_predict` first") + + Plist_updated = [] + for p in self.jsobj.predictions: + if p.cycle_number in self.obs_cycles: + this_obs_profile = self.obs[self.obs['cyc'] == p.cycle_number] + xobs = this_obs_profile['longitude'].iloc[0] + yobs = this_obs_profile['latitude'].iloc[0] + tobs = this_obs_profile['date'].iloc[0] + + prev_obs_profile = self.obs[self.obs['cyc'] == p.cycle_number - 1] + xobs0 = prev_obs_profile['longitude'].iloc[0] + yobs0 = prev_obs_profile['latitude'].iloc[0] + + xpred = p.location.longitude + ypred = p.location.latitude + tpred = p.location.time + + dd = haversine(xobs, yobs, xpred, ypred) + + observed_bearing = bearing(xobs0, yobs0, xobs, yobs) + sim_bearing = bearing(xobs0, yobs0, xpred, ypred) + + dt = pd.Timedelta(tpred - tobs)# / np.timedelta64(1, 's') + + p.metrics.error = Location_error.from_dict({ + 'distance': np.round(dd, 3), + 'bearing': np.round(sim_bearing - observed_bearing, 3), + 'time': pd.Timedelta(dt, 'h') + }) + + # also compute a transit time to cover the distance error: + p.metrics.transit = Transit.from_dict({ + 'value': + pd.Timedelta(p.metrics.error.distance / (12 * 1.852), 'h').seconds / 3600. + }) + + Plist_updated.append(p) + + self.jsobj.predictions = Plist_updated + return self + + def add_metrics(self, VFvel=None): + """Possibly compute more metrics to interpret the prediction error + + This is for past cycles, for which we have observed positions of the predicted profiles + + This populates the ``self.jsobj.predictions.Metrics.surface_drift`` property (``self.jsobj`` was created by the ``fit_predict`` method) + + 1. 
Compute surface drift due to the time lag between the predicted profile timing and the expected one + + """ + if not isinstance(self.jsobj, Simulation): + raise ValueError("Please call `predict` first") + + Plist_updated = [] + for p in self.jsobj.predictions: + if p.cycle_number in self.obs_cycles and isinstance(p.metrics.error, Location_error): + # Compute the possible drift due to the time lag between the predicted profile timing and the expected one: + if VFvel is not None: + xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time + dsc = VFvel.field.interp( + {VFvel.dim['lon']: xpred, + VFvel.dim['lat']: ypred, + VFvel.dim['time']: tpred, + VFvel.dim['depth']: + VFvel.field[{VFvel.dim['depth']: 0}][VFvel.dim['depth']].values[np.newaxis][0]} + ) + velc = np.sqrt(dsc[VFvel.var['U']] ** 2 + dsc[VFvel.var['V']] ** 2).values[np.newaxis][0] # m/s + p.metrics.surface_drift = SurfaceDrift.from_dict({ + "surface_currents_speed": velc, # m/s by default + "value": (np.abs(p.metrics.error.time.total_seconds()) * velc / 1e3) # km + }) + + Plist_updated.append(p) + + self.jsobj.predictions = Plist_updated + return self + + +class RunAnalyserView(RunAnalyserDiagnostics): + + def plot_predictions(self, + vel: VelocityField = None, + vel_depth: float = 0., + + s=0.2, + alpha=False, + + save: bool = False, + workdir: Path = Path('.'), + fname: str = 'predictions', + mplbackend: str = 'Agg', + + figsize=None, + dpi=120, + orient='portrait' + ): + ebox = self.bbox(s=s) + pred_traj = self.trajectory + + if orient == 'portrait': + if self.n_cycles == 1: + nrows, ncols = 2, 1 + if figsize is None: + figsize = (10, 10) + else: + nrows, ncols = self.n_cycles, 2 + if figsize is None: + figsize = (10, (self.n_cycles - 1) * 10) + else: + if self.n_cycles == 1: + nrows, ncols = 1, 2 + else: + nrows, ncols = 2, self.n_cycles + if figsize is None: + figsize = (ncols * 10, 10) + + def plot_this(this_ax, i_cycle, ip): + virtual_cycle_number = i_cycle + 1 + df_sim = self.swarm[self.swarm['cyc'] == virtual_cycle_number] + df_sim = df_sim.reset_index(drop=True) + + if self.sim_cycles[i_cycle] in self.obs_cycles: + this_profile = self.obs[self.obs['cyc'] == self.sim_cycles[i_cycle]] + else: + this_profile = None + + for p in self.jsobj.predictions: + if p.virtual_cycle_number == virtual_cycle_number: + xpred, ypred, tpred = p.location.longitude, p.location.latitude, p.location.time + + this_ax.set_extent(ebox) + this_ax = map_add_features(ax[ix]) + + if vel is not None: + v = vel.field.isel(time=-1).interp(depth=vel_depth) + v.plot.quiver(x="longitude", + y="latitude", + u=vel.var['U'], + v=vel.var['V'], + ax=this_ax, + color='grey', + alpha=0.5, + scale=5, + add_guide=False) + + this_ax.plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.', + markersize=3, + color='grey', + alpha=0.1, + markeredgecolor=None, + zorder=0) + + this_ax.plot(pred_traj[:, 0], pred_traj[:, 1], color='k', linewidth=1, marker='+') + this_ax.plot(xpred, ypred, color='lightgreen', marker='+') + + weights = df_sim['weights'] + w = weights / np.max(np.abs(weights), axis=0) + ii = np.argsort(w) + cmap = plt.cm.cool + # cmap = plt.cm.Reds + + if ip == 0: + x, y = df_sim['deploy_lon'], df_sim['deploy_lat'] + title = 'Initial virtual float positions' + elif ip == 1: + x, y = df_sim['longitude'], df_sim['latitude'] + title = 'Final virtual float positions' + elif ip == 2: + x, y = df_sim['rel_lon'], df_sim['rel_lat'] + title = 'Final virtual floats positions relative to observed float' + + if not alpha: + 
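+                # scatter the swarm colored by normalized weight w in [0, 1]; the argsort
+                # indices `ii` make the highest-weight floats plot last, i.e. on top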
this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii],
+                                marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap)
+            else:
+                this_ax.scatter(x.iloc[ii], y.iloc[ii], c=w[ii],
+                                alpha=w[ii],
+                                marker='o', s=4, edgecolor=None, vmin=0, vmax=1, cmap=cmap)
+
+            # Display full trajectory prediction:
+            if ip != 0 and this_profile is not None:
+                this_ax.arrow(this_profile['longitude'].iloc[0],
+                              this_profile['latitude'].iloc[0],
+                              xpred - this_profile['longitude'].iloc[0],
+                              ypred - this_profile['latitude'].iloc[0],
+                              length_includes_head=True, fc='k', ec='c', head_width=0.025, zorder=10)
+                this_ax.plot(xpred, ypred, 'k+', zorder=10)
+
+            this_ax.set_title("")
+            # this_ax.set_ylabel("Cycle %i predictions" % (i_cycle+1))
+            this_ax.set_title("%s\nCycle %i predictions" % (title, self.sim_cycles[i_cycle]), fontsize=6)
+
+        initial_mplbackend = mpl.get_backend()
+        mpl.use(mplbackend)
+
+        fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, dpi=dpi,
+                               subplot_kw={'projection': ccrs.PlateCarree()},
+                               sharex=True, sharey=True)
+        ax, ix = ax.flatten(), -1
+
+        if orient == 'portrait':
+            rows = range(self.n_cycles)
+            cols = [1, 2]
+        else:
+            rows = [1, 2]
+            cols = range(self.n_cycles)
+
+        if orient == 'portrait':
+            for i_cycle in rows:
+                for ip in cols:
+                    ix += 1
+                    plot_this(ax[ix], i_cycle, ip)
+        else:
+            for ip in rows:
+                for i_cycle in cols:
+                    ix += 1
+                    plot_this(ax[ix], i_cycle, ip)
+
+        plt.tight_layout()
+
+        if save:
+            save_figurefile(fig, fname, workdir)
+
+        # Restore the initial mpl backend:
+        mpl.use(initial_mplbackend)
+
+        return fig, ax
+
+
+class RunAnalyser(RunAnalyserView):
+
+    def to_json(self, fp=None):
+        return self.jsobj.to_json(fp=fp)
\ No newline at end of file
diff --git a/vfrecovery/core/db.py b/vfrecovery/core/db.py
new file mode 100644
index 0000000..6adc9c7
--- /dev/null
+++ b/vfrecovery/core/db.py
@@ -0,0 +1,447 @@
+"""
+The primary goal of this module is to make it easy to determine if one simulation has already been done or not
+
+The goal is to provide a "database"-like set of functions to manage all simulations being performed
+
+We should provide methods:
+ - to record a new simulation
+ - to list all past simulations
+ - to search in all past simulations
+
+
+What defines a unique VFrecovery simulation?
+- WMO & CYC & n_predictions targets > 3 params (int, int, int)
+- Floats configuration > at least 6 numeric parameters
+- Velocity field: name, date of download, domain size > 3 params (str, datetime, int)
+- Output path > 1 param (str/Path)
+- Swarm size > 1 param (int)
+
+Note: the velocity field time frame is set by the WMO/CYC/n_predictions targets, so there is no need to include it.
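+
+For illustration, one registry record flattens these parameters into columns like the
+following (a sketch with made-up values; the exact column names are listed in DB.required below):
+
+    {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0,
+     'cfg_parking_depth': 1000., 'cfg_profile_depth': 2000., 'cfg_cycle_duration': 240.,
+     'velocity_name': 'GLORYS', 'velocity_download': Timestamp('2024-03-28 00:00:00+0000'),
+     'velocity_domain_size': 5,
+     'swarm_size': 1000, 'path_root': Path('.')}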
+ +This first implementation relies on a simple local pickle file with a panda dataframe + +""" +from typing import List, Dict, Iterable, Hashable +from virtualargofleet import FloatConfiguration +from pathlib import Path +import pandas as pd +import numpy as np +import warnings + + +from .utils import make_hash_sha256 + + +num2str = lambda x: "%s%s" % ("%0.4d" % x, "%0.3d" % (1e3 * np.round(x, 3) - 1e3 * np.round(x))) +str2num = lambda x: int(x[0:4]) + int(x[4:]) / 1e3 + + +class Row2Path: + """ + >>> Row2Path(row).wmo + >>> Row2Path(row).cyc + >>> Row2Path(row).velocity + >>> Row2Path(row).run + >>> Row2Path(row).path + """ + + def __init__(self, row): + self.row = row + + def __repr__(self): + summary = [""] + summary.append("%10s: %s" % ("wmo", self.wmo)) + summary.append("%10s: %s" % ("cyc", self.cyc)) + summary.append("%10s: %s" % ("velocity", self.velocity)) + summary.append("%10s: %s" % ("run", self.run)) + return "\n".join(summary) + + @property + def wmo(self): + return Path(str(self.row['wmo'])) + + @property + def cyc(self): + # |----CYC + last = str(self.row['cyc']) + return self.wmo.joinpath(Path(last)) + + @property + def velocity(self): + # |----VELOCITY(NAME + DOWNLOAD_DATE + DOMAIN_SIZE) + last = "%s_%s_%s" % (self.row['velocity_name'], + num2str(self.row['velocity_domain_size']), + self.row['velocity_download'].strftime("%Y%m%d"), + ) + return self.cyc.joinpath(Path(last)) + + @property + def run(self): + # |---- RUN_PARAMS(NP + CFG + NF) + last = "%s_%s_%s" % ("NP%0.3d" % self.row['n_predictions'], + "SW%0.4d" % self.row['swarm_size'], + "CFG%s" % "c".join( + [num2str(self.row[c]) for c in self.row.index if "cfg_" in c]), + ) + return self.velocity.joinpath(Path(last)) + + @property + def path(self): + return self.run + + +class Path2Row: + """ + >>> Path2Row(path).wmo + >>> Path2Row(path).cyc + >>> Path2Row(path).velocity + >>> Path2Row(path).run + >>> Path2Row(path).row + """ + + cfg_cols: list = ['cfg_cycle_duration', + 'cfg_life_expectancy', + 'cfg_parking_depth', + 'cfg_profile_depth', + 'cfg_reco_free_surface_drift', + 'cfg_vertical_speed'] + + def __init__(self, p): + self.path = p + + def __repr__(self): + summary = [""] + summary.append("%10s: %s" % ("wmo", self.wmo)) + summary.append("%10s: %s" % ("cyc", self.cyc)) + summary.append("%10s: %s" % ("velocity", self.velocity)) + summary.append("%10s: %s" % ("run", self.run)) + return "\n".join(summary) + + @property + def wmo(self): + return int(self.path.parts[0]) + + @property + def cyc(self): + return int(self.path.parts[1]) + + @property + def velocity(self): + velocity = self.path.parts[2] + result = {} + result.update({'velocity_name': velocity.split("_")[0]}) + result.update({'velocity_domain_size': str2num(velocity.split("_")[1])}) + result.update({'velocity_download': pd.to_datetime(velocity.split("_")[2], utc=True)}) + return result + + @property + def run(self): + run = self.path.parts[3] + result = {} + result.update({'n_predictions': int(run.split("_")[0][2:])}) + result.update({'swarm_size': int(run.split("_")[1][2:])}) + for key, value in zip(self.cfg_cols, [str2num(v) for v in run.split("_")[2][3:].split("c")]): + result.update({key: value}) + return result + + @property + def row(self): + row = {} + row.update({'wmo': self.wmo}) + row.update({'cyc': self.cyc}) + row.update({'n_predictions': int(self.run['n_predictions'])}) + for key in self.cfg_cols: + row.update({key: self.run[key]}) + for key in self.velocity: + row.update({key: self.velocity[key]}) + row.update({'swarm_size': 
int(self.run['swarm_size'])}) + return pd.DataFrame([row]) + + +class DB: + """ + + >>> DB.dbfile + >>> DB.init() + >>> DB.clear() + >>> DB.isconnected() + >>> DB.read_data() # Return db content as :class:`pd.DataFrame` + + >>> data = {'wmo': 6903091, 'cyc': 120, 'n_predictions': 0, + >>> 'cfg': FloatConfiguration('recovery'), + >>> 'velocity': {'name': 'GLORYS', + >>> 'download': pd.to_datetime('now', utc=True), + >>> 'domain_size': 5}, + >>> 'path_root': Path('.'), + >>> 'swarm_size': 1000} + >>> + >>> DB.from_dict(data).checkin() # save to db + >>> DB.from_dict(data).checkout() # delete from db + >>> DB.from_dict(data).checked + >>> DB.from_dict(data).uid + >>> DB.from_dict(data).record + + >>> partial_data = {'wmo': 6903091} + >>> DB.from_dict(partial_data) # Create new instance for actions + >>> DB.from_dict(partial_data).record + + """ + wmo: int + cyc: int + n_predictions: int + cfg: FloatConfiguration + velocity: Dict + swarm_size: int + path_root: Path + + required: List = ['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', + 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', + 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', + 'velocity_download', 'velocity_domain_size', 'swarm_size', 'path_root'] + properties: List = ['wmo', 'cyc', 'n_predictions', 'cfg', 'velocity', 'swarm_size', 'path_root'] + + _data: pd.DataFrame + dbfile: Path = (Path(__file__).parent.parent).joinpath('static').joinpath('assets').joinpath( + "simulations_registry.pkl") + + def __init__(self, **kwargs): + for key in kwargs: + if key in self.properties: + setattr(self, key, kwargs[key]) + + # Connect to database: + self.connect() + + @classmethod + def isconnected(cls) -> bool: + return cls.dbfile.exists() + + @classmethod + def clear(cls): + def confirm(): + """ + Ask user to enter Y or N (case-insensitive). + :return: True if the answer is Y. + :rtype: bool + """ + answer = "" + while answer not in ["y", "n"]: + answer = input("Confirm to permanently clear the simulations registry [Y/N]? 
").lower() + return answer == "y" + + if confirm(): + return cls.dbfile.unlink(missing_ok=True) + + @classmethod + def init(cls): + df = pd.DataFrame({'wmo': pd.Series(dtype='int'), + 'cyc': pd.Series(dtype='int'), + 'n_predictions': pd.Series(dtype='int'), + 'cfg_cycle_duration': pd.Series(dtype='float'), + 'cfg_life_expectancy': pd.Series(dtype='float'), + 'cfg_parking_depth': pd.Series(dtype='float'), + 'cfg_profile_depth': pd.Series(dtype='float'), + 'cfg_reco_free_surface_drift': pd.Series(dtype='float'), + 'cfg_vertical_speed': pd.Series(dtype='float'), + 'velocity_name': pd.Series(dtype='string'), + 'velocity_download': pd.Series(dtype='datetime64[ns]'), + 'velocity_domain_size': pd.Series(dtype='float'), + 'swarm_size': pd.Series(dtype='int'), + 'path_root': pd.Series(dtype='object'), + 'path': pd.Series(dtype='object'), + 'uid': pd.Series(dtype='string'), + }) + df.name = pd.to_datetime('now', utc=True) + cls._data = df + return cls + + @classmethod + def connect(cls) -> "DB": + """Connect to database and refresh data holder""" + if not cls.isconnected(): + cls.init() + cls._data.to_pickle(cls.dbfile) + else: + cls._data = pd.read_pickle(cls.dbfile) + + # Add read-only columns generated on the fly: + if cls._data.shape[0] > 0: + cls._data['path'] = cls._data.apply(lambda row: Row2Path(row).path, axis=1) + cls._data['uid'] = cls._data.apply(lambda row: make_hash_sha256(row.to_dict()), axis=1) + return cls + + # @classmethod + # def consolidate(cls): + # """Reconcile DB records with files on disk + # + # Make sure that all records have result files on disk + # """ + # def has_result_file(df_row): + # df_row['output'] + # + # + # cls.connect() + # df = cls._data + # df.apply(has_result_file, axis=1) + + @classmethod + def read_data(cls) -> pd.DataFrame: + """Return database content as a :class:`pd.DataFrame`""" + cls.connect() + return cls._data + + @classmethod + def exists(cls, dict_of_values) -> bool: + """Return True if an exact match on all properties is found""" + df = cls.read_data() + v = df.iloc[:, 0] == df.iloc[:, 0] + for key, value in dict_of_values.items(): + v &= (df[key] == value) + return v.any() + + @classmethod + def put_data(cls, row): + if not cls.exists(row): + df = cls.read_data() + if 'path' in row: + warnings.warn("'path' is a read only property, removed from input") + row.pop('path', None) + if 'uid' in row: + warnings.warn("'uid' is a read only property, removed from input") + row.pop('uid', None) + df = pd.concat([df, pd.DataFrame([row])], ignore_index=True) + df.to_pickle(cls.dbfile) + else: + print("This record is already in the database") + + @classmethod + def del_data(cls, row): + df = cls.read_data() + v = df.iloc[:, 0] == df.iloc[:, 0] + for key, value in row.items(): + v &= (df[key] == value) + df = df[v != True] + df.to_pickle(cls.dbfile) + + @classmethod + def get_data(cls, row) -> pd.DataFrame: + """Return records matching no-None properties""" + df = cls.read_data() + mask = df.iloc[:, 0] == df.iloc[:, 0] + for key in row: + if row[key] is not None: + mask &= df[key] == row[key] + return df[mask] + + def __repr__(self): + self.connect() + summary = [""] + + summary.append("db_file: %s" % self.dbfile) + summary.append("connected: %s" % self.isconnected()) + summary.append("Number of records: %i" % self.read_data().shape[0]) + + return "\n".join(summary) + + @classmethod + def info(cls) -> str: + return cls.__repr__(cls) + + @staticmethod + def from_dict(obj: Dict) -> "DB": + return DB(**obj) + + def _instance2row(self): + row = {} + for key in 
['wmo', 'cyc', 'n_predictions', 'cfg_cycle_duration', + 'cfg_life_expectancy', 'cfg_parking_depth', 'cfg_profile_depth', + 'cfg_reco_free_surface_drift', 'cfg_vertical_speed', 'velocity_name', + 'velocity_download', 'velocity_domain_size', 'swarm_size', 'path_root']: + row.update({key: getattr(self, key, None)}) + + if hasattr(self, 'cfg'): + for key in self.cfg.mission: + row.update({"cfg_%s" % key: self.cfg.mission[key]}) + + if hasattr(self, 'velocity'): + for key in ['name', 'download', 'domain_size']: + if key in self.velocity: + row.update({"velocity_%s" % key: self.velocity[key]}) + + if hasattr(self, 'path_root'): + row.update({'path_root': str(getattr(self, 'path_root', None))}) + + return row + + @classmethod + def _row2dict(self, row) -> dict: + """Convert a db row to a dictionary input""" + data = {} + data.update({'wmo': row['wmo']}) + data.update({'cyc': row['cyc']}) + data.update({'n_predictions': row['n_predictions']}) + + cfg = FloatConfiguration('recovery') + for key in cfg.mission: + cfg.update(key, row["cfg_%s" % key]) + data.update({'cfg': cfg}) + + vel = {'name': None, 'download': None, 'domain_size': None} + for key in vel: + vel.update({key: row["velocity_%s" % key]}) + data.update({'velocity': vel}) + + data.update({'swarm_size': row['swarm_size']}) + data.update({'path_root': row['path_root']}) + + return data + + def checkin(self): + """Add one new record to the database""" + new_row = self._instance2row() + + for key, value in new_row.items(): + if value is None: + raise ValueError("Cannot checkin a new record with missing value for '%s'" % key) + + self.put_data(new_row) + + def checkout(self): + """Remove record from the database""" + row = self._instance2row() + + for key, value in row.items(): + if value is None: + raise ValueError("Cannot id a record to remove with missing value for '%s'" % key) + + self.del_data(row) + + @property + def checked(self): + row = self._instance2row() + return self.exists(row) + + @property + def uid(self): + row = self._instance2row() + return self.get_data(row)['uid'].values[0] + + @property + def path(self): + row = self._instance2row() + return self.get_data(row)['path'].values[0] + + @property + def path_obj(self): + row = self._instance2row() + return Row2Path(pd.DataFrame([row]).iloc[0]) + + @property + def record(self) -> pd.DataFrame: + row = self._instance2row() + return self.get_data(row) + + @property + def items(self) -> Iterable[tuple[Hashable, "DB"]]: + for irow, df_row in self.record.iterrows(): + yield irow, DB.from_dict(self._row2dict(df_row)) diff --git a/vfrecovery/core/deployment_plan.py b/vfrecovery/core/deployment_plan.py new file mode 100644 index 0000000..edb9a20 --- /dev/null +++ b/vfrecovery/core/deployment_plan.py @@ -0,0 +1,45 @@ +import numpy as np +import pandas as pd +from vfrecovery.json import Profile + + +def setup_deployment_plan(P: Profile, swarm_size: int = 120) -> pd.DataFrame: + """Create a deployment plan as a :class:`pandas.DataFrame` + + We will deploy a collection of virtual floats that are located around the real float with random perturbations in space and time + """ + + # Amplitude of the profile position perturbations in the zonal (deg), meridional (deg), and temporal (hours) directions: + rx = 0.5 + ry = 0.5 + rt = 0 + + # + lonc, latc = P.location.longitude, P.location.latitude + # box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2] + + a, b = lonc - rx / 2, lonc + rx / 2 + lon = (b - a) * np.random.random_sample((swarm_size,)) + a + + a, b = latc - ry / 2, latc + 
ry / 2 + lat = (b - a) * np.random.random_sample((swarm_size,)) + a + + a, b = 0, rt + dtim = (b - a) * np.random.random_sample((swarm_size,)) + a + dtim = np.round(dtim).astype(int) + tim = pd.to_datetime([P.location.time + np.timedelta64(dt, 'h') for dt in dtim]) + # dtim = (b-a) * np.random.random_sample((nfloats, )) + a + # dtim = np.round(dtim).astype(int) + # tim2 = pd.to_datetime([this_date - np.timedelta64(dt, 'h') for dt in dtim]) + # tim = np.sort(np.concatenate([tim2, tim1])) + + # Round time to the o(5mins), same as step=timedelta(minutes=5) in the simulation params + tim = tim.round(freq='5min') + + # + df = pd.DataFrame( + [tim, lat, lon, np.arange(0, swarm_size) + 9000000, np.full_like(lon, 0), ['VF' for l in lon], ['?' for l in lon]], + index=['date', 'latitude', 'longitude', 'wmo', 'cycle_number', 'institution_code', 'file']).T + df['date'] = pd.to_datetime(df['date']) + + return df diff --git a/vfrecovery/core/floats_config.py b/vfrecovery/core/floats_config.py new file mode 100644 index 0000000..edeeabd --- /dev/null +++ b/vfrecovery/core/floats_config.py @@ -0,0 +1,58 @@ +from virtualargofleet import FloatConfiguration, ConfigParam + + +def setup_floats_config( + wmo: int, + cyc: int, + cfg_parking_depth: float, + cfg_cycle_duration: float, + cfg_profile_depth: float, + cfg_free_surface_drift: int, + logger, +) -> FloatConfiguration: + """Load float configuration at a given cycle number and possibly overwrite data with user parameters + + Parameters + ---------- + wmo: int + cyc: int + cfg_parking_depth: float, + cfg_cycle_duration: float, + cfg_profile_depth: float, + cfg_free_surface_drift: int, + logger + + Returns + ------- + :class:`virtualargofleet.FloatConfiguration` + + """ + try: + CFG = FloatConfiguration([wmo, cyc]) + except: + logger.error("Can't load this profile configuration, fall back on default values") + CFG = FloatConfiguration('default') + + if cfg_parking_depth is not None: + logger.debug("parking_depth=%i is overwritten with %i" % (CFG.mission['parking_depth'], + float(cfg_parking_depth))) + CFG.update('parking_depth', float(cfg_parking_depth)) + + if cfg_cycle_duration is not None: + logger.debug("cycle_duration=%i is overwritten with %i" % (CFG.mission['cycle_duration'], + float(cfg_cycle_duration))) + CFG.update('cycle_duration', float(cfg_cycle_duration)) + + if cfg_profile_depth is not None: + logger.debug("profile_depth=%i is overwritten with %i" % (CFG.mission['profile_depth'], + float(cfg_profile_depth))) + CFG.update('profile_depth', float(cfg_profile_depth)) + + CFG.params = ConfigParam(key='reco_free_surface_drift', + value=int(cfg_free_surface_drift), + unit='cycle', + description='First cycle with free surface drift', + dtype=int) + + return CFG + diff --git a/vfrecovery/core/predict.py b/vfrecovery/core/predict.py new file mode 100644 index 0000000..b604a05 --- /dev/null +++ b/vfrecovery/core/predict.py @@ -0,0 +1,178 @@ +import time +from argopy.utils import is_wmo, is_cyc, check_cyc, check_wmo +from pathlib import Path +from typing import Union +import os +import logging +import json +import pandas as pd + +from .simulation_handler import Simulation +from .utils import pp_obj, get_a_log_filename + +root_logger = logging.getLogger("vfrecovery_root_logger") +sim_logger = logging.getLogger("vfrecovery_simulation") + + +class log_this: + + def __init__(self, txt, log_level): + """Log text to simulation and possibly root logger(s)""" + getattr(root_logger, log_level.lower())(txt) + getattr(sim_logger, log_level.lower())(txt) + + 
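+ # Usage sketch (hypothetical message): a single call like `log_this.info("Velocity field ready")` + # mirrors the same record to the console logger (`vfrecovery_root_logger`) and to the simulation + # log file handler (`vfrecovery_simulation`) set up in `predict_function` below. +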
@staticmethod + def info(txt) -> 'log_this': + return log_this(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'log_this': + return log_this(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'log_this': + return log_this(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'log_this': + return log_this(txt, 'ERROR') + + +def predict_function( + wmo: int, + cyc: int, + velocity: str, + n_predictions: int, + output_path: Union[str, Path], + cfg_parking_depth: float, + cfg_cycle_duration: float, + cfg_profile_depth: float, + cfg_free_surface_drift: int, + swarm_size: int, + domain_min_size: float, + overwrite: bool, + lazy: bool, + figure: bool, + log_level: str, +) -> str: + """ + Execute VirtualFleet-Recovery predictor and save results as a JSON string + + Parameters + ---------- + wmo + cyc + velocity + n_predictions + output_path + cfg_parking_depth + cfg_cycle_duration + cfg_profile_depth + cfg_free_surface_drift + swarm_size + domain_min_size + overwrite + lazy + figure + log_level + + Returns + ------- + str: a JSON formatted str + + """ # noqa + if log_level == "QUIET": + root_logger.disabled = True + log_level = "CRITICAL" + root_logger.setLevel(level=getattr(logging, log_level.upper())) + + execution_start = time.time() + process_start = time.process_time() + # run_id = pd.to_datetime('now', utc=True).strftime('%Y%m%d%H%M%S') + + # Validate arguments: + assert is_wmo(wmo) + assert is_cyc(cyc) + wmo = check_wmo(wmo)[0] + cyc = check_cyc(cyc)[0] + + if velocity.upper() not in ['ARMOR3D', 'GLORYS']: + raise ValueError("Velocity field must be one in: ['ARMOR3D', 'GLORYS']") + else: + velocity = velocity.upper() + + # Build the list of cycle numbers to work with `cyc`: + # The `cyc` list follows this structure: + # [PREVIOUS_CYCLE_USED_AS_INITIAL_CONDITIONS, CYCLE_NUMBER_REQUESTED_BY_USER, ADDITIONAL_CYCLE_i, ADDITIONAL_CYCLE_i+1, ...] 
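+ # e.g. for cyc=120 with n_predictions=2, the list built below is [119, 120, 121, 122].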
+ # Prepend previous cycle number that will be used as initial conditions for the prediction of `cyc`: + cyc = [cyc - 1, cyc] + # Append additional `n_predictions` cycle numbers: + [cyc.append(cyc[1] + n + 1) for n in range(n_predictions)] + + if output_path is None: + output_path = Path(__file__).parents[2].joinpath("vfrecovery_simulations_data") + output_path = Path(output_path) + output_path.mkdir(parents=True, exist_ok=True) + + # Set-up simulation logger + templogfile = get_a_log_filename(output_path) + simlogfile = logging.FileHandler(templogfile, mode='a') + simlogfile.setFormatter(logging.Formatter("%(asctime)s | %(levelname)s | %(name)s:%(filename)s | %(message)s", + datefmt='%Y/%m/%d %I:%M:%S')) + sim_logger.handlers = [] + sim_logger.addHandler(simlogfile) + + # Redirect all warnings to log files + logging.captureWarnings(True) + + # + S = Simulation(wmo, cyc, + swarm_size=swarm_size, + velocity=velocity, + output_path=output_path, + overwrite=overwrite, + lazy=lazy, + logger=log_this, + figure=figure, + ) + S.setup(cfg_parking_depth=cfg_parking_depth, + cfg_cycle_duration=cfg_cycle_duration, + cfg_profile_depth=cfg_profile_depth, + cfg_free_surface_drift=cfg_free_surface_drift, + domain_min_size=domain_min_size, + ) + if not S.is_registered or overwrite: + S.execute() + S.predict() + S.postprocess() + S.finish(execution_start, process_start) # Save on disk in json file + else: + log_this.info("This simulation already exists, stop here and return existing results") + + # Move log file to the appropriate final destination: + templogfile.rename(get_a_log_filename(S.output_path)) + + # Load json results to return + with open(S.run_file, 'r') as f: + jsdata = json.load(f) + return json.dumps(jsdata, indent=4) + + + +# def predictor(args): +# """Prediction manager""" +# +# if args.save_figure: +# mplbackend = matplotlib.get_backend() +# matplotlib.use('Agg') + +# if args.save_figure: +# plt.close('all') +# # Restore Matplotlib backend +# matplotlib.use(mplbackend) +# +# if not args.save_sim: +# shutil.rmtree(output_path) +# +# return results_js +# diff --git a/vfrecovery/core/simulation_handler.py b/vfrecovery/core/simulation_handler.py new file mode 100644 index 0000000..443676e --- /dev/null +++ b/vfrecovery/core/simulation_handler.py @@ -0,0 +1,405 @@ +import time +import argopy.plot as argoplot +from virtualargofleet import Velocity, VirtualFleet +from pathlib import Path +import pandas as pd +import numpy as np +import os +from datetime import timedelta +import logging +import tempfile + + +from vfrecovery.json import MetaData, MetaDataSystem, MetaDataComputation +from vfrecovery.downloaders import get_velocity_field +from .utils import ArgoIndex2jsProfile, get_domain, pp_obj +from .floats_config import setup_floats_config +from .deployment_plan import setup_deployment_plan +from .trajfile_handler import Trajectories +from .analysis_handler import RunAnalyser +from .db import DB, Row2Path + + +root_logger = logging.getLogger("vfrecovery_root_logger") + + +class default_logger: + + def __init__(self, txt, log_level): + """Log text to simulation and possibly root logger(s)""" + getattr(root_logger, log_level.lower())(txt) + + @staticmethod + def info(txt) -> 'default_logger': + return default_logger(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'default_logger': + return default_logger(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'default_logger': + return default_logger(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'default_logger': + return 
default_logger(txt, 'ERROR') + + +class Simulation_core: + def __init__(self, wmo, cyc, **kwargs): + self.run_file = None + + self.wmo = wmo + self.cyc = cyc + self.path_root = kwargs['output_path'] + self.logger = default_logger if 'logger' not in kwargs else kwargs['logger'] + + self.overwrite = bool(kwargs['overwrite']) if 'overwrite' in kwargs else False + self.lazy = bool(kwargs['lazy']) if 'lazy' in kwargs else True + self.figure = bool(kwargs['figure']) if 'figure' in kwargs else True + + self.logger.info("%s \\" % ("=" * 55)) + self.logger.info("STARTING SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) + + # self.logger.info("n_predictions: %i" % n_predictions) + self.logger.info("Working with cycle numbers list: %s" % str(cyc)) + + # + url = argoplot.dashboard(wmo, url_only=True) + txt = "You can check this float dashboard while we prepare the prediction: %s" % url + self.logger.info(txt) + + # Create Simulation Meta-data class holder + self.MD = MetaData.from_dict({ + 'swarm_size': kwargs['swarm_size'], + 'velocity_field': kwargs['velocity'], + 'system': MetaDataSystem.auto_load(), + 'vfconfig': None, # will be filled later + 'computation': None, # will be filled later + }) + + +class Simulation_setup(Simulation_core): + + def _instance2rec(self): + """Convert this instance data to a dictionary to be used with the DB module""" + cyc = self.cyc[1] + n_predictions = len(self.cyc) - 1 - 1 # Remove initial conditions and cyc target, as passed by user + + data = {'wmo': self.wmo, + 'cyc': cyc, + 'n_predictions': n_predictions, + 'cfg': self.MD.vfconfig, + 'velocity': {'name': self.MD.velocity_field, + 'download': pd.to_datetime(self.ds_vel.attrs['access_date']), + 'domain_size': self.domain_min_size}, + 'swarm_size': self.MD.swarm_size, + 'path_root': self.path_root, + } + + return data + + @property + def is_registered(self): + """Check if this simulation has already been registered or not""" + return DB.from_dict(self._instance2rec()).checked + + @property + def output_path(self): + """Path to run output""" + p = self.path_root.joinpath(DB.from_dict(self._instance2rec()).path_obj.run) + p.mkdir(parents=True, exist_ok=True) + return p + + @property + def velocity_path(self): + """Path to velocity output""" + p = self.path_root.joinpath(DB.from_dict(self._instance2rec()).path_obj.velocity) + p.mkdir(parents=True, exist_ok=True) + return p + + @property + def temp_path(self): + """A temporary path""" + return tempfile.gettempdir() + + def _setup_load_observed_profiles(self): + """Load observed float profiles index""" + + self.logger.info("Loading float profiles index") + self.P_obs, self.df_obs = ArgoIndex2jsProfile(self.wmo, self.cyc, cache=False, cachedir=str(self.path_root)) + [self.logger.debug("Observed profiles list: %s" % pp_obj(p)) for p in self.P_obs] + + if len(self.P_obs) == 1: + self.logger.info('Real-time scenario: True position unknown !') + else: + self.logger.info('Evaluation scenario: Historical position known') + + def _setup_float_config(self, **kwargs): + """Load and setup float configuration""" + self.logger.info("Loading float configuration") + + # Load real float configuration at the previous cycle, to be used for the simulation as initial conditions. 
+ # (the loaded config is possibly overwritten with user defined cfg_* parameters) + self.CFG = setup_floats_config(self.wmo, self.cyc[0], + kwargs['cfg_parking_depth'], + kwargs['cfg_cycle_duration'], + kwargs['cfg_profile_depth'], + kwargs['cfg_free_surface_drift'], + self.logger, + ) + self.logger.debug(pp_obj(self.CFG)) + self.MD.vfconfig = self.CFG # Register floats configuration to the simulation meta-data class + + def _setup_load_velocity_data(self, **kwargs): + # Define domain to load velocity for: + # In space: + self.domain_min_size = kwargs['domain_min_size'] + domain, domain_center = get_domain(self.P_obs, self.domain_min_size) + # and time: + cycle_period = int(np.round(self.CFG.mission['cycle_duration'] / 24)) # Get the float cycle period (in days) + self.n_days = (len(self.cyc)-1) * cycle_period + + self.logger.info("Velocity field should cover %i cycles of %i hours (%i days)" % (len(self.cyc)-1, + 24 * cycle_period, + self.n_days)) + self.logger.info("Retrieve info for %s velocity starting on %s" % ( + self.MD.velocity_field, self.P_obs[0].location.time)) + + self.ds_vel, velocity_file, new_file = get_velocity_field(domain, self.P_obs[0].location.time, + n_days=self.n_days, + output=self.temp_path, + dataset=self.MD.velocity_field, + logger=self.logger, + lazy=self.lazy, + ) + if new_file: + # We force overwriting results because we're using a new velocity field + self.logger.warning("Found a new velocity field, force overwriting results") + self.overwrite = True + + self.velocity_file = velocity_file + self.logger.debug(pp_obj(self.ds_vel)) + self.logger.info("%s loaded %s field from %s to %s" % ( + "Lazily" if self.lazy else "Hard", + self.MD.velocity_field, + pd.to_datetime(self.ds_vel['time'][0].values).strftime("%Y-%m-%dT%H:%M:%S"), + pd.to_datetime(self.ds_vel['time'][-1].values).strftime("%Y-%m-%dT%H:%M:%S")) + ) + + def setup(self, **kwargs): + """Fulfill all requirements for the simulation""" + + # Load data in memory: + self._setup_load_observed_profiles() + self._setup_float_config(**kwargs) + self._setup_load_velocity_data(**kwargs) + + # Possibly save setup files to proper final folders: + + # and save the final virtual float configuration on file: + self.CFG.to_json(self.output_path.joinpath("floats_configuration.json")) + + # move velocity file from temporary to final output path: + # self.logger.info("self.temp_path: %s" % self.temp_path) + # self.logger.info("self.velocity_file: %s" % self.velocity_file) + # self.logger.info("self.output_path: %s" % self.output_path) + + # + self.run_file = self.output_path.joinpath("results.json") + # self.logger.info("Simulation results will be registered under:\n%s" % self.run_file) + self.logger.info("Check if such a simulation has already been registered: %s" % self.is_registered) + self.logger.debug("Setup terminated") + + return self + + +class Simulation_execute(Simulation_setup): + + def _execute_get_velocity(self): + self.logger.info("Create a velocity object (this can take a while)") + self.VEL = Velocity(model='GLORYS12V1' if self.MD.velocity_field == 'GLORYS' else self.MD.velocity_field, + src=self.ds_vel, + logger=self.logger, + ) + + if self.figure: + self.logger.info("Plot velocity") + for it in [0, -1]: + _, _, fname = self.VEL.plot(it=it, + iz=0, + save=True, + workdir=self.velocity_path + ) + # self.logger.info(fname) + # self.logger.info(self.velocity_path.stem) + # fname.rename( + # str(fname).replace("velocity_%s" % self.VEL.name, + # Path(self.velocity_file).name.replace(".nc", "") + # ) + # ) + + 
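+ # For reference: `setup_deployment_plan` (vfrecovery/core/deployment_plan.py), consumed by + # `_execute_get_plan` below, returns one row per virtual float, scattered uniformly within + # +/-0.25 deg (rx = ry = 0.5) around the observed profile, with virtual WMO numbers starting + # at 9000000 and deployment times rounded to 5 minutes. +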
def _execute_get_plan(self): + # VirtualFleet, get a deployment plan: + self.logger.info("Create a deployment plan") + df_plan = setup_deployment_plan(self.P_obs[0], swarm_size=self.MD.swarm_size) + self.logger.info( + "Set %i virtual floats to deploy (i.e. swarm size = %i)" % (df_plan.shape[0], df_plan.shape[0])) + + self.PLAN = {'lon': df_plan['longitude'], + 'lat': df_plan['latitude'], + 'time': np.array([np.datetime64(t) for t in df_plan['date'].dt.strftime('%Y-%m-%d %H:%M').array]), + } + + def execute(self): + """Execute a VirtualFleet simulation""" + + self._execute_get_velocity() + self._execute_get_plan() + + # Set up VirtualFleet: + self.logger.info("Create a VirtualFleet instance") + self.VFleet = VirtualFleet(plan=self.PLAN, + fieldset=self.VEL, + mission=self.CFG, + verbose_events=False) + self.logger.debug(pp_obj(self.VFleet)) + + # Execute the simulation: + self.logger.info("Starting simulation") + + # Remove traj file if exists: + # output_path = os.path.join(WORKDIR, 'trajectories_%s.zarr' % get_sim_suffix(args, CFG)) + # if os.path.exists(output_path): + # shutil.rmtree(output_path) + + # self.traj_file = os.path.join(self.output_path, 'trajectories_%s.zarr' % get_simulation_suffix(self.MD)) + self.traj_file = self.output_path.joinpath('trajectories.zarr') + if os.path.exists(self.traj_file) and not self.overwrite: + self.logger.warning("Using data from a previous similar run (no simulation executed)") + else: + self.VFleet.simulate(duration=timedelta(hours=self.n_days * 24 + 1), + step=timedelta(minutes=5), + record=timedelta(minutes=30), + output=True, + output_folder=self.output_path, + output_file='trajectories.zarr', + verbose_progress=True, + ) + self.logger.info("Simulation ended with success") + self.logger.info(pp_obj(self.VFleet)) + return self + + +class Simulation_predict(Simulation_execute): + + def _predict_read_trajectories(self): + + # Get simulated profiles index: + self.logger.info("Extract swarm profiles index") + + self.traj = Trajectories(self.traj_file) + self.traj.get_index().add_distances(origin=self.P_obs[0]) + self.logger.debug(pp_obj(self.traj)) + + if self.figure: + self.logger.info("Plot swarm initial and final states") + self.traj.plot_positions(domain_scale=2., + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='swarm_states', + ) + + def _predict_positions(self): + """Make predictions based on simulated profile density""" + self.logger.info("Predict float cycle position(s) from swarm simulation") + self.run = RunAnalyser(self.traj.index, self.df_obs) + self.run.fit_predict() + self.logger.debug(pp_obj(self.run)) + + if self.figure: + self.logger.info("Plot predictions") + self.run.plot_predictions( + vel=self.VEL, + vel_depth=self.CFG.mission['parking_depth'], + save=True, + workdir=self.output_path, + fname='predictions', + orient='portrait', + ) + + def predict(self): + """Make float profile predictions based on the swarm simulation""" + self._predict_read_trajectories() + self._predict_positions() + return self + + +class Simulation_postprocess(Simulation_predict): + + def _postprocess_metrics(self): + if self.run.has_ref: + self.logger.info("Computing prediction metrics for past cycles with observed ground truth") + self.run.add_metrics(self.VEL) + + def _postprocess_swarm_metrics(self): + self.logger.info("Computing swarm metrics") + Plist_updated = [] + for p in self.run.jsobj.predictions: + this_cyc = p.virtual_cycle_number + swarm_metrics = 
self.traj.analyse_pairwise_distances(virtual_cycle_number=this_cyc, + save_figure=False, + # save_figure=self.save_figure, + ) + p.metrics.trajectory_lengths = swarm_metrics.trajectory_lengths + p.metrics.pairwise_distances = swarm_metrics.pairwise_distances + Plist_updated.append(p) + self.run.jsobj.predictions = Plist_updated + + def postprocess(self): + self._postprocess_metrics() + self._postprocess_swarm_metrics() + return self + + +class Simulation(Simulation_postprocess): + """Base class to execute the simulation/prediction workflow + + >>> S = Simulation(wmo, cyc, swarm_size=swarm_size, velocity=velocity, output_path=Path('.')) + >>> S.setup() + >>> S.execute() + >>> S.predict() + >>> S.postprocess() + >>> S.to_json() + """ + def register(self): + """Save simulation to the registry""" + return DB.from_dict(self._instance2rec()).checkin() + + def finish(self, execution_start: float, process_start: float): + """Click timers and save results to finish""" + self.MD.computation = MetaDataComputation.from_dict({ + 'date': pd.to_datetime('now', utc=True), + 'wall_time': pd.Timedelta(time.time() - execution_start, 's'), + 'cpu_time': pd.Timedelta(time.process_time() - process_start, 's'), + 'description': None, + }) + self.logger.debug(pp_obj(self.MD.computation)) + + self.to_json(fp=self.run_file) + self.logger.info("Simulation results and analysis saved in:\n%s" % self.run_file) + + self.register() + self.logger.debug("Simulation recorded in registry") + + self.logger.info("END OF SIMULATION: WMO=%i / CYCLE_NUMBER=%i" % (self.wmo, self.cyc[1])) + self.logger.info("%s /" % ("=" * 55)) + return self + + def to_json(self, fp=None): + y = self.run.jsobj # :class:`Simulation` instance + y.meta_data = self.MD + return y.to_json(fp=fp) diff --git a/vfrecovery/core/trajfile_handler.py b/vfrecovery/core/trajfile_handler.py new file mode 100644 index 0000000..199e8d1 --- /dev/null +++ b/vfrecovery/core/trajfile_handler.py @@ -0,0 +1,547 @@ +import xarray as xr +import pandas as pd +import numpy as np +import matplotlib +from scipy.signal import find_peaks +from sklearn.metrics import pairwise_distances +import matplotlib.pyplot as plt +from virtualargofleet import VelocityField +from pathlib import Path +import logging + +from vfrecovery.utils.misc import get_cfg_str +from vfrecovery.plots.utils import map_add_features, save_figurefile +from vfrecovery.json import Profile, Location +from vfrecovery.json import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState + + +root_logger = logging.getLogger("vfrecovery_root_logger") + + +class default_logger: + + def __init__(self, txt, log_level): + """Log text to simulation and possibly root logger(s)""" + getattr(root_logger, log_level.lower())(txt) + + @staticmethod + def info(txt) -> 'default_logger': + return default_logger(txt, 'INFO') + + @staticmethod + def debug(txt) -> 'default_logger': + return default_logger(txt, 'DEBUG') + + @staticmethod + def warning(txt) -> 'default_logger': + return default_logger(txt, 'WARNING') + + @staticmethod + def error(txt) -> 'default_logger': + return default_logger(txt, 'ERROR') + + +class Trajectories: + """Trajectory file manager for VFrecovery + + Examples: + --------- + T = Trajectories(traj_zarr_file) + T.swarm_size + T.sim_cycles + df = T.to_index() + df = T.get_index().add_distances() + jsdata, fig, ax = T.analyse_pairwise_distances(cycle=1, save_figure=True) + """ + + def __init__(self, zfile, **kwargs): + self.zarr_file = zfile + self.obj = xr.open_zarr(zfile) + self._index = None + 
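+ # (the profile index is computed lazily by `to_index` and cached in `self._index`)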
self.logger = default_logger if 'logger' not in kwargs else kwargs['logger'] + + @property + def swarm_size(self): + # len(self.obj['trajectory']) + return self.obj['trajectory'].shape[0] + + @property + def sim_cycles(self): + """Return list of cycles simulated""" + cycs = np.unique(self.obj['cycle_number']) + last_obs_phase = \ + self.obj.where(self.obj['cycle_number'] == cycs[-1])['cycle_phase'].isel(trajectory=0).isel(obs=-1).values[ + np.newaxis][0] + if last_obs_phase < 3: + cycs = cycs[0:-1] + return cycs + + def __repr__(self): + summary = [""] + summary.append("Swarm size: %i floats" % self.swarm_size) + start_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=0).values) + end_date = pd.to_datetime(self.obj['time'].isel(trajectory=0, obs=-1).values) + summary.append("Simulation length: %s, from %s to %s" % ( + pd.Timedelta(end_date - start_date, 'd'), start_date.strftime("%Y/%m/%d"), end_date.strftime("%Y/%m/%d"))) + return "\n".join(summary) + + # def to_index_par(self) -> pd.DataFrame: + # # Deployment loc: + # deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values + # + # def worker(ds, cyc, x0, y0): + # mask = np.logical_and((ds['cycle_number'] == cyc).compute(), + # (ds['cycle_phase'] >= 3).compute()) + # this_cyc = ds.where(mask, drop=True) + # if len(this_cyc['time']) > 0: + # data = { + # 'date': this_cyc.isel(obs=-1)['time'].values, + # 'latitude': this_cyc.isel(obs=-1)['lat'].values, + # 'longitude': this_cyc.isel(obs=-1)['lon'].values, + # 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, + # 'cyc': cyc, + # # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, + # 'deploy_lon': x0, + # 'deploy_lat': y0, + # } + # return pd.DataFrame(data) + # else: + # return None + # + # cycles = np.unique(self.obj['cycle_number']) + # rows = [] + # with concurrent.futures.ThreadPoolExecutor() as executor: + # future_to_url = { + # executor.submit( + # worker, + # self.obj, + # cyc, + # deploy_lon, + # deploy_lat + # ): cyc + # for cyc in cycles + # } + # futures = concurrent.futures.as_completed(future_to_url) + # for future in futures: + # data = None + # try: + # data = future.result() + # except Exception: + # raise + # finally: + # rows.append(data) + # + # rows = [r for r in rows if r is not None] + # df = pd.concat(rows).reset_index() + # df['wmo'] = df['wmo'].astype(int) + # df['cyc'] = df['cyc'].astype(int) + # # df['cycle_phase'] = df['cycle_phase'].astype(int) + # self._index = df + # + # return self._index + + def to_index(self) -> pd.DataFrame: + """Compute and return index (profile dataframe from trajectory dataset) + + Create a Profile index :class:`pandas.dataframe` with columns: [data, latitude ,longitude, wmo, cyc, deploy_lon, deploy_lat] + from a trajectory :class:`xarray.dataset`. + + There is one dataframe row for each dataset trajectory cycle. + + We use the last trajectory point of given cycle number (with cycle phase >= 3) to identify a profile location. + + If they are N trajectories simulating C cycles, there will be about a maximum of N*C rows in the dataframe. 
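+ For example, a swarm of 100 virtual floats simulating 3 cycles yields at most 300 rows.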
+ + Returns + ------- + :class:`pandas.dataframe` + """ + if self._index is None: + + # Deployment loc: + deploy_lon, deploy_lat = self.obj.isel(obs=0)['lon'].values, self.obj.isel(obs=0)['lat'].values + + def worker(ds, cyc, x0, y0): + mask_end_of_cycle = np.logical_or((ds['cycle_phase'] == 3).compute(), (ds['cycle_phase'] == 4).compute()) + + mask = np.logical_and((ds['cycle_number'] == cyc).compute(), mask_end_of_cycle) + this_cyc = ds.where(mask, drop=True) + + if len(this_cyc['time']) > 0: + + # Check if we didn't lose some particles: + n = len(x0) - len(this_cyc.isel(obs=-1)['time'].values) + if n > 0: + raise ValueError("%i virtual floats did not make all required cycles. They probably reached " + "the edge of the velocity field domain. You should try to increase the domain " + "size of the simulation." % n) + else: + data = { + 'date': this_cyc.isel(obs=-1)['time'].values, + 'latitude': this_cyc.isel(obs=-1)['lat'].values, + 'longitude': this_cyc.isel(obs=-1)['lon'].values, + 'wmo': 9000000 + this_cyc.isel(obs=-1)['trajectory'].values, + 'cyc': this_cyc.isel(obs=-1)['cycle_number'].values, + # 'cycle_phase': this_cyc.isel(obs=-1)['cycle_phase'].values, + 'deploy_lon': x0, + 'deploy_lat': y0, + } + return pd.DataFrame(data) + else: + return None + + cycles = np.unique(self.obj['cycle_number']) + rows = [] + for cyc in cycles: + if ~ np.isnan(cyc): + df = worker(self.obj, cyc, deploy_lon, deploy_lat) + rows.append(df) + rows = [r for r in rows if r is not None] + if len(rows) > 0: + df = pd.concat(rows).reset_index() + df['wmo'] = df['wmo'].astype(int) + df['cyc'] = df['cyc'].astype(int) + # df['cycle_phase'] = df['cycle_phase'].astype(int) + self._index = df + else: + raise ValueError("") + + return self._index + + def get_index(self): + """Compute index and return self""" + self.to_index() + return self + + @property + def index(self): + self.get_index() + return self._index + + def add_distances(self, origin: Location = None) -> pd.DataFrame: + """Compute profiles distance to some origin + + Parameters + ---------- + origin: :class:`Location` + + Returns + ------- + :class:`pandas.dataframe` + """ + + # Compute distance between the predicted profile and the initial profile location from the deployment plan + # We assume that virtual floats are sequentially taken from the deployment plan + # Since distances are very short, we compute a simple rectangular distance + + # Observed cycles: + # obs_cyc = np.unique(this_profile['cyc']) + + # Simulated cycles: + # sim_cyc = np.unique(this_df['cyc']) + + df = self.index + + x2, y2 = origin.longitude, origin.latitude # real float initial position + df['distance'] = np.nan + df['rel_lon'] = np.nan + df['rel_lat'] = np.nan + df['distance_origin'] = np.nan + + def worker(row): + # Simulation profile coordinates: + x0, y0 = row['deploy_lon'], row['deploy_lat'] # virtual float initial position + x1, y1 = row['longitude'], row['latitude'] # virtual float position + + # Distance between each pair of cycles of virtual floats: + dist = np.sqrt((y1 - y0) ** 2 + (x1 - x0) ** 2) + row['distance'] = dist + + # Shift between each pair of cycles: + dx, dy = x1 - x0, y1 - y0 + # Get a relative displacement from real float initial position: + row['rel_lon'] = x2 + dx + row['rel_lat'] = y2 + dy + + # Distance between the predicted profile and the observed initial profile + dist = np.sqrt((y2 - y0) ** 2 + (x2 - x0) ** 2) + row['distance_origin'] = dist + + return row + + df = df.apply(worker, axis=1) + self._index = df + + return self.index + + def 
analyse_pairwise_distances(self, + virtual_cycle_number: int = 1, + save_figure: bool = False, + workdir: str = '.', + sim_suffix=None, + mplbackend: str = 'Agg', + this_cfg=None, + this_args: dict = None, + ): + + def pairs_pdf(longitude, latitude): + Xi = np.array((longitude, latitude)).T + di = pairwise_distances(Xi, n_jobs=-1) + di = np.triu(di) + di[di == 0] = np.nan + + xi = di.flatten() + xi = xi[~np.isnan(xi)] + xi = xi[:, np.newaxis] + + histi, bin_edgesi = np.histogram(xi, bins=100, density=1) + dhi = np.diff(bin_edgesi[0:2]) + peaksi, _ = find_peaks(histi / np.max(histi), height=.4, distance=20) + + return histi, bin_edgesi, peaksi, dhi, di + + # def get_hist_and_peaks(this_d): + # x = this_d.flatten() + # x = x[~np.isnan(x)] + # x = x[:, np.newaxis] + # hist, bin_edges = np.histogram(x, bins=100, density=1) + # # dh = np.diff(bin_edges[0:2]) + # peaks, _ = find_peaks(hist / np.max(hist), height=.4, distance=20) + # return {'pdf': hist, 'bins': bin_edges[0:-1], 'Npeaks': len(peaks)} + + # Squeeze traj file to virtual_cycle_number (sim can have more than 1 cycle): + ds = self.obj.where((self.obj['cycle_number'] == virtual_cycle_number).compute(), drop=True) + ds = ds.compute() + + # Compute swarm trajectories relative to the single/only real float initial position: + # (Make all swarm trajectories to start at the same first position) + lon0, lat0 = self.obj.isel(obs=0)['lon'].values[0], self.obj.isel(obs=0)['lat'].values[ + 0] # deployment locations + lon, lat = ds['lon'].values, ds['lat'].values + ds['lonc'] = xr.DataArray(lon - np.broadcast_to(lon[:, 0][:, np.newaxis], lon.shape) + lon0, + dims=['trajectory', 'obs']) + ds['latc'] = xr.DataArray(lat - np.broadcast_to(lat[:, 0][:, np.newaxis], lat.shape) + lat0, + dims=['trajectory', 'obs']) + + # Compute trajectory lengths: + ds['length'] = np.sqrt(ds.diff(dim='obs')['lon'] ** 2 + ds.diff(dim='obs')['lat'] ** 2).sum(dim='obs') + ds['lengthc'] = np.sqrt(ds.diff(dim='obs')['lonc'] ** 2 + ds.diff(dim='obs')['latc'] ** 2).sum(dim='obs') + + # Compute initial points pairwise distances, PDF and nb of peaks: + X = ds.isel(obs=0) + X = X.isel(trajectory=~np.isnan(X['lon'])) + hist0, bin_edges0, peaks0, dh0, d0 = pairs_pdf(X['lon'].values, X['lat'].values) + + # Compute final points pairwise distances, PDF and nb of peaks: + X = ds.isel(obs=-1) + X = X.isel(trajectory=~np.isnan(X['lon'])) + dsf = X + hist, bin_edges, peaks, dh, d = pairs_pdf(X['lon'].values, X['lat'].values) + + # Compute final points pairwise distances (relative traj), PDF and nb of peaks: + X1 = ds.isel(obs=-1) + X1 = X1.isel(trajectory=~np.isnan(X1['lonc'])) + dsfc = X1 + hist1, bin_edges1, peaks1, dh1, d1 = pairs_pdf(X1['lonc'].values, X1['latc'].values) + + # Compute the overlapping between the initial and relative state PDFs: + bin_unif = np.arange(0, np.max([bin_edges0, bin_edges1]), np.min([dh0, dh1])) + dh_unif = np.diff(bin_unif[0:2]) + hist0_unif = np.interp(bin_unif, bin_edges0[0:-1], hist0) + hist_unif = np.interp(bin_unif, bin_edges[0:-1], hist) + hist1_unif = np.interp(bin_unif, bin_edges1[0:-1], hist1) + + # Area under hist1 AND hist0: + # overlapping = np.sum(hist1_unif[hist0_unif >= hist1_unif]*dh_unif) + overlapping = np.sum(hist_unif[hist0_unif >= hist_unif] * dh_unif) + + # Ratio of the max PDF ranges: + # staggering = np.max(bin_edges1)/np.max(bin_edges0) + staggering = np.max(bin_edges) / np.max(bin_edges0) + + if np.isinf(overlapping / len(peaks)): + raise ValueError("Can't compute the prediction score, infinity !") + + # Store metrics as 
VFRschema instance + PD = PairwiseDistances.from_dict({ + 'description': None, + 'initial_state': PairwiseDistancesState.from_dict({ + 'median': np.nanmedian(d0), 'std': np.nanstd(d0), 'nPDFpeaks': len(peaks0), 'description': None, + }), + 'final_state': PairwiseDistancesState.from_dict({ + 'median': np.nanmedian(d), 'std': np.nanstd(d), 'nPDFpeaks': len(peaks), 'description': None, + }), + 'relative_state': PairwiseDistancesState.from_dict({ + 'median': np.nanmedian(d1), 'std': np.nanstd(d1), 'nPDFpeaks': len(peaks1), 'description': None, + }), + 'overlapping': overlapping, + 'staggering': staggering, + 'score': overlapping / len(peaks), + }) + PD.std_ratio = PD.final_state.std / PD.initial_state.std + + M = Metrics.from_dict({ + "description": None, + "trajectory_lengths": TrajectoryLengths.from_dict({ + "median": np.nanmedian(ds['length'].values), + "std": np.nanstd(ds['length'].values), + "description": None, + }), + "pairwise_distances": PD, + }) + + # Figure: + if save_figure: + initial_mplbackend = matplotlib.get_backend() + matplotlib.use(mplbackend) + + fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 10), dpi=90) + ax, ix = ax.flatten(), -1 + cmap = plt.cm.coolwarm + + ix += 1 + dd = dsf['length'].values + ax[ix].plot(X0[:, 0], X0[:, 1], '.', markersize=3, color='grey', alpha=0.5, markeredgecolor=None, zorder=0) + ax[ix].scatter(X[:, 0], X[:, 1], c=dd, zorder=10, s=3, cmap=cmap) + ax[ix].grid() + this_traj = int(dsf.isel(trajectory=np.argmax(dd))['trajectory'].values[np.newaxis][0]) + ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], + ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'r', + zorder=13, label='Longest traj.') + this_traj = int(dsf.isel(trajectory=np.argmin(dd))['trajectory'].values[np.newaxis][0]) + ax[ix].plot(ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lon'], + ds.where(ds['trajectory'] == this_traj, drop=True).isel(trajectory=0)['lat'], 'b', + zorder=13, label='Shortest traj.') + ax[ix].legend() + ax[ix].set_title('Trajectory lengths') + + ix += 1 + ax[ix].plot(bin_edges0[0:-1], hist0, label='Initial (%i peak)' % len(peaks0), color='gray') + ax[ix].plot(bin_edges[0:-1], hist, label='Final (%i peak)' % len(peaks), color='lightblue') + ax[ix].plot(bin_edges[peaks], hist[peaks], "x", label='Peaks') + ax[ix].legend() + ax[ix].grid() + ax[ix].set_xlabel('Pairwise distance [degree]') + line1 = "Staggering: %0.4f" % staggering + line2 = "Overlapping: %0.4f" % overlapping + line3 = "Score: %0.4f" % (overlapping / len(peaks)) + ax[ix].set_title("Pairwise distances PDF: [%s / %s / %s]" % (line1, line2, line3)) + + if this_args is not None: + line0 = "VirtualFleet recovery swarm simulation for WMO %i, starting from cycle %i, predicting cycle %i\n%s" % \ + (this_args.wmo, this_args.cyc[0] - 1, this_args.cyc[0], get_cfg_str(this_cfg)) + line1 = "Simulation made with %s and %i virtual floats" % (this_args.velocity, this_args.nfloats) + else: + line0 = "VirtualFleet recovery swarm simulation for cycle %i" % virtual_cycle_number + line1 = "Simulation made with %i virtual floats" % (self.swarm_size) + + fig.suptitle("%s\n%s" % (line0, line1), fontsize=15) + plt.tight_layout() + + if sim_suffix is not None: + filename = 'vfrecov_metrics01_%s_cyc%i' % (sim_suffix, virtual_cycle_number) + else: + filename = 'vfrecov_metrics01_cyc%i' % (virtual_cycle_number) + save_figurefile(fig, filename, workdir) + + # Rewind mpl backend to initial position: + matplotlib.use(initial_mplbackend) + 
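+ + # Note: the 'Score' in the figure title is `overlapping / len(peaks)`, i.e. the overlap + # between the initial and final pairwise-distance PDFs, penalised by the number of peaks + # detected in the final-state PDF (a multi-modal swarm is a less trustworthy prediction).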
+ # Exit + if save_figure: + return M, fig, ax + else: + return M + + def HBOX(self, s: float = 1.): + """Swarm bounding box + + Parameters + ---------- + s: float + Set how much to extend maps outward the deployment 'box' + + Returns + ------- + list + """ + df_plan = self.index.iloc[0] + + box = [np.min([self.index['deploy_lon'].min(), self.index['longitude'].min(), self.index['rel_lon'].min()]), + np.max([self.index['deploy_lon'].max(), self.index['longitude'].max(), self.index['rel_lon'].max()]), + np.min([self.index['deploy_lat'].min(), self.index['latitude'].min(), self.index['rel_lat'].min()]), + np.max([self.index['deploy_lat'].max(), self.index['latitude'].max(), self.index['rel_lat'].max()])] + rx, ry = df_plan['longitude'].max() - df_plan['longitude'].min(), df_plan['latitude'].max() - df_plan[ + 'latitude'].min() + r = np.min([rx, ry]) + ebox = [box[0] - s * r, box[1] + s * r, box[2] - s * r, box[3] + s * r] + return ebox + + def plot_positions(self, + domain_scale: float = 1, + vel: VelocityField = None, + vel_depth: float = 0., + save: bool = True, + workdir: Path = Path('.'), + fname: str = 'swarm_positions', + mplbackend: str = 'Agg', + ): + """ + + >>> T = Trajectories(traj_file) + >>> T.plot_positions(vel_depth=cfg.mission['parking_depth']) + """ + import cartopy.crs as ccrs + + initial_mplbackend = matplotlib.get_backend() + matplotlib.use(mplbackend) + + ebox = self.HBOX(s=domain_scale) + + fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(25, 7), dpi=120, + subplot_kw={'projection': ccrs.PlateCarree()}, + sharex=True, sharey=True) + ax = ax.flatten() + + for ix in [0, 1, 2]: + ax[ix].set_extent(ebox) + ax[ix] = map_add_features(ax[ix]) + + if vel is not None: + vel.field.isel(time=0 if ix == 0 else -1).interp(depth=vel_depth).plot.quiver(x="longitude", + y="latitude", + u=vel.var['U'], + v=vel.var['V'], + ax=ax[ix], + color='grey', + alpha=1 if ix == 0 else 0.5, + add_guide=False) + + ax[ix].plot(self.index['deploy_lon'], self.index['deploy_lat'], '.', + markersize=3, color='grey', alpha=0.1, markeredgecolor=None, zorder=0) + if ix == 0: + title = 'Initial Velocity field at %0.2fm and deployment plan' % vel_depth + elif ix == 1: + x, y, c = self.index['longitude'], self.index['latitude'], self.index['cyc'] + title = 'Final float positions' + # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) + sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) + elif ix == 2: + x, y, c = self.index['rel_lon'], self.index['rel_lat'], self.index['cyc'] + title = 'Final floats position relative to last float position' + # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None) + sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None) + + # ax[ix] = map_add_profiles(ax[ix], this_profile) + ax[ix].set_title(title) + + # fig.suptitle("VirtualFleet recovery prediction for WMO %i: starting from cycle %i, predicting cycle %s\n%s" % + # (wmo, cyc[0], cyc[1:], get_cfg_str(cfg)), fontsize=15) + plt.tight_layout() + if save: + save_figurefile(fig, fname, workdir) + + # Rewind mpl backend to initial position: + matplotlib.use(initial_mplbackend) + + return fig, ax diff --git a/vfrecovery/core/utils.py b/vfrecovery/core/utils.py new file mode 100644 index 0000000..8760ea4 --- /dev/null +++ b/vfrecovery/core/utils.py @@ -0,0 +1,110 @@ +import pandas as pd +import numpy as np +from typing import List +from argopy import ArgoIndex +import argopy.plot as argoplot +from argopy.errors import DataNotFound +import 
hashlib +import base64 + + +from vfrecovery.json import Profile, MetaData + +pp_obj = lambda x: "\n%s" % "\n".join(["\t%s" % line for line in x.__repr__().split("\n")]) + + +def ArgoIndex2df_obs(a_wmo, a_cyc, cache:bool=False, cachedir:str='.') -> pd.DataFrame: + """Retrieve WMO/CYC Argo index entries as :class:`pd.DataFrame` + + Parameters + ---------- + wmo: int + cyc: Union(int, List) + + Returns + ------- + :class:`pd.DataFrame` + """ + host = "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if os.uname()[0] == 'Darwin' else "https://data-argo.ifremer.fr" + # host = "/home/ref-argo/gdac" if not os.uname()[0] == 'Darwin' else "~/data/ARGO" + idx = ArgoIndex(host=host, cache=cache, cachedir=cachedir).search_wmo_cyc(a_wmo, a_cyc) + if idx.N_MATCH == 0: + raise DataNotFound("This float has no cycle %i usable as initial conditions for a simulation of %i" % (a_cyc[0], a_cyc[1])) + else: + df = idx.to_dataframe() + df = df.sort_values(by='date') + return df + + +def df_obs2jsProfile(df_obs) -> List[Profile]: + Plist = Profile.from_ArgoIndex(df_obs) + for P in Plist: + P.description = "Observed Argo profile" + P.location.description = None + return Plist + + +def ArgoIndex2jsProfile(a_wmo, a_cyc, cache:bool=False, cachedir:str='.') -> List[Profile]: + """Retrieve WMO/CYC Argo index entries as a list of :class:`vfrecovery.json.Profile` + + Parameters + ---------- + wmo: int + cyc: Union(int, List) + + Returns + ------- + :class:`vfrecovery.json.Profile` + """ + df_obs = ArgoIndex2df_obs(a_wmo, a_cyc, cache=cache, cachedir=cachedir) + return df_obs2jsProfile(df_obs), df_obs + + +def get_simulation_suffix(md: MetaData) -> str: + """Compose a simulation unique ID for output files""" + # suf = '%s_%i' % (this_args.velocity, this_args.nfloats) + suf = 'VEL%s_SWS%i_CYT%i_PKD%i_PFD%i_FSD%i' % (md.velocity_field, + md.swarm_size, + int(md.vfconfig.mission['cycle_duration']), + int(md.vfconfig.mission['parking_depth']), + int(md.vfconfig.mission['profile_depth']), + int(md.vfconfig.mission['reco_free_surface_drift'])) + return suf + + +def get_domain(Plist, size): + # Get mean position of the observed profiles: + c = [np.mean([p.location.longitude for p in Plist]), np.mean([p.location.latitude for p in Plist])] + # Set the domain: + domain = [c[0] - size / 2, c[0] + size / 2, + c[1] - size / 2, c[1] + size / 2] + domain = [np.round(d, 3) for d in domain] + return domain, c + + +def make_hash_sha256(o): + hasher = hashlib.sha256() + hasher.update(repr(make_hashable(o)).encode()) + return base64.b64encode(hasher.digest()).decode() + + +def make_hashable(o): + if isinstance(o, (tuple, list)): + return tuple((make_hashable(e) for e in o)) + + if isinstance(o, dict): + return tuple(sorted((k, make_hashable(v)) for k, v in o.items())) + + if isinstance(o, (set, frozenset)): + return tuple(sorted(make_hashable(e) for e in o)) + + return o + + +def get_a_log_filename(op, name='simulation_'): + fname = lambda i: "%s%0.3d.log" % (name, i) + i = 1 + while op.joinpath(fname(i)).exists(): + i += 1 + return op.joinpath(fname(i)) \ No newline at end of file diff --git a/vfrecovery/downloaders/__init__.py b/vfrecovery/downloaders/__init__.py new file mode 100644 index 0000000..1afd68c --- /dev/null +++ b/vfrecovery/downloaders/__init__.py @@ -0,0 +1,3 @@ +from .armor3d import Armor3d +from .glorys import Glorys +from .core import get_velocity_field diff --git a/vfrecovery/downloaders/armor3d.py b/vfrecovery/downloaders/armor3d.py new file mode 100644 index 0000000..a238d2e --- /dev/null +++ 
b/vfrecovery/downloaders/armor3d.py @@ -0,0 +1,116 @@ +import pandas as pd +import copernicusmarine + + +class Armor3d: + """Global Ocean 1/4° Multi Observation Product ARMOR3D + + Product description: + https://data.marine.copernicus.eu/product/MULTIOBS_GLO_PHY_TSUV_3D_MYNRT_015_012 + + If start_date + n_days <= 2022-12-28: + Delivers the multi-year reprocessed (REP) weekly data + + otherwise: + Delivers the near-real-time (NRT) weekly data + + Examples + -------- + >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() + >>> Armor3d([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() + + """ + + def __init__(self, box, start_date, n_days=1, max_depth=2500): + """ + Parameters + ---------- + box: list(float) + Define domain to load: [lon_min, lon_max, lat_min, lat_max] + start_date: :class:`pandas.Timestamp` + Starting date of the time series to load. Since ARMOR3D is weekly, the effective starting + date will be the first weekly period including the user-defined ``start_date`` + n_days: int (default=1) + Number of days to load data for. + max_depth: float (default=2500) + Maximum depth levels to load data for. + """ + self.box = box + self.start_date = start_date + self.n_days = n_days + self.max_depth = max_depth + + dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') + if start_date + dt <= pd.to_datetime('2022-12-28', utc=True): + self._loader = self._get_rep + self.dataset_id = "dataset-armor-3d-rep-weekly" + self.time_axis = pd.Series(pd.date_range('19930106', '20221228', freq='7D').tz_localize("UTC")) + else: + self._loader = self._get_nrt + self.dataset_id = "dataset-armor-3d-nrt-weekly" + self.time_axis = pd.Series( + pd.date_range('20190102', pd.to_datetime('now', utc=True).strftime("%Y%m%d"), freq='7D').tz_localize( + "UTC")[0:-1]) + + if start_date < self.time_axis.iloc[0]: + raise ValueError('Date out of bounds') + elif start_date + dt > self.time_axis.iloc[-1]: + raise ValueError('Date out of bounds, %s > %s' % ( + start_date + dt, self.time_axis.iloc[-1])) + + def _get_this(self, dataset_id): + start_date = self.time_axis[self.time_axis <= self.start_date].iloc[-1] + if self.n_days == 1: + end_date = start_date + else: + end_date = \ + self.time_axis[self.time_axis <= self.start_date + (self.n_days + 7) * pd.Timedelta(1, 'D')].iloc[-1] + + ds = copernicusmarine.open_dataset( + dataset_id=dataset_id, + minimum_longitude=self.box[0], + maximum_longitude=self.box[1], + minimum_latitude=self.box[2], + maximum_latitude=self.box[3], + maximum_depth=self.max_depth, + start_datetime=start_date.strftime("%Y-%m-%dT%H:%M:%S"), + end_datetime=end_date.strftime("%Y-%m-%dT%H:%M:%S"), + variables=['ugo', 'vgo'] + ) + return ds + + def _get_rep(self): + """multi-year reprocessed (REP) weekly data + + Returns + ------- + :class:xarray.dataset + """ + return self._get_this(self.dataset_id) + + def _get_nrt(self): + """near-real-time (NRT) weekly data + + Returns + ------- + :class:xarray.dataset + """ + return self._get_this(self.dataset_id) + + def to_xarray(self): + """Load and return data as a :class:`xarray.dataset` + + Returns + ------- + :class:xarray.dataset + """ + return self._loader() + + def __repr__(self): + summary = [""] + summary.append("dataset_id: %s" % self.dataset_id) + summary.append("First day: %s" % self.start_date) + summary.append("N days: %s" % self.n_days) + summary.append("Domain: %s" % self.box) + summary.append("Max depth (m): %s" % self.max_depth) + return "\n".join(summary) diff --git 
diff --git a/vfrecovery/downloaders/core.py b/vfrecovery/downloaders/core.py
new file mode 100644
index 0000000..331912b
--- /dev/null
+++ b/vfrecovery/downloaders/core.py
@@ -0,0 +1,71 @@
+import pandas as pd
+import os
+import xarray as xr
+
+from . import Glorys, Armor3d
+from vfrecovery.core.utils import pp_obj
+
+
+def get_velocity_field(a_box,
+                       a_date,
+                       n_days=1,
+                       output='.',
+                       dataset='ARMOR3D',
+                       max_depth=6000,
+                       logger=None,
+                       lazy=True,
+                       ) -> tuple:
+    """Return the velocity field as an :class:`xarray.Dataset`, force download/save if not lazy
+
+    Parameters
+    ----------
+    a_box
+    a_date
+    n_days
+    output
+    dataset
+    max_depth
+    logger
+    lazy
+
+    Returns
+    -------
+    tuple
+    """
+
+    access_date = pd.to_datetime('now', utc=True).strftime("%Y%m%d")
+
+    def get_velocity_filename(dataset, n_days):
+        fname = os.path.join(output, 'velocity_%s_%idays_%s.nc' % (dataset, n_days, access_date))
+        return fname
+
+    velocity_file = get_velocity_filename(dataset, n_days)
+
+    if not os.path.exists(velocity_file):
+        new = True
+        # Define the data loader:
+        loader = Armor3d if dataset == 'ARMOR3D' else Glorys
+
+        # Make an instance
+        # (we add a 1-day safety buffer at the beginning to make sure that we have velocity at the deployment time)
+        loader = loader(a_box, a_date - pd.Timedelta(1, 'D'), n_days=n_days+1, logger=logger, max_depth=max_depth)
+
+        # Load data from the Copernicus Marine Data store:
+        ds = loader.to_xarray()  # Lazy by default
+        if logger is not None:
+            logger.debug(pp_obj(loader))
+
+        # Save to file for later re-use:
+        # (this can take a while and is often slower than the lazy mode!)
+        if not lazy:
+            if logger is not None:
+                logger.debug("Saving velocity to file for later re-use")
+            ds.to_netcdf(velocity_file)
+
+    else:
+        new = False
+        ds = xr.open_dataset(velocity_file)
+
+    ds.attrs['access_date'] = access_date
+
+    return ds, velocity_file, new
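+
+# Usage sketch (illustrative; the box follows the [lon_min, lon_max, lat_min, lat_max]
+# convention used above, and the date is hypothetical):
+# >>> ds, velocity_file, new = get_velocity_field([-60, -55, 30, 35],
+# ...                                             pd.to_datetime('20230101', utc=True),
+# ...                                             n_days=5, dataset='GLORYS')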
"*.nc")))[0]) + return file_list + + def preprocess(this_ds): + idpt = np.argwhere(this_ds['depth'].values > 2000)[0][0] + ilon = np.argwhere(this_ds['longitude'].values >= a_box[0])[0][0], \ + np.argwhere(this_ds['longitude'].values >= a_box[1])[0][0] + ilat = np.argwhere(this_ds['latitude'].values >= a_box[2])[0][0], \ + np.argwhere(this_ds['latitude'].values >= a_box[3])[0][0] + this_ds = this_ds.isel({'depth': range(0, idpt), + 'longitude': range(ilon[0], ilon[1]), + 'latitude': range(ilat[0], ilat[1])}) + return this_ds + + root = "/home/ref-ocean-model-public" if not os.uname()[0] == 'Darwin' else "/Volumes/MODEL-PUBLIC/" + src = os.path.join(root, "multiparameter/physic/global/cmems/global-analysis-forecast-phy-001-024") + # puts("\t%s" % src, color=COLORS.green) + flist = get_forecast_files(a_start_date, n_days=n_days) + if len(flist) == 0: + raise ValueError("This float cycle is too old for this velocity field.") + glorys = xr.open_mfdataset(flist, preprocess=preprocess, combine='nested', concat_dim='time', parallel=True) + # + return glorys + + +class Glorys: + """Global Ocean 1/12° Physics Re-Analysis or Forecast + + If start_date + n_days <= 2021-01-09: + Delivers the multi-year reprocessed (REP) daily data + https://resources.marine.copernicus.eu/product-detail/GLOBAL_MULTIYEAR_PHY_001_030 + + otherwise: + Delivers the near-real-time (NRT) Analysis and Forecast daily data + https://resources.marine.copernicus.eu/product-detail/GLOBAL_ANALYSISFORECAST_PHY_001_024 + + Examples + -------- + >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20091130', utc=True)).to_xarray() + >>> Glorys([-25, -13, 6.5, 13], pd.to_datetime('20231121', utc=True), n_days=10).to_xarray() + + """ + + def __init__(self, box, start_date, n_days=1, max_depth=2500, **kwargs): + """ + Parameters + ---------- + box: list(float) + Define domain to load: [lon_min, lon_max, lat_min, lat_max] + start_date: :class:`pandas.Timestamp` + Starting date of the time series to load. + n_days: int (default=1) + Number of days to load data for. + max_depth: float (default=2500) + Maximum depth levels to load data for. 
+ """ + self.box = box + self.start_date = start_date + self.n_days = n_days + self.max_depth = max_depth + + dt = pd.Timedelta(n_days, 'D') if n_days > 1 else pd.Timedelta(0, 'D') + if start_date + dt <= pd.to_datetime('2021-01-09', utc=True): + self._loader = self._get_reanalysis + self.dataset_id = "cmems_mod_glo_phy_my_0.083deg_P1D-m" + else: + self._loader = self._get_forecast + self.dataset_id = "cmems_mod_glo_phy-cur_anfc_0.083deg_P1D-m" + + self.logger = kwargs['logger'] if 'logger' in kwargs else default_logger + self.overwrite_metadata_cache = kwargs['overwrite_metadata_cache'] if 'overwrite_metadata_cache' in kwargs else False + self.disable_progress_bar = kwargs['disable_progress_bar'] if 'disable_progress_bar' in kwargs else False + + def _get_this(self, dataset_id, dates): + ds = copernicusmarine.open_dataset( + dataset_id=dataset_id, + minimum_longitude=self.box[0], + maximum_longitude=self.box[1], + minimum_latitude=self.box[2], + maximum_latitude=self.box[3], + maximum_depth=self.max_depth, + start_datetime=dates[0].strftime("%Y-%m-%dT%H:%M:%S"), + end_datetime=dates[1].strftime("%Y-%m-%dT%H:%M:%S"), + variables=['uo', 'vo'], + disable_progress_bar=self.disable_progress_bar, + overwrite_metadata_cache=self.overwrite_metadata_cache, + ) + return ds + + def _get_forecast(self): + """ + Returns + ------- + :class:`xarray.dataset` + """ + start_date = self.start_date + if self.n_days == 1: + end_date = start_date + else: + end_date = start_date + pd.Timedelta(self.n_days + 1, 'D') + return self._get_this(self.dataset_id, [start_date, end_date]) + + def _get_reanalysis(self): + """ + Returns + ------- + :class:`xarray.dataset` + """ + start_date = self.start_date + if self.n_days == 1: + end_date = start_date + else: + end_date = self.start_date + pd.Timedelta(self.n_days + 1, 'D') + return self._get_this(self.dataset_id, [start_date, end_date]) + + def to_xarray(self): + """ Load and return data as a :class:`xarray.dataset` + Returns + ------- + :class:`xarray.dataset` + """ + return self._loader() + + def __repr__(self): + summary = [""] + summary.append("dataset_id: %s" % self.dataset_id) + summary.append("Starting date: %s" % self.start_date) + summary.append("N days: %s" % self.n_days) + summary.append("Domain: %s" % self.box) + summary.append("Max depth (m): %s" % self.max_depth) + return "\n".join(summary) + diff --git a/vfrecovery/json_functions/VFRschema.py b/vfrecovery/json/VFRschema.py similarity index 75% rename from vfrecovery/json_functions/VFRschema.py rename to vfrecovery/json/VFRschema.py index c0e53f0..c60e373 100644 --- a/vfrecovery/json_functions/VFRschema.py +++ b/vfrecovery/json/VFRschema.py @@ -1,6 +1,6 @@ """ -Re-usable base class +Re-usable base class to handle JSON schema compliance """ @@ -8,9 +8,9 @@ import numpy as np import pandas as pd import ipaddress -from typing import List, Dict, Union +from typing import List, Union, TextIO import jsonschema -from jsonschema import Draft202012Validator +# from jsonschema import Draft202012Validator from referencing import Registry, Resource from pathlib import Path import logging @@ -19,8 +19,9 @@ class VFschema: - """A base class to export json files following a schema""" - schema_root: str = "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/json-schema/schemas" + """A base class to export json files complying to a public schema""" + # schema_root: str = "https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/main/schemas" + schema_root: str = 
"https://raw.githubusercontent.com/euroargodev/VirtualFleet_recovery/refactoring-as-a-clean-module-and-cli/schemas" def __init__(self, **kwargs): for key in self.required: @@ -35,10 +36,13 @@ def __repr__(self): summary = [] for p in self.properties: if p != 'description': - summary.append("%s=%s" % (p, getattr(self, p))) - if hasattr(self, 'description'): + v = getattr(self, p) + if np.asarray(v).dtype.kind in set('buifc'): + summary.append("%s=%s" % (p, v)) + else: + summary.append("%s='%s'" % (p, v)) + if hasattr(self, 'description') and getattr(self, 'description') is not None: summary.append("%s='%s'" % ('description', getattr(self, 'description'))) - return "%s(%s)" % (name, ", ".join(summary)) def _repr_html_(self): @@ -64,9 +68,13 @@ def default(self, obj): return obj.isoformat() if isinstance(obj, pd.Timedelta): return obj.isoformat() - if getattr(type(obj), '__name__') in ['Location', 'Profile', + if isinstance(obj, np.float32): + return float(obj) + if isinstance(obj, np.int64): + return int(obj) + if getattr(type(obj), '__name__') in ['Location', 'Profile', 'Trajectory', 'Metrics', 'TrajectoryLengths', 'PairwiseDistances', 'PairwiseDistancesState', - 'SurfaceDrift', 'Transit', + 'SurfaceDrift', 'Transit', 'Location_error', 'MetaDataSystem', 'MetaDataComputation', 'MetaData']: # We use "getattr(type(obj), '__name__')" in order to avoid circular import return obj.__dict__ @@ -83,17 +91,27 @@ def __dict__(self): if key != "description": value = getattr(self, key) d.update({key: value}) + if hasattr(self, 'schema'): + d.update({"$schema": "%s/%s.json" % (self.schema_root, getattr(self, 'schema'))}) return d - def to_json(self, fp=None, indent=4): + def to_json(self, fp: Union[str, Path, TextIO] = None, indent=4): """Save to JSON file or return a JSON string that can be loaded with json.loads()""" jsdata = self.__dict__ - if hasattr(self, 'schema'): - jsdata.update({"$schema": "%s/%s.json" % (self.schema_root, getattr(self, 'schema'))}) + # if hasattr(self, 'schema'): + # jsdata.update({"$schema": "%s/%s.json" % (self.schema_root, getattr(self, 'schema'))}) if fp is None: return json.dumps(jsdata, indent=indent, cls=self.JSONEncoder) else: - return json.dump(jsdata, fp, indent=indent, cls=self.JSONEncoder) + if hasattr(fp, 'write'): + return json.dump(jsdata, fp, indent=indent, cls=self.JSONEncoder) + else: + if isinstance(fp, str): + fp = Path(fp) + + with fp.open('w') as fpp: + o = json.dump(jsdata, fpp, indent=indent, cls=self.JSONEncoder) + return o class VFvalidators(VFschema): @@ -127,7 +145,7 @@ def validate(data, schema) -> Union[bool, List]: return True if len(errors) == 0 else errors def _is_numeric(self, x, name='?'): - assert isinstance(x, (int, float)), "'%s' must be a float, got '%s'" % (name, type(x)) + assert np.asarray(x).dtype.kind in set('buifc'), "'%s' must be numeric, got '%s'" % (name, type(x)) def _is_datetime(self, x, name='?'): assert isinstance(x, ( @@ -135,7 +153,7 @@ def _is_datetime(self, x, name='?'): name, type(x)) def _is_integer(self, x, name='?'): - assert isinstance(x, int), "'%s' must be an integer, got '%s'" % (name, type(x)) + assert isinstance(x, (int, np.integer)), "'%s' must be an integer, got '%s'" % (name, type(x)) def _is_timedelta(self, x, name='?'): assert isinstance(x, (pd.Timedelta)), "'%s' must be castable with pd.to_timedelta, got '%s'" % (name, type(x)) diff --git a/vfrecovery/json_functions/VFRschema_meta.py b/vfrecovery/json/VFRschema_meta.py similarity index 91% rename from vfrecovery/json_functions/VFRschema_meta.py rename to 
vfrecovery/json/VFRschema_meta.py index b0c5cbb..f3dfbbe 100644 --- a/vfrecovery/json_functions/VFRschema_meta.py +++ b/vfrecovery/json/VFRschema_meta.py @@ -5,7 +5,7 @@ import socket import psutil -from VFRschema import VFvalidators +from .VFRschema import VFvalidators from virtualargofleet.utilities import VFschema_configuration @@ -59,7 +59,6 @@ def auto_load() -> 'MetaDataSystem': class MetaDataComputation(VFvalidators): - system: MetaDataSystem = None date: pd.Timestamp = None cpu_time: pd.Timedelta = None wall_time: pd.Timedelta = None @@ -68,7 +67,6 @@ class MetaDataComputation(VFvalidators): description: str = "A set of meta-data to describe one computation run" required: List = [] properties: List = ["description", - "system", "cpu_time", "wall_time", "date"] def __init__(self, **kwargs): @@ -88,21 +86,22 @@ def from_dict(obj: Dict) -> 'MetaDataComputation': class MetaData(VFvalidators): - nfloats: int = None + swarm_size: int = None velocity_field: str = None vfconfig: VFschema_configuration = None computation: MetaDataComputation = None + system: MetaDataSystem = None schema: str = "VFrecovery-schema-metadata" description: str = "A set of meta-data to describe one simulation" - required: List = ["nfloats", "velocity_field", "vfconfig"] + required: List = ["swarm_size", "velocity_field", "vfconfig"] properties: List = ["description", - "nfloats", "velocity_field", - "vfconfig", "computation"] + "swarm_size", "velocity_field", + "vfconfig", "computation", "system"] def __init__(self, **kwargs): super().__init__(**kwargs) - self._is_integer(self.nfloats) + self._is_integer(self.swarm_size) if 'vfconfig' not in kwargs: self.vfconfig = None diff --git a/vfrecovery/json_functions/VFRschema_metrics.py b/vfrecovery/json/VFRschema_metrics.py similarity index 85% rename from vfrecovery/json_functions/VFRschema_metrics.py rename to vfrecovery/json/VFRschema_metrics.py index 37c2e1c..89ffa2d 100644 --- a/vfrecovery/json_functions/VFRschema_metrics.py +++ b/vfrecovery/json/VFRschema_metrics.py @@ -5,7 +5,6 @@ - trajectory_lengths: ArrayMetric - pairwise_distances: PairwiseDistances - surface_drift: SurfaceDrift - - trajectory_lengths: TrajectoryLengths - transit: Transit ``` @@ -20,14 +19,6 @@ - std_ratio ``` - ``` - SurfaceDrift(VFvalidators) - - surface_currents_speed - - surface_currents_speed_unit - - unit - - value - ``` - ``` TrajectoryLengths(ArrayMetric) - median @@ -39,6 +30,15 @@ - nPDFpeaks ``` + + ``` + SurfaceDrift(VFvalidators) + - surface_currents_speed + - surface_currents_speed_unit + - unit + - value + ``` + ``` Transit(VFvalidators) - value @@ -47,7 +47,8 @@ """ from typing import List, Dict -from VFRschema import VFvalidators +import pandas as pd +from .VFRschema import VFvalidators class ArrayMetric(VFvalidators): @@ -138,15 +139,37 @@ def from_dict(obj: Dict) -> 'Transit': return Transit(**obj) +class Location_error(VFvalidators): + distance: float = None + bearing: float = None + time: pd.Timedelta = None + + description: str = "Location error" + properties: List = ["distance", "bearing", "time", "description"] + required: List = [] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + if 'time' not in kwargs: + setattr(self, 'time', pd.NaT) + else: + self._is_timedelta(kwargs['time'], 'time') + + @staticmethod + def from_dict(obj: Dict) -> 'Location_error': + return Location_error(**obj) + + class Metrics(VFvalidators): - pairwise_distances: PairwiseDistances = None + error: Location_error = None + transit: Transit = None surface_drift: SurfaceDrift = None 
     trajectory_lengths: TrajectoryLengths = None
-    transit: Transit = None
+    pairwise_distances: PairwiseDistances = None
 
     schema: str = "VFrecovery-schema-metrics"
     description: str = "A set of metrics to describe/interpret one predicted VFrecovery profile location"
-    properties: List = ["trajectory_lengths", "pairwise_distances", "surface_drift", "trajectory_lengths", "transit", "description"]
+    properties: List = ["trajectory_lengths", "pairwise_distances", "surface_drift", "transit", "error", "description"]
     required: List = []
 
     @staticmethod
diff --git a/vfrecovery/json/VFRschema_profile.py b/vfrecovery/json/VFRschema_profile.py
new file mode 100644
index 0000000..18651f5
--- /dev/null
+++ b/vfrecovery/json/VFRschema_profile.py
@@ -0,0 +1,149 @@
+import pandas as pd
+import numpy as np
+from typing import List, Dict, Iterable
+import argopy.plot as argoplot
+
+from .VFRschema import VFvalidators
+from .VFRschema_metrics import Metrics
+
+
+class Location(VFvalidators):
+    longitude: float
+    latitude: float
+    time: pd.Timestamp
+
+    schema: str = "VFrecovery-schema-location"
+    description: str = "A set of longitude/latitude/time coordinates on Earth"
+    properties: List = ["longitude", "latitude", "time", "description"]
+    required: List = ["longitude", "latitude"]
+
+    def __init__(self, **kwargs):
+        """
+        Parameters
+        ----------
+        longitude: float
+        latitude: float
+        time: pd.Timestamp
+        """
+        super().__init__(**kwargs)
+        if 'time' not in kwargs:
+            # setattr(self, 'time', pd.to_datetime('now', utc=True))
+            setattr(self, 'time', pd.NaT)
+
+        self._validate_longitude(self.longitude)
+        self._validate_latitude(self.latitude)
+        self._validate_time(self.time)
+
+        self.longitude = np.round(self.longitude, 3)
+        self.latitude = np.round(self.latitude, 3)
+
+    @staticmethod
+    def from_dict(obj: Dict) -> 'Location':
+        return Location(**obj)
+
+    @staticmethod
+    def from_tuple(obj: tuple) -> 'Location':
+        if len(obj) == 2:
+            d = {'longitude': obj[0], 'latitude': obj[1]}
+        elif len(obj) == 3:
+            d = {'longitude': obj[0], 'latitude': obj[1], 'time': obj[2]}
+        elif len(obj) == 4:
+            d = {'longitude': obj[0], 'latitude': obj[1], 'time': obj[2], 'description': obj[3]}
+        else:
+            raise ValueError("'obj' must be a tuple with 2 to 4 elements: (longitude, latitude[, time[, description]])")
+        return Location(**d)
+
+
+class Profile(VFvalidators):
+    location: Location
+    cycle_number: int = None
+    wmo: int = None
+    url_float: str = None
+    url_profile: str = None
+    virtual_cycle_number: int = None
+    metrics: Metrics = None
+
+    schema: str = "VFrecovery-schema-profile"
+    description: str = "A set of meta-data and longitude/latitude/time coordinates on Earth, for an Argo float vertical profile location"
+    required: List = ["location"]
+    properties: List = ["location", "cycle_number", "wmo", "url_float", "url_profile", "virtual_cycle_number", "metrics", "description"]
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._validate_wmo(self.wmo)
+        self._validate_cycle_number(self.cycle_number)
+        self._validate_cycle_number(self.virtual_cycle_number)
+        if isinstance(kwargs['location'], dict):
+            self.location = Location.from_dict(kwargs['location'])
+
+    @staticmethod
+    def from_dict(obj: Dict) -> 'Profile':
+        """
+
+        Parameters
+        ----------
+        location: Location
+        wmo: int
+        cycle_number: int
+        url_float: str
+        url_profile: str
+        virtual_cycle_number: int
+        metrics: Metrics
+
+        """
+        return Profile(**obj)
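+
+    # Usage sketch (hypothetical values): from_dict() accepts a plain dictionary,
+    # with 'location' given either as a Location instance or as a dict:
+    # >>> p = Profile.from_dict({'location': {'longitude': -57.2, 'latitude': 32.1},
+    # ...                        'wmo': 6903091, 'cycle_number': 112})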
+    @staticmethod
+    def from_ArgoIndex(df: pd.DataFrame) -> List['Profile']:
+        Plist = []
+        df = df.sort_values(by='date')
+        for irow, this_obs in df.iterrows():
+            p = Profile.from_dict({
+                'location': Location.from_dict({'longitude': this_obs['longitude'],
+                                                'latitude': this_obs['latitude'],
+                                                'time': this_obs['date'].tz_localize('UTC')
+                                                }),
+                'wmo': this_obs['wmo'],
+                'cycle_number': this_obs['cyc'],
+                'url_float': argoplot.dashboard(wmo=this_obs['wmo'], url_only=True),
+                'url_profile': argoplot.dashboard(wmo=this_obs['wmo'], cyc=this_obs['cyc'], url_only=True),
+            })
+            Plist.append(p)
+        return Plist
+
+
+class Trajectory(VFvalidators):
+    locations: List[Location]
+
+    schema: str = "VFrecovery-schema-trajectory"
+    description: str = "Represents two or more VirtualFleet-Recovery locations that share a relationship"
+    required: List = ["locations"]
+    properties: List = ["locations", "description"]
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        if len(kwargs['locations']) < 2:
+            raise ValueError("'locations' must be a list with at least 2 elements")
+        L = []
+        for location in kwargs['locations']:
+            if isinstance(location, dict):
+                loc = Location.from_dict(location)
+            elif isinstance(location, Iterable):
+                loc = Location.from_tuple(location)
+            else:
+                raise ValueError("'locations' item must be a dictionary or an Iterable")
+            L.append(loc)
+        self.locations = L
+
+    @staticmethod
+    def from_dict(obj: Dict) -> 'Trajectory':
+        """
+
+        Parameters
+        ----------
+        locations: List[Location]
+        """
+        return Trajectory(**obj)
+
+    @staticmethod
+    def from_tuple(obj: tuple) -> 'Trajectory':
+        return Trajectory(**{'locations': obj})
diff --git a/vfrecovery/json_functions/VFRschema_simulation.py b/vfrecovery/json/VFRschema_simulation.py
similarity index 75%
rename from vfrecovery/json_functions/VFRschema_simulation.py
rename to vfrecovery/json/VFRschema_simulation.py
index b459628..7bcd181 100644
--- a/vfrecovery/json_functions/VFRschema_simulation.py
+++ b/vfrecovery/json/VFRschema_simulation.py
@@ -1,7 +1,7 @@
 from typing import List, Dict
-from VFRschema import VFvalidators
-from VFRschema_profile import Profile
-from VFRschema_meta import MetaData
+from .VFRschema import VFvalidators
+from .VFRschema_profile import Profile
+from .VFRschema_meta import MetaData
 
 
 class Simulation(VFvalidators):
@@ -12,7 +12,7 @@ class Simulation(VFvalidators):
 
     schema: str = "VFrecovery-schema-simulation"
     description: str = "This document records the details of one VirtualFleet-Recovery simulation and Argo float profile predictions"
-    required: List = ["initial_profile", "observations", "predictions"]
+    required: List = ["initial_profile", "predictions"]
     properties: List = ["initial_profile", "observations", "predictions", "meta_data", "description"]
 
     @staticmethod
diff --git a/vfrecovery/json/__init__.py b/vfrecovery/json/__init__.py
new file mode 100644
index 0000000..f697ee7
--- /dev/null
+++ b/vfrecovery/json/__init__.py
@@ -0,0 +1,4 @@
+from .VFRschema_profile import Profile, Location, Trajectory
+from .VFRschema_simulation import Simulation
+from .VFRschema_meta import MetaData, MetaDataSystem, MetaDataComputation
+from .VFRschema_metrics import Metrics, TrajectoryLengths, PairwiseDistances, PairwiseDistancesState, Transit, SurfaceDrift, Location_error
\ No newline at end of file
diff --git a/vfrecovery/json_functions/VFRschema_profile.py b/vfrecovery/json_functions/VFRschema_profile.py
deleted file mode 100644
index 15d6515..0000000
--- a/vfrecovery/json_functions/VFRschema_profile.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import pandas as pd
-from typing import List, Dict
-from VFRschema import VFvalidators
-from VFRschema_metrics import Metrics
-
-
-class Location(VFvalidators):
-    longitude: float
-
latitude: float - time: pd.Timestamp - - schema: str = "VFrecovery-schema-location" - description: str = "A set of longitude/latitude/time coordinates on Earth" - properties: List = ["longitude", "latitude", "time", "description"] - required: List = ["longitude", "latitude"] - - def __init__(self, **kwargs): - super().__init__(**kwargs) - if 'time' not in kwargs: - # setattr(self, 'time', pd.to_datetime('now', utc=True)) - setattr(self, 'time', pd.NaT) - - self._validate_longitude(self.longitude) - self._validate_latitude(self.latitude) - self._validate_time(self.time) - - @staticmethod - def from_dict(obj: Dict) -> 'Location': - return Location(**obj) - - -class Profile(VFvalidators): - location: Location - cycle_number: int = None - wmo: int = None - url_float: str = None - url_profile: str = None - virtual_cycle_number: int = None - metrics: Metrics = None - - schema: str = "VFrecovery-schema-profile" - description: str = "A set of meta-data and longitude/latitude/time coordinates on Earth, for an Argo float vertical profile location" - required: List = ["location"] - properties: List = ["location", "cycle_number", "wmo", "url_float", "url_profile", "virtual_cycle_number", "metrics", "description"] - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._validate_wmo(self.wmo) - self._validate_cycle_number(self.cycle_number) - self._validate_cycle_number(self.virtual_cycle_number) - - @staticmethod - def from_dict(obj: Dict) -> 'Profile': - return Profile(**obj) diff --git a/vfrecovery/json_functions/__init__.py b/vfrecovery/json_functions/__init__.py deleted file mode 100644 index e2df9c8..0000000 --- a/vfrecovery/json_functions/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from VFRschema_simulation import Simulation -from VFRschema_profile import Profile diff --git a/vfrecovery/logging_conf.json b/vfrecovery/logging_conf.json new file mode 100644 index 0000000..b5ae834 --- /dev/null +++ b/vfrecovery/logging_conf.json @@ -0,0 +1,60 @@ +{ + "disable_existing_loggers": false, + "formatters": { + "blank": { + "format": "%(message)s" + }, + "simple": { + "datefmt": "%Y-%m-%dT%H:%M:%SZ", + "format": "%(asctime)s [%(levelname)s]: %(message)s" + }, + "detailed": { + "datefmt": "%Y/%m/%d %I:%M:%S", + "format": "%(asctime)s | %(levelname)s | %(name)s:%(filename)s:%(lineno)d | %(message)s" + } + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "simple", + "stream": "ext://sys.stdout" + }, + "console_blank": { + "class": "logging.StreamHandler", + "formatter": "blank", + "stream": "ext://sys.stdout" + }, + "file": { + "class": "logging.handlers.RotatingFileHandler", + "formatter": "detailed", + "filename": "/tmp/junk.log", + "mode": "a", + "maxBytes": 10485760, + "backupCount": 5 + } + }, + "loggers": { + "vfrecovery_blank_logger": { + "handlers": [ + "console_blank" + ], + "level": "INFO", + "propagate": false + }, + "vfrecovery_root_logger": { + "handlers": [ + "console" + ], + "level": "WARNING", + "propagate": false + }, + "vfrecovery_simulation": { + "handlers": [ + "file" + ], + "level": "DEBUG", + "propagate": false + } + }, + "version": 1 +} diff --git a/vfrecovery/plots/__init__.py b/vfrecovery/plots/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/plots/positions.py b/vfrecovery/plots/positions.py new file mode 100644 index 0000000..311b6fc --- /dev/null +++ b/vfrecovery/plots/positions.py @@ -0,0 +1,59 @@ +import matplotlib.pyplot as plt +import pandas as pd +import cartopy.crs as ccrs +import numpy as np + 
+from .utils import get_HBOX, map_add_features, map_add_profiles, save_figurefile
+from vfrecovery.utils.misc import get_cfg_str
+
+
+def figure_positions(this_args, vel, df_sim, df_plan, this_profile, cfg, wmo, cyc, vel_name,
+                     dd=1, save_figure=False, workdir='.'):
+    # log.debug("Starts figure_positions")
+    ebox = get_HBOX(df_sim, dd=dd)
+    nfloats = df_plan.shape[0]
+
+    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(25, 7), dpi=120,
+                           subplot_kw={'projection': ccrs.PlateCarree()},
+                           sharex=True, sharey=True)
+    ax = ax.flatten()
+
+    for ix in [0, 1, 2]:
+        ax[ix].set_extent(ebox)
+        ax[ix] = map_add_features(ax[ix])
+
+        v = vel.field.isel(time=0).interp(depth=cfg.mission['parking_depth']).plot.quiver(x="longitude",
+                                                                                          y="latitude",
+                                                                                          u=vel.var['U'],
+                                                                                          v=vel.var['V'],
+                                                                                          ax=ax[ix],
+                                                                                          color='grey',
+                                                                                          alpha=0.5,
+                                                                                          add_guide=False)
+
+        ax[ix].plot(df_sim['deploy_lon'], df_sim['deploy_lat'], '.',
+                    markersize=3, color='grey', alpha=0.1, markeredgecolor=None, zorder=0)
+        if ix == 0:
+            title = 'Velocity field at %0.2fm and deployment plan' % cfg.mission['parking_depth']
+            v.set_alpha(1)
+            # v.set_color('black')
+        elif ix == 1:
+            x, y, c = df_sim['longitude'], df_sim['latitude'], df_sim['cyc']
+            title = 'Final float positions'
+            # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None)
+            sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None)
+        elif ix == 2:
+            x, y, c = df_sim['rel_lon'], df_sim['rel_lat'], df_sim['cyc']
+            title = 'Final floats position relative to last float position'
+            # sc = ax[ix].plot(x, y, '.', markersize=3, color='cyan', alpha=0.9, markeredgecolor=None)
+            sc = ax[ix].scatter(x, y, c=c, s=3, alpha=0.9, edgecolors=None)
+
+        ax[ix] = map_add_profiles(ax[ix], this_profile)
+        ax[ix].set_title(title)
+
+    # NOTE: get_sim_suffix is used below but is neither defined nor imported in this module (not part of this diff)
+    fig.suptitle("VirtualFleet recovery prediction for WMO %i: starting from cycle %i, predicting cycle %s\n%s" %
+                 (wmo, cyc[0], cyc[1:], get_cfg_str(cfg)), fontsize=15)
+    plt.tight_layout()
+    if save_figure:
+        save_figurefile(fig, "vfrecov_positions_%s" % get_sim_suffix(this_args, cfg), workdir)
+    return fig, ax
+
diff --git a/vfrecovery/plots/utils.py b/vfrecovery/plots/utils.py
new file mode 100644
index 0000000..aae37f3
--- /dev/null
+++ b/vfrecovery/plots/utils.py
@@ -0,0 +1,128 @@
+import os
+import numpy as np
+import argopy.plot as argoplot
+from pathlib import Path
+
+
+def save_figurefile(this_fig, a_name, folder: Path = Path('.')):
+    """
+    Parameters
+    ----------
+    this_fig
+    a_name
+    folder: Path
+
+    Returns
+    -------
+    path
+    """
+    figname = folder.joinpath("%s.png" % a_name)
+    this_fig.savefig(figname)
+    return figname
+
+
+def map_add_profiles(this_ax, this_profile):
+    """
+
+    Parameters
+    ----------
+    this_ax
+    this_profile
+
+    Returns
+    -------
+    this_ax
+    """
+    this_ax.plot(this_profile['longitude'][0], this_profile['latitude'][0], 'k.', markersize=10, markeredgecolor='w')
+    if this_profile.shape[0] > 1:
+        this_ax.plot(this_profile['longitude'][1], this_profile['latitude'][1], 'r.', markersize=10, markeredgecolor='w')
+        this_ax.arrow(this_profile['longitude'][0],
+                      this_profile['latitude'][0],
+                      this_profile['longitude'][1] - this_profile['longitude'][0],
+                      this_profile['latitude'][1] - this_profile['latitude'][0],
+                      length_includes_head=True, fc='k', ec='k', head_width=0.025, zorder=10)
+
+    return this_ax
+
+
+def map_add_features(this_ax):
+    """
+
+    Parameters
+    ----------
+    this_ax
+
+    Returns
+    -------
+    this_ax
+    """
+    argoplot.utils.latlongrid(this_ax)
+    this_ax.add_feature(argoplot.utils.land_feature, edgecolor="black")
+    return this_ax
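+
+
+# Composition sketch (illustrative; `fig`, `ax` and `this_profile` are hypothetical
+# objects, and matplotlib/cartopy imports follow those of positions.py):
+# >>> fig, ax = plt.subplots(subplot_kw={'projection': ccrs.PlateCarree()})
+# >>> ax = map_add_features(ax)
+# >>> ax = map_add_profiles(ax, this_profile)
+# >>> save_figurefile(fig, 'vfrecov_positions_example', Path('.'))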
+
+
+def map_add_cyc_nb(this_ax, this_df, lon='lon', lat='lat', cyc='cyc', pos='bt', fs=6, color='black'):
+    """Add cycle number labels next to positions on a map
+
+    Parameters
+    ----------
+    this_ax
+    this_df
+
+    Returns
+    -------
+    list of text labels
+    """
+    t = []
+    if pos == 'bt':
+        ha, va, label = 'center', 'top', "\n{}".format
+    elif pos == 'tp':
+        ha, va, label = 'center', 'bottom', "{}\n".format
+    for irow, row in this_df.iterrows():
+        this_t = this_ax.text(row[lon], row[lat], label(int(row[cyc])), ha=ha, va=va, fontsize=fs, color=color)
+        t.append(this_t)
+    return t
+
+
+def get_HBOX(df_sim, dd=1):
+    """
+
+    Parameters
+    ----------
+    df_sim
+    dd: float
+        How far (in degrees) to extend the map beyond the deployment 'box'
+
+    Returns
+    -------
+    list
+    """
+    rx = df_sim['deploy_lon'].max() - df_sim['deploy_lon'].min()
+    ry = df_sim['deploy_lat'].max() - df_sim['deploy_lat'].min()
+    lonc, latc = df_sim['deploy_lon'].mean(), df_sim['deploy_lat'].mean()
+    box = [lonc - rx / 2, lonc + rx / 2, latc - ry / 2, latc + ry / 2]
+    ebox = [box[i] + [-dd, dd, -dd, dd][i] for i in range(0, 4)]  # Extended 'box'
+
+    return ebox
+
+
+def get_EBOX(df_sim, df_plan, this_profile, s=1):
+    """Get a box for maps
+
+    Use all data positions from ``df_sim`` to make sure all points are visible
+    Extend the domain by a 's' scaling factor of the deployment plan domain
+
+    Parameters
+    ----------
+    df_sim
+    df_plan
+    this_profile
+    s: float, default: 1
+
+    Returns
+    -------
+    list
+    """
+    box = [np.min([df_sim['deploy_lon'].min(), df_sim['longitude'].min(), df_sim['rel_lon'].min(), this_profile['longitude'].min()]),
+           np.max([df_sim['deploy_lon'].max(), df_sim['longitude'].max(), df_sim['rel_lon'].max(), this_profile['longitude'].max()]),
+           np.min([df_sim['deploy_lat'].min(), df_sim['latitude'].min(), df_sim['rel_lat'].min(), this_profile['latitude'].min()]),
+           np.max([df_sim['deploy_lat'].max(), df_sim['latitude'].max(), df_sim['rel_lat'].max(), this_profile['latitude'].max()])]
+    rx, ry = df_plan['longitude'].max() - df_plan['longitude'].min(), df_plan['latitude'].max() - df_plan['latitude'].min()
+    r = np.min([rx, ry])
+    ebox = [box[0]-s*r, box[1]+s*r, box[2]-s*r, box[3]+s*r]
+
+    return ebox
+
diff --git a/vfrecovery/plots/velocity.py b/vfrecovery/plots/velocity.py
new file mode 100644
index 0000000..42a6d25
--- /dev/null
+++ b/vfrecovery/plots/velocity.py
@@ -0,0 +1,8 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+import cartopy.crs as ccrs
+import numpy as np
+from pathlib import Path
+from virtualargofleet import Velocity
+
+from .utils import map_add_profiles, map_add_features, save_figurefile
diff --git a/vfrecovery/python_interface/predict.py b/vfrecovery/python_interface/predict.py
new file mode 100644
index 0000000..d0c0055
--- /dev/null
+++ b/vfrecovery/python_interface/predict.py
@@ -0,0 +1,67 @@
+import json
+from vfrecovery.core.predict import predict_function
+from pathlib import Path
+from typing import Union
+
+
+def predict(
+        wmo: int,
+        cyc: int,
+        velocity: str = 'GLORYS',
+        output_path: Union[str, Path] = None,
+        n_predictions: int = 0,
+        cfg_parking_depth: float = None,
+        cfg_cycle_duration: float = None,
+        cfg_profile_depth: float = None,
+        cfg_free_surface_drift: int = 9999,
+        swarm_size: int = 100,
+        domain_min_size: float = 5.,
+        overwrite: bool = False,
+        lazy: bool = True,
+        figure: bool = True,
+        log_level: str = 'INFO',
+):
+    """
+    Execute the VirtualFleet-Recovery predictor and return results as a dict (parsed from the predictor JSON output)
+
+    Parameters
+    ----------
+    wmo
+    cyc
+    velocity
+    output_path
+    n_predictions
+    cfg_parking_depth
+    cfg_cycle_duration
+    cfg_profile_depth
+    cfg_free_surface_drift
+    swarm_size
+    domain_min_size
+    overwrite
+
lazy + figure + log_level + + Returns + ------- + data + + """ # noqa + results_json = predict_function( + wmo, cyc, + velocity=velocity, + output_path=output_path, + n_predictions=n_predictions, + cfg_parking_depth=cfg_parking_depth, + cfg_cycle_duration=cfg_cycle_duration, + cfg_profile_depth=cfg_profile_depth, + cfg_free_surface_drift=cfg_free_surface_drift, + swarm_size=swarm_size, + domain_min_size=domain_min_size, + overwrite=overwrite, + lazy=lazy, + figure=figure, + log_level=log_level, + ) + results = json.loads(results_json) + return results diff --git a/vfrecovery/static/assets/.gitkeep b/vfrecovery/static/assets/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/utils/__init__.py b/vfrecovery/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vfrecovery/utils/formatters.py b/vfrecovery/utils/formatters.py new file mode 100644 index 0000000..98f90fb --- /dev/null +++ b/vfrecovery/utils/formatters.py @@ -0,0 +1,28 @@ + +def strfdelta(tdelta, fmt): + """ + + Parameters + ---------- + tdelta + fmt + + Returns + ------- + + """ + d = {"days": tdelta.days} + d["hours"], rem = divmod(tdelta.seconds, 3600) + d["minutes"], d["seconds"] = divmod(rem, 60) + return fmt.format(**d) + + +class COLORS: + black = "30m" + red = "31m" + green = "32m" + yellow = "33m" + blue = "34m" + magenta = "35m" + cyan = "36m" + white = "37m" diff --git a/vfrecovery/utils/geo.py b/vfrecovery/utils/geo.py new file mode 100644 index 0000000..67c9fca --- /dev/null +++ b/vfrecovery/utils/geo.py @@ -0,0 +1,65 @@ +from math import radians, cos, sin, asin, sqrt +import pyproj +import numpy as np + + +def haversine(lon1, lat1, lon2, lat2): + """ + Calculate the great circle distance (in [km]) between two points + on the earth (specified in decimal degrees) + + see: https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points + + Parameters + ---------- + lon1 + lat1 + lon2 + lat2 + + Returns + ------- + km + """ + # convert decimal degrees to radians + lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) + + # haversine formula + dlon = lon2 - lon1 + dlat = lat2 - lat1 + a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2 + c = 2 * asin(sqrt(a)) + r = 6371 # Radius of earth in kilometers. 
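+    # c is the angular separation in radians; scaling by the mean Earth radius
+    # gives the great-circle distance in km. Illustrative check (values rounded):
+    # haversine(-60, 30, -55, 30) ~ 481 km along the 30N parallel.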
+    return c * r
+
+
+def bearing(lon1, lat1, lon2, lat2):
+    """Compute the forward azimuth from point 1 to point 2
+
+    Parameters
+    ----------
+    lon1
+    lat1
+    lon2
+    lat2
+
+    Returns
+    -------
+    float: forward azimuth in degrees
+    """
+    # from math import cos, sin, atan2, degrees
+    # b = atan2(cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(lon2 - lon1), sin(lon2 - lon1) * cos(lat2))
+    # b = degrees(b)
+    # return b
+
+    geodesic = pyproj.Geod(ellps='WGS84')
+    fwd_azimuth, back_azimuth, distance = geodesic.inv(lon1, lat1, lon2, lat2)
+    return fwd_azimuth
+
+
+def fixLON(x):
+    """Ensure a 0-360 longitude"""
+    if x < 0:
+        x = 360 + x
+    return x
+
diff --git a/vfrecovery/utils/misc.py b/vfrecovery/utils/misc.py
new file mode 100644
index 0000000..8e88395
--- /dev/null
+++ b/vfrecovery/utils/misc.py
@@ -0,0 +1,41 @@
+import argopy.plot as argoplot
+from pathlib import Path
+import os
+from argopy.utils import is_cyc
+
+
+def get_package_dir():
+    fpath = Path(__file__)
+    return str(fpath.parent.parent)
+
+
+def get_cfg_str(a_cfg):
+    txt = "VFloat configuration: (Parking depth: %i [db], Cycle duration: %i [hours], Profile depth: %i [db])" % (
+        a_cfg.mission['parking_depth'],
+        a_cfg.mission['cycle_duration'],
+        a_cfg.mission['profile_depth'],
+    )
+    return txt
+
+
+def list_float_simulation_folders(wmo, cyc=None) -> dict:
+    """Return the list of all available simulation folders for a given WMO"""
+    output_path = {}
+    pl = Path(os.path.sep.join(["vfrecovery_simulations_data", str(wmo)])).glob("*")
+    for p in pl:
+        if p.is_dir():
+            folder_name = p.parts[-1]  # do not shadow the 'cyc' argument here
+            if is_cyc(folder_name):
+                output_path.update({int(folder_name): p})
+    if cyc is not None:
+        # Restrict results to the requested cycle number(s):
+        cyc_list = [int(c) for c in (cyc if isinstance(cyc, (list, tuple)) else [cyc])]
+        output_path = {c: output_path[c] for c in list(output_path) if c in cyc_list}
+    return output_path
+
+
+def get_ea_profile_page_url(wmo, cyc):
+    try:
+        url = argoplot.dashboard(wmo, cyc, url_only=True)
+    except Exception:
+        # log.info("EA dashboard page not available for this profile: %i/%i" % (wmo, cyc))
+        url = "404"
+    return url
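+
+
+# Usage sketch (hypothetical WMO/cycle; relies on the 'vfrecovery_simulations_data/<WMO>/<CYC>'
+# folder layout assumed above):
+# >>> folders = list_float_simulation_folders(6903091, cyc=[112])  # e.g. {112: Path(...)}
+# >>> url = get_ea_profile_page_url(6903091, 112)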