From 547bd2a7eb2c74061360197e814ecbe277386819 Mon Sep 17 00:00:00 2001 From: kelcyno <88055123+kelcyno@users.noreply.github.com> Date: Thu, 2 Jun 2022 15:36:34 -0500 Subject: [PATCH] Revert "Create merge_split.py" This reverts commit d4cbfa296b76bb04bd8831cbf302579ce52e0592. --- tobac/merge_split.py | 345 ------------------------------------------- 1 file changed, 345 deletions(-) delete mode 100644 tobac/merge_split.py diff --git a/tobac/merge_split.py b/tobac/merge_split.py deleted file mode 100644 index 0cbbc7e5..00000000 --- a/tobac/merge_split.py +++ /dev/null @@ -1,345 +0,0 @@ -#Tobac merge and split v0.1 - -from geopy.distance import geodesic -from networkx import * -import numpy as np -from pandas.core.common import flatten -import xarray as xr - -def compress_all(nc_grids, min_dims=2): - for var in nc_grids: - if len(nc_grids[var].dims) >= min_dims: - # print("Compressing ", var) - nc_grids[var].encoding["zlib"] = True - nc_grids[var].encoding["complevel"] = 4 - nc_grids[var].encoding["contiguous"] = False - return nc_grids - - -def standardize_track_dataset(TrackedFeatures, Mask, Projection): - """ Combine a feature mask with the feature data table into a common dataset. - Also renames th - - returned by tobac.themes.tobac_v1.segmentation - with the TrackedFeatures dataset returned by tobac.themes.tobac_v1.linking_trackpy. - - Also rename the variables to be more desciptive and comply with cf-tree. - - Convert the default cell parent ID to an integer table. - - Add a cell dimension to reflect - - Projection is an xarray DataArray - - TODO: Add metadata attributes to - - """ - feature_standard_names = { - # new variable name, and long description for the NetCDF attribute - 'frame':('feature_time_index', - 'positional index of the feature along the time dimension of the mask, from 0 to N-1'), - 'hdim_1':('feature_hdim1_coordinate', - 'position of the feature along the first horizontal dimension in grid point space; a north-south coordinate for dim order (time, y, x).' - 'The numbering is consistent with positional indexing of the coordinate, but can be' - 'fractional, to account for a centroid not aligned to the grid.'), - 'hdim_2':('feature_hdim2_coordinate', - 'position of the feature along the second horizontal dimension in grid point space; an east-west coordinate for dim order (time, y, x)' - 'The numbering is consistent with positional indexing of the coordinate, but can be' - 'fractional, to account for a centroid not aligned to the grid.'), - 'idx':('feature_id_this_frame',), - 'num':('feature_grid_cell_count', - 'Number of grid points that are within the threshold of this feature'), - 'threshold_value':('feature_threshold_max', - "Feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments"), - 'feature':('feature_id', - "Unique number of the feature; starts from 1 and increments by 1 to the number of features"), - 'time':('feature_time','time of the feature, consistent with feature_time_index'), - 'timestr':('feature_time_str','String representation of the feature time, YYYY-MM-DD HH:MM:SS'), - 'projection_y_coordinate':('feature_projection_y_coordinate','y position of the feature in the projection given by ProjectionCoordinateSystem'), - 'projection_x_coordinate':('feature_projection_x_coordinate','x position of the feature in the projection given by ProjectionCoordinateSystem'), - 'lat':('feature_latitude','latitude of the feature'), - 'lon':('feature_longitude','longitude of the feature'), - 'ncells':('feature_ncells','number of grid cells for this feature (meaning uncertain)'), - 'areas':('feature_area',), - 'isolated':('feature_isolation_flag',), - 'num_objects':('number_of_feature_neighbors',), - 'cell':('feature_parent_cell_id',), - 'time_cell':('feature_parent_cell_elapsed_time',), - 'segmentation_mask':('2d segmentation mask',) - } - new_feature_var_names = {k:feature_standard_names[k][0] for k in feature_standard_names.keys() - if k in TrackedFeatures.variables.keys()} - - TrackedFeatures = TrackedFeatures.drop(['cell_parent_track_id']) - # Combine Track and Mask variables. Use the 'feature' variable as the coordinate variable instead of - # the 'index' variable and call the dimension 'feature' - ds = xr.merge([TrackedFeatures.swap_dims({'index':'feature'}).drop('index').rename_vars(new_feature_var_names), - Mask]) - - # Add the projection data back in - ds['ProjectionCoordinateSystem']=Projection - - # Convert the cell ID variable from float to integer - if 'int' not in str(TrackedFeatures.cell.dtype): - # The raw output from the tracking is actually an object array - # array([nan, 2, 3], dtype=object) - # (and is cast to a float array when saved as NetCDF, I think). - # Cast to float. - int_cell = xr.zeros_like(TrackedFeatures.cell, dtype='int64') - - cell_id_data = TrackedFeatures.cell.astype('float64') - valid_cell = np.isfinite(cell_id_data) - valid_cell_ids = cell_id_data[valid_cell] - if not (np.unique(valid_cell_ids) > 0).all(): - raise AssertionError('Lowest cell ID cell is less than one, conflicting with use of zero to indicate an untracked cell') - int_cell[valid_cell] = valid_cell_ids.astype('int64') - #ds['feature_parent_cell_id'] = int_cell - return ds - - - -def merge_split(TRACK,distance = 25000,frame_len = 5): - ''' - function to postprocess tobac track data for merge/split cells - Input: - TRACK: xarray dataset of tobac Track information - - distance: float, optional distance threshold prior to adding a pair of points - into the minimum spanning tree. Default is 25000 meters. - - frame_len: float, optional threshold for the spanning length within which two points - can be separated. Default is five (5) frames. - - - Output: - d: xarray dataset of - feature position along 1st horizontal dimension - hdim2_index: float - feature position along 2nd horizontal dimension - - Example: - d = merge_split(Track) - ds = standardize_track_dataset(Track, refl_mask, data['ProjectionCoordinateSystem']) - # both_ds = xarray.combine_by_coords((ds,d), compat='override') - both_ds = xr.merge([ds, d],compat ='override') - both_ds = compress_all(both_ds) - both_ds.to_netcdf(os.path.join(savedir,'Track_features_merges.nc')) - - ''' - track_groups = TRACK.groupby('cell') - cell_ids = {cid:len(v) for cid, v in track_groups.groups.items()} - id_data = np.fromiter(cell_ids.keys(), dtype=int) - count_data = np.fromiter(cell_ids.values(), dtype=int) - all_frames = np.sort(np.unique(TRACK.frame)) - a_points = list() - b_points = list() - a_names = list() - b_names = list() - dist = list() - - - for i in id_data: - #print(i) - a_pointx = track_groups[i].projection_x_coordinate[-1].values - a_pointy = track_groups[i].projection_y_coordinate[-1].values - for j in id_data: - b_pointx = track_groups[j].projection_x_coordinate[0].values - b_pointy = track_groups[j].projection_y_coordinate[0].values - d = np.linalg.norm(np.array((a_pointx, a_pointy)) - np.array((b_pointx, b_pointy))) - if d <= distance: - a_points.append([a_pointx,a_pointy]) - b_points.append([b_pointx, b_pointy]) - dist.append(d) - a_names.append(i) - b_names.append(j) - - - - - -# for i in id_data: -# a_pointx = track_groups[i].grid_longitude[-1].values -# a_pointy = track_groups[i].grid_latitude[-1].values -# for j in id_data: -# b_pointx = track_groups[j].grid_longitude[0].values -# b_pointy = track_groups[j].grid_latitude[0].values -# d = geodesic((a_pointy,a_pointx),(b_pointy,b_pointx)).km -# if d <= distance: -# a_points.append([a_pointx,a_pointy]) -# b_points.append([b_pointx, b_pointy]) -# dist.append(d) -# a_names.append(i) -# b_names.append(j) - - id = [] - for i in range(len(dist)-1, -1, -1): - if a_names[i] == b_names[i]: - id.append(i) - a_points.pop(i) - b_points.pop(i) - dist.pop(i) - a_names.pop(i) - b_names.pop(i) - else: - continue - - g = Graph() - for i in np.arange(len(dist)): - g.add_edge(a_names[i], b_names[i],weight=dist[i]) - - tree = minimum_spanning_edges(g) - tree_list = list(minimum_spanning_edges(g)) - - new_tree = [] - for i,j in enumerate(tree_list): - frame_a = np.nanmax(track_groups[j[0]].frame.values) - frame_b = np.nanmin(track_groups[j[1]].frame.values) - if np.abs(frame_a - frame_b) <= frame_len: - new_tree.append(tree_list[i][0:2]) - new_tree_arr = np.array(new_tree) - - TRACK['cell_parent_track_id'] = np.zeros(len(TRACK['cell'].values)) - cell_id = np.unique(TRACK.cell.values.astype(float)[~np.isnan(TRACK.cell.values.astype(float))]) - track_id = dict() #same size as number of total merged tracks - - arr = np.array([0]) - for p in cell_id: - j = np.where(arr == int(p)) - if len(j[0]) > 0: - continue - else: - k = np.where(new_tree_arr == p) - if len(k[0]) == 0: - track_id[p] = [p] - arr = np.append(arr,p) - else: - temp1 = list(np.unique(new_tree_arr[k[0]])) - temp = list(np.unique(new_tree_arr[k[0]])) - - for l in range(15): - for i in temp1: - k2 = np.where(new_tree_arr == i) - temp.append(list(np.unique(new_tree_arr[k2[0]]).squeeze())) - temp = list(flatten(temp)) - temp = list(np.unique(temp)) - - if len(temp1) == len(temp): - break - temp1 = np.array(temp) - - for i in temp1: - k2 = np.where(new_tree_arr == i) - temp.append(list(np.unique(new_tree_arr[k2[0]]).squeeze())) - - temp = list(flatten(temp)) - temp = list(np.unique(temp)) - arr = np.append(arr,np.unique(temp)) - - track_id[np.nanmax(np.unique(temp))] = list(np.unique(temp)) - - - - storm_id = [0] #default because we don't track larger storm systems *yet* - print('found storm id') - - - track_parent_storm_id = np.repeat(0, len(track_id)) #This will always be zero when we don't track larger storm systems *yet* - print('found track parent storm ids') - - track_ids = np.array(list(track_id.keys())) - print('found track ids') - - - cell_id = list(np.unique(TRACK.cell.values.astype(float)[~np.isnan(TRACK.cell.values.astype(float))])) - print('found cell ids') - - cell_parent_track_id = [] - - for i, id in enumerate(track_id): - - if len(track_id[int(id)]) == 1: - cell_parent_track_id.append(int(id)) - - else: - cell_parent_track_id.append(np.repeat(int(id),len(track_id[int(id)]))) - - - cell_parent_track_id = list(flatten(cell_parent_track_id)) - print('found cell parent track ids') - - feature_parent_cell_id = list(TRACK.cell.values.astype(float)) - - print('found feature parent cell ids') - - #This version includes all the feature regardless of if they are used in cells or not. - feature_id = list(TRACK.feature.values.astype(int)) - print('found feature ids') - - feature_parent_storm_id = np.repeat(0,len(feature_id)) #we don't do storms atm - print('found feature parent storm ids') - - feature_parent_track_id = [] - feature_parent_track_id = np.zeros(len(feature_id)) - for i, id in enumerate(feature_id): - cellid = feature_parent_cell_id[i] - if np.isnan(cellid): - feature_parent_track_id[i] = -1 - else: - j = np.where(cell_id == cellid) - j = np.squeeze(j) - trackid = cell_parent_track_id[j] - feature_parent_track_id[i] = trackid - - print('found feature parent track ids') - - - storm_child_track_count = [len(track_id)] - print('found storm child track count') - - track_child_cell_count = [] - for i,id in enumerate(track_id): - track_child_cell_count.append(len(track_id[int(id)])) - print('found track child cell count') - - - cell_child_feature_count = [] - for i,id in enumerate(cell_id): - cell_child_feature_count.append(len(track_groups[id].feature.values)) - print('found cell child feature count') - - storm_child_cell_count = [len(cell_id)] - storm_child_feature_count = [len(feature_id)] - - storm_dim = 'nstorms' - track_dim = 'ntracks' - cell_dim = 'ncells' - feature_dim = 'nfeatures' - - d = xr.Dataset({ - 'storm_id': (storm_dim, storm_id), - 'track_id': (track_dim, track_ids), - 'track_parent_storm_id': (track_dim, track_parent_storm_id), - 'cell_id': (cell_dim, cell_id), - 'cell_parent_track_id': (cell_dim, cell_parent_track_id), - 'feature_id': (feature_dim, feature_id), - 'feature_parent_cell_id': (feature_dim, feature_parent_cell_id), - 'feature_parent_track_id': (feature_dim, feature_parent_track_id), - 'feature_parent_storm_id': (feature_dim, feature_parent_storm_id), - 'storm_child_track_count': (storm_dim, storm_child_track_count), - 'storm_child_cell_count': (storm_dim, storm_child_cell_count), - 'storm_child_feature_count': (storm_dim, storm_child_feature_count), - 'track_child_cell_count': (track_dim, track_child_cell_count), - 'cell_child_feature_count': (cell_dim, cell_child_feature_count), - }) - d = d.set_coords(['feature_id','cell_id', 'track_id', 'storm_id']) - - assert len(track_id) == len(track_parent_storm_id) - assert len(cell_id) == len(cell_parent_track_id) - assert len(feature_id) == len(feature_parent_cell_id) - assert sum(storm_child_track_count) == len(track_id) - assert sum(storm_child_cell_count) == len(cell_id) - assert sum(storm_child_feature_count) == len(feature_id) - assert sum(track_child_cell_count) == len(cell_id) - assert sum([sum(cell_child_feature_count),(len(np.where(feature_parent_track_id < 0)[0]))]) == len(feature_id) - - return d \ No newline at end of file