Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Identify object "families" using segmentation data #398

Open
wants to merge 5 commits into
base: RC_v1.5.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions tobac/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,3 +584,31 @@ def test_transform_feature_points_3D():
assert np.all(new_feat_df["hdim_1"] == [25, 30])
assert np.all(new_feat_df["hdim_2"] == [5, 15])
assert np.all(new_feat_df["vdim"] == [5, 10])


def test_identify_feature_families():
"""tests tobac.utils.general.identify_feature_families"""
orig_feat_df_1 = tb_test.generate_single_feature(
10, 30, 10, max_h1=50, max_h2=50, feature_num=1
)
orig_feat_df_2 = tb_test.generate_single_feature(
30, 30, 20, max_h1=50, max_h2=50, feature_num=2
)

orig_feat_df = tb_utils.combine_feature_dataframes(
[orig_feat_df_1, orig_feat_df_2], renumber_features=False
)

# make fake segmentation
test_arr = np.zeros((2, 50, 50), dtype=int)
test_arr[0, 5:15, 20:40] = 1
test_arr[0, 15:40, 20:40] = 2

test_xr = xr.DataArray(data=test_arr, dims=["time", "hdim_1", "hdim_2"])

out_df, out_grid = tb_utils.general.identify_feature_families(
orig_feat_df, test_xr, return_grid=True, family_column_name="family"
)
assert np.unique(out_df["family"] == 1)
assert np.all(out_grid[0, 5:15, 20:40] == 1)
assert np.all(out_grid[0, 15:40, 20:40] == 1)
89 changes: 89 additions & 0 deletions tobac/utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@

import copy
import logging
from typing import Union

import pandas as pd
import skimage

from . import internal as internal_utils
from . import decorators
import numpy as np
import sklearn
import sklearn.neighbors
Expand Down Expand Up @@ -922,3 +926,88 @@
ds["ProjectionCoordinateSystem"] = Projection

return ds


@decorators.iris_to_xarray
def identify_feature_families(
feature_df: pd.DataFrame,
in_segmentation: xr.DataArray,
return_grid: bool = False,
family_column_name: str = "feature_family_id",
unsegmented_point_values: int = 0,
below_threshold_values: int = -1,
) -> Union[tuple[pd.DataFrame, xr.DataArray], pd.DataFrame]:
"""
Function to identify families/storm systems by identifying where segmentation touches.
At a given time, segmentation areas are considered part of the same family if they
touch at any point.

Parameters
----------
feature_df: pd.DataFrame
Input feature dataframe
in_segmentation: xr.DataArray
Input segmentation
return_grid: bool
Whether to return the segmentation grid showing families
family_column_name: str
The name in the output dataframe of the family ID
unsegmented_point_values: int
The value in the input segmentation for unsegmented but above threshold points
below_threshold_values: int
The value in the input segmentation for below threshold points

Returns
-------
pd.DataFrame and xr.DataArray or pd.DataFrame
Input dataframe with family IDs associated with each feature
if return_grid is True, the segmentation grid showing families is
also returned.

"""

# we need to label the data, but we currently label using skimage label, not dask label.

# 3D should be 4-D (time, then 3 spatial).
# 2D should be 3-D (time, then 2 spatial)
is_3D = len(in_segmentation.shape) == 4
seg_family_dict = dict()
out_families = copy.deepcopy(in_segmentation)

for time_index in range(in_segmentation.shape[0]):
in_arr = np.array(in_segmentation.values[time_index])

segmented_arr = np.logical_and(
in_arr != unsegmented_point_values, in_arr != below_threshold_values
)
# These are our families
family_labeled_data = skimage.measure.label(
segmented_arr,
)
Comment on lines +984 to +986
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should offset the value of these labels by the previous highest feature family so that the families are unique over time


# now we need to note feature->family relationship in the dataframe.
segmentation_props = skimage.measure.regionprops(in_arr)

# associate feature ID -> family ID
for seg_area in segmentation_props:
if is_3D:
seg_family = family_labeled_data[

Check warning on line 994 in tobac/utils/general.py

View check run for this annotation

Codecov / codecov/patch

tobac/utils/general.py#L994

Added line #L994 was not covered by tests
seg_area.coords[0, 0], seg_area.coords[0, 1], seg_area.cords[0, 2]
]
else:
seg_family = family_labeled_data[
seg_area.coords[0, 0], seg_area.coords[0, 1]
]
seg_family_dict[seg_area.label] = seg_family

out_families[time_index] = segmented_arr
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
out_families[time_index] = segmented_arr
out_families[time_index] = family_labeled_data

Currently output grid is all ones for segmented area.


family_series = pd.Series(seg_family_dict, name=family_column_name)
feature_series = pd.Series({x: x for x in seg_family_dict.keys()}, name="feature")
family_df = pd.concat([family_series, feature_series], axis=1)
out_df = feature_df.merge(family_df, on="feature", how="inner")

if return_grid:
return out_df, out_families
else:
return out_df

Check warning on line 1013 in tobac/utils/general.py

View check run for this annotation

Codecov / codecov/patch

tobac/utils/general.py#L1013

Added line #L1013 was not covered by tests
Loading