Skip to content

Commit

Permalink
Added boxplot grouped elev comp
Browse files Browse the repository at this point in the history
  • Loading branch information
Jack-Hayes committed Jan 31, 2025
1 parent ff69973 commit 7bcbcaf
Show file tree
Hide file tree
Showing 4 changed files with 416 additions and 14 deletions.
185 changes: 171 additions & 14 deletions docs/examples/elevation_plotting.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/coincident/plot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import annotations

from coincident.plot.matplotlib import (
boxplot_terrain_diff,
clear_labels,
compare_dems,
get_elev_diff,
Expand All @@ -28,4 +29,5 @@
"plot_diff_hist",
"plot_worldcover_custom_ax",
"compare_dems",
"boxplot_terrain_diff",
]
177 changes: 177 additions & 0 deletions src/coincident/plot/matplotlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,3 +740,180 @@ def style_ax(
if show:
plt.show()
return axd


@depends_on_optional("matplotlib")
def boxplot_terrain_diff(
dem_list: list[xr.Dataset | gpd.GeoDataFrame],
terrain_v: str = "slope",
terrain_groups: np.ndarray | None = None,
ylims: list[float] | None = None,
title: str = "Elevation Differences (m) by Terrain",
xlabel: str = "Terrain Groups",
elev_col: str | None = None,
show: bool = True,
) -> plt.Axes:
"""
Plots boxplots of elevation differences by terrain group for two input elevation datasets.
(e.g. boxplots for elevation differences over varying slopes for two DEMs)
This also plots the counts of the grouped elevation differences
NOTE:
- This function assumes that the datasets in dem_list are in the same CRS and aligned
- This function can also work with point geometry GeoDataFrames (e.g. ICESat-2 points)
- If using a GeoDataFrame, you must also provide the elev_col parameter
which is the column name containing elevation values you wish to compare
- This function requires there to be EXACTLY 2 datasets in the dem_list.
- The first dataset in dem_list MUST be a xr.Dataset with an 'elevation' variable
and the corresponding terrain_v variable (e.g. 'slope')
Parameters
----------
dem_list : list[xr.Dataset | gpd.GeoDataFrame]
List containing exactly two elevation datasets to compare. The first
dataset must be a xr.Dataset with an 'elevation' variable and a variable
corresponding to the terrain_v variable (e.g. 'slope').
terrain_v : str
Terrain variable of interest (e.g. 'slope') that exists in first DEM. This
is what your elevation differences will be grouped by
terrain_groups : np.ndarray
Array defining the edges of terrain groups.
ylims : list
The y-axis limits for the plot. If None, then the y-axis limits will be
automatically adjusted to fit the whiskers of each boxplot
title : str
The title of the plot.
xlabel : str
The label for the x-axis.
elev_col : str
The name of the column containing elevation values to difference in
the GeoDataFrame if a GeoDataFrame is provided
show: bool
Whether to display the plot. Implemented for testing purposes
Set False for tests so plots don't display
Returns
-------
plt.Axes
The matplotlib axes object containing the plot.
"""
if len(dem_list) != 2:
msg_len = "dem_list must contain exactly two datasets"
raise ValueError(msg_len)
# ruff B008 if default is set to np.arange(0, 46, 1)
if terrain_groups is None:
terrain_groups = np.arange(0, 46, 1)
# difference (second - first) based on type (xr.dataset vs gdf)
if isinstance(dem_list[1], gpd.GeoDataFrame):
if elev_col is None:
msg_col_arg = "elev_col must be provided when using point data"
raise ValueError(msg_col_arg)
da_points = dem_list[1].get_coordinates().to_xarray()
samples = (
dem_list[0]
.interp(da_points)
.drop_vars(["band", "spatial_ref"])
.to_dataframe()
)
dem_list[1]["elev_diff"] = (
dem_list[1][elev_col].to_numpy() - samples["elevation"].to_numpy()
)
dem_list[1][terrain_v] = samples[terrain_v].to_numpy()
diff_data = dem_list[1]["elev_diff"]
else:
diff_data = dem_list[1]["elevation"] - dem_list[0]["elevation"]
fig, ax = plt.subplots(figsize=(14, 6))
box_data = [] # difference data per group
box_labels = [] # xtick labels for box groups
counts = [] # group observation counts
# loop over terrain groups and extract differences by group
for i in range(len(terrain_groups) - 1):
b1, b2 = terrain_groups[i], terrain_groups[i + 1]

if isinstance(dem_list[1], gpd.GeoDataFrame):
mask = (dem_list[1][terrain_v] > b1) & (dem_list[1][terrain_v] <= b2)
data = dem_list[1].loc[mask, "elev_diff"].dropna()
else:
mask = (dem_list[0][terrain_v] > b1) & (dem_list[0][terrain_v] <= b2)
data = diff_data.where(mask).to_numpy()
data = data[~np.isnan(data)]

# minimum 30 observations per group
if len(data) >= 30:
box_data.append(data)
box_labels.append(f"{b1}-{b2}")
counts.append(len(data))

if len(box_data) == 0:
msg_counts = "No groups satisfied the minimum 30 observation count threshold."
raise ValueError(msg_counts)

ax.boxplot(
box_data,
orientation="vertical",
patch_artist=True,
showfliers=True,
boxprops={"facecolor": "lightgray", "color": "black"},
flierprops={"marker": "x", "color": "black", "markersize": 5, "alpha": 0.6},
medianprops={"color": "black"},
positions=np.arange(len(box_data)),
)

# second axis for observation counts
ax2 = ax.twinx()
ax2.plot(np.arange(len(counts)), counts, "o", color="orange", alpha=0.6)
# dynamic y-ticks for observation counts
magnitude = 10 ** np.floor(np.log10(max(counts)))
min_count = np.floor(min(counts) / magnitude) * magnitude
max_count = np.ceil(max(counts) / magnitude) * magnitude
ticks = np.linspace(min_count, max_count, 11)
ax2.set_yticks(ticks)
ax2.spines["right"].set_color("orange")
ax2.tick_params(axis="y", colors="orange")
ax2.set_ylabel("Count", color="orange", fontsize=12)

# original axis elements
ax.axhline(0, color="black", linestyle="dashed", linewidth=0.7)
global_median = np.nanmedian(diff_data.to_numpy())
ax.axhline(
global_median,
color="magenta",
linestyle="dashed",
linewidth=0.7,
label=f"Global Med: {global_median:.2f}m",
alpha=0.8,
)
# Dynamic ylims for the elevation differences
# Zooms into whiskers extent
if ylims is None:
# get whiskers to fit the ylims of the plot
data_concat = np.concatenate(box_data)
q1, q3 = np.percentile(data_concat, [25, 75])
iqr = q3 - q1
ymin = np.floor(q1 - 1.5 * iqr)
ymax = np.ceil(q3 + 1.5 * iqr)
# force include 0 in y axis
ymin = min(ymin, 0)
ymax = max(ymax, 0)
n_ticks = 11
spacing = max(abs(ymin), abs(ymax)) * 2 / (n_ticks - 1)
spacing = np.ceil(spacing)
ymin = np.floor(ymin / spacing) * spacing
ymax = np.ceil(ymax / spacing) * spacing
yticks = np.arange(ymin, ymax + spacing, spacing)
ylims = [ymin, ymax]
ax.set_yticks(yticks)
ax.set_ylim(ylims)
ax.set_xticks(np.arange(len(box_labels)))
ax.set_xticklabels(box_labels, rotation=45, fontsize=10)
ax.set_ylabel("Elevation Difference (m)", fontsize=12)
ax.set_xlabel(xlabel, fontsize=12)
ax.set_title(title, fontsize=14)

lines1, labels1 = ax.get_legend_handles_labels()
ax.legend(lines1, labels1, loc="best", fontsize=10)

if show:
plt.tight_layout()
plt.show()

return ax
66 changes: 66 additions & 0 deletions tests/test_plot.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import geopandas as gpd
import matplotlib.patches
import matplotlib.pyplot as plt
import numpy as np
import pytest
Expand Down Expand Up @@ -282,3 +283,68 @@ def test_compare_dems(dem_tiny, points_tiny):
assert (
len(axd_full[f"hist_{key}"].patches) > 0
), f"hist_{key} should have histogram bars"


def test_boxplot_raster_diff(dem_tiny):
"""Test boxplot_terrain_diff with raster data sources"""
# create random slope values
# make sure that counts of values of 5 and values of 40 are greater than 30
# as boxplot_terrain_diff() will only plot groups with counts >= 30
slope = np.concatenate(([5] * 35, [40] * 40, [1] * 25)).reshape(10, 10)
dem_tiny["slope"] = xr.DataArray(slope, dims=["y", "x"])

dem_tiny_2 = dem_tiny.copy()
random_elevations = np.random.uniform(2935, 2965, size=(1, 10, 10)) # noqa: NPY002
dem_tiny_2["elevation"] = (("band", "y", "x"), random_elevations)

axd = coincident.plot.boxplot_terrain_diff([dem_tiny, dem_tiny_2], show=False)
assert isinstance(axd, plt.Axes), "Return value should be a matplotlib Axes object"
assert len(axd.get_children()) > 0, "Figure should exist"
assert (
len(
[
child
for child in axd.get_children()
if isinstance(child, matplotlib.patches.PathPatch)
]
)
== 2
), "Should have two boxplot boxes"
assert axd.get_legend() is not None, "Legend should exist"
assert len(axd.figure.axes) == 2, "Should have two y-axes"
assert axd.figure.axes[1].get_ylabel() == "Count", "Second y-axis should be 'Count'"


def test_boxplot_point_diff(dem_tiny, points_tiny):
"""Test boxplot_terrain_diff with point geodataframe reference"""
# create random slope values
# same as test_boxplot_raster_diff()
slope = np.concatenate(([5] * 35, [40] * 40, [1] * 25)).reshape(10, 10)
dem_tiny["slope"] = xr.DataArray(slope, dims=["y", "x"])

# duplicate points to make sure counts are > 30
# gf = pd.concat([gf] * 30, ignore_index=True) <- better but don't want to import pandas
points_tiny = gpd.GeoDataFrame(
np.tile(points_tiny.values, (20, 1)),
columns=points_tiny.columns,
geometry="geometry",
)

axd = coincident.plot.boxplot_terrain_diff(
[dem_tiny, points_tiny], elev_col="h_li", show=False
)
assert isinstance(axd, plt.Axes), "Return value should be a matplotlib Axes object"
assert len(axd.get_children()) > 0, "Figure should exist"
assert (
len(
[
child
for child in axd.get_children()
if isinstance(child, matplotlib.patches.PathPatch)
]
)
== 2
), "Should have two boxplot boxes"
assert axd.get_legend() is not None, "Legend should exist"
assert len(axd.figure.axes) == 2, "Should have two y-axes"
assert axd.figure.axes[1].get_ylabel() == "Count", "Second y-axis should be 'Count'"

3 comments on commit 7bcbcaf

@Jack-Hayes
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added function coincident.plot.boxplot_terrain_diff which creates boxplots for elevation differences for two input DEMs or an input DEM and point GeoDataFrame. Dynamically adjusts ylims and ticks based on distribution where the two ylims are the elevation differences and counts of each group. Added tests as well.

NOTE: docs/examples/elevation_plotting.ipynb is definitely not ready to merge into Main. Hard to give good examples with 2mb notebook limit. The functions themselves, though, are close to "merge-ready"

@scottyhq
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard to give good examples with 2mb notebook limit.

It would be great if we could execute the notebook dynamically (which would require sliderule in the docs environment), but then we can have large outputs b/c they just end up on the website rather than the code repository. We can also increase that limit if need be ( I did once from the scientific python template default of 1MB, but if these notebooks change regularly then the size could balloon...

args: ["--maxkb=2000"]

@Jack-Hayes
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah gotcha. I do like the idea of dynamically executing the notebook, but I'm not quite sure what the best way forward is (and how many example notebooks we'll actually end up having). I can maybe identify a smaller 4-way overlap AOI and do the demo on DEMs at a coarser but not too-coarse resolution for now? Thanks for this!

Please sign in to comment.