Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #288 use DriverVectorCube in apply_polygon #291

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 30 additions & 58 deletions openeo_driver/ProcessGraphDeserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,34 @@ def reduce_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
return data_cube.reduce_dimension(reducer=reduce_pg, dimension=dimension, context=context, env=env)


def _apply_polygon(
    data_cube: DriverDataCube,
    process: dict,
    polygons,
    mask_value: Union[int, float, None],
    context,
    env: EvalEnv,
    *,
    parameter: str = "polygons",
    process_id: str = "apply_polygon",
):
    """
    Normalize `polygons` to a `DriverVectorCube` and delegate to `data_cube.apply_polygon`.

    Shared implementation for the `apply_polygon` and `chunk_polygon` processes.

    :param data_cube: cube on which `apply_polygon` is invoked.
    :param process: process graph passed through as the `process` argument.
    :param polygons: geometries as `DelayedVector`, `DriverVectorCube`,
        shapely geometry or GeoJSON-style dict.
    :param mask_value: passed through unchanged (callers default it to `None`).
    :param context: passed through unchanged.
    :param env: evaluation environment, passed through unchanged.
    :param parameter: parameter name to report in validation errors
        (e.g. "chunks" when called on behalf of `chunk_polygon`).
    :param process_id: process name to report in validation errors.
    :raises ProcessParameterInvalidException: if a `DelayedVector` contains a geometry
        that is not a `shapely.geometry.Polygon`, if the type of `polygons` is not supported,
        or if the resulting vector cube reports an area of 0.
    """

    def invalid(reason: str) -> ProcessParameterInvalidException:
        # Single place to build the exception so all failures report the same parameter/process.
        return ProcessParameterInvalidException(parameter=parameter, process=process_id, reason=reason)

    if isinstance(polygons, DelayedVector):
        geometries = list(polygons.geometries)
        for geometry in geometries:
            # Only pure polygons are accepted from a delayed vector (no multi-polygons, points, ...).
            if not isinstance(geometry, shapely.geometry.Polygon):
                raise invalid(f"{geometry!s} is not a polygon.")
        vector_cube = DriverVectorCube.from_geometry(geometries)
    elif isinstance(polygons, DriverVectorCube):
        vector_cube = polygons
    elif isinstance(polygons, shapely.geometry.base.BaseGeometry):
        vector_cube = DriverVectorCube.from_geometry(polygons)
    elif isinstance(polygons, dict):
        vector_cube = DriverVectorCube.from_geojson(polygons)
    else:
        raise invalid(f"unsupported type: {type(polygons).__name__}")

    # Compute the area once: it is used both for the check and for the error message.
    area = vector_cube.get_area()
    if area == 0:
        raise invalid(f"Polygon {vector_cube!s} has an area of {area!r}")

    return data_cube.apply_polygon(
        polygons=vector_cube, process=process, mask_value=mask_value, context=context, env=env
    )


@process_registry_100.add_function(
spec=read_spec("openeo-processes/experimental/chunk_polygon.json"), name="chunk_polygon"
)
Expand All @@ -816,34 +844,7 @@ def chunk_polygon(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
chunks = args.get_required("chunks")
mask_value = args.get_optional("mask_value", expected_type=(int, float), default=None)
context = args.get_optional("context", default=None)

# Chunks parameter check.
# TODO #114 EP-3981 normalize first to vector cube and simplify logic
if isinstance(chunks, DelayedVector):
polygons = list(chunks.geometries)
for p in polygons:
if not isinstance(p, shapely.geometry.Polygon):
reason = "{m!s} is not a polygon.".format(m=p)
raise ProcessParameterInvalidException(parameter='chunks', process='chunk_polygon', reason=reason)
polygon = MultiPolygon(polygons)
elif isinstance(chunks, shapely.geometry.base.BaseGeometry):
polygon = MultiPolygon(chunks)
elif isinstance(chunks, dict):
polygon = geojson_to_multipolygon(chunks)
if isinstance(polygon, shapely.geometry.Polygon):
polygon = MultiPolygon([polygon])
elif isinstance(chunks, str):
# Delayed vector is not supported yet.
reason = "Polygon of type string is not yet supported."
raise ProcessParameterInvalidException(parameter='chunks', process='chunk_polygon', reason=reason)
else:
reason = "Polygon type is not supported."
raise ProcessParameterInvalidException(parameter='chunks', process='chunk_polygon', reason=reason)
if polygon.area == 0:
reason = "Polygon {m!s} has an area of {a!r}".format(m=polygon, a=polygon.area)
raise ProcessParameterInvalidException(parameter='chunks', process='chunk_polygon', reason=reason)

return data_cube.chunk_polygon(reducer=reduce_pg, chunks=polygon, mask_value=mask_value, context=context, env=env)
return _apply_polygon(data_cube, reduce_pg, chunks, mask_value, context, env)


@process_registry_100.add_function(spec=read_spec("openeo-processes/2.x/proposals/apply_polygon.json"))
Expand All @@ -854,36 +855,7 @@ def apply_polygon(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
polygons = args.get_required("polygons")
mask_value = args.get_optional("mask_value", expected_type=(int, float), default=None)
context = args.get_optional("context", default=None)

# TODO #114 EP-3981 normalize first to vector cube and simplify logic
# TODO #288: this logic (copied from original chunk_polygon implementation) coerces the input polygons
# to a single MultiPolygon of pure (non-multi) polygons, which is conceptually wrong.
# Instead it should normalize to a feature collection or vector cube.
if isinstance(polygons, DelayedVector):
polygons = list(polygons.geometries)
for p in polygons:
if not isinstance(p, shapely.geometry.Polygon):
reason = "{m!s} is not a polygon.".format(m=p)
raise ProcessParameterInvalidException(parameter="polygons", process="apply_polygon", reason=reason)
polygon = MultiPolygon(polygons)
elif isinstance(polygons, DriverVectorCube):
# TODO #288: I know it's wrong to coerce to MultiPolygon here, but we stick to this ill-defined API for now.
polygon = polygons.to_multipolygon()
elif isinstance(polygons, shapely.geometry.base.BaseGeometry):
polygon = MultiPolygon(polygons)
elif isinstance(polygons, dict):
polygon = geojson_to_multipolygon(polygons)
if isinstance(polygon, shapely.geometry.Polygon):
polygon = MultiPolygon([polygon])
else:
reason = f"unsupported type: {type(polygons).__name__}"
raise ProcessParameterInvalidException(parameter="polygons", process="apply_polygon", reason=reason)

if polygon.area == 0:
reason = "Polygon {m!s} has an area of {a!r}".format(m=polygon, a=polygon.area)
raise ProcessParameterInvalidException(parameter="polygons", process="apply_polygon", reason=reason)

return data_cube.apply_polygon(polygons=polygon, process=process, mask_value=mask_value, context=context, env=env)
return _apply_polygon(data_cube, process, polygons, mask_value, context, env)


@process_registry_100.add_function(spec=read_spec("openeo-processes/experimental/fit_class_random_forest.json"))
Expand Down
4 changes: 2 additions & 2 deletions openeo_driver/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def chunk_polygon(
reducer: dict,
# TODO #288: `chunks` should be an explicit collection of geometries (e.g. a FeatureCollection, vector cube base class or an iterable of geometries)
# Note that subclass implementations even wrongly retype this to `MultiPolygon`.
chunks: Union[shapely.geometry.base.BaseGeometry],
chunks: DriverVectorCube,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid it's not that easy to change this API like this without breaking backward compatibility.

openeo-geopyspark-driver, for example, extends this API, so we need to provide a transition window.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it okay to merge this PR together with the one on the geopyspark-driver?
Open-EO/openeo-geopyspark-driver#801

mask_value: Union[float, None],
env: EvalEnv,
context: Optional[dict] = None,
Expand All @@ -137,7 +137,7 @@ def apply_polygon(
*,
# TODO #229/#288 better type for `polygons` arg: should be vector cube or something alike
# TODO #288: use `geometries` argument instead of confusing `polygons` argument (https://github.com/Open-EO/openeo-processes/issues/511)
polygons: shapely.geometry.base.BaseGeometry,
polygons: DriverVectorCube,
process: dict,
mask_value: Optional[float] = None,
context: Optional[dict] = None,
Expand Down
12 changes: 3 additions & 9 deletions openeo_driver/dry_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,18 +650,12 @@ def ndvi(self, nir: str = "nir", red: str = "red", target_band: str = None) -> '
return self

def chunk_polygon(
# TODO #288: `chunks`: MultiPolygon should not be abused as collection of separate geometries.
self, reducer, chunks: MultiPolygon, mask_value: float, env: EvalEnv, context: Optional[dict] = None
self, reducer, chunks: DriverVectorCube, mask_value: float, env: EvalEnv, context: Optional[dict] = None
) -> "DryRunDataCube":
# TODO #229: rename/update `chunk_polygon` to `apply_polygon` (https://github.com/Open-EO/openeo-processes/pull/298)
if isinstance(chunks, Polygon):
polygons = [chunks]
elif isinstance(chunks, MultiPolygon):
polygons: List[Polygon] = chunks.geoms
else:
if not isinstance(chunks, DriverVectorCube):
raise ValueError(f"Invalid type for `chunks`: {type(chunks)}")
# TODO #71 #114 Deprecate/avoid usage of GeometryCollection
geometries, bbox = self._normalize_geometry(GeometryCollection(polygons))
geometries, bbox = self._normalize_geometry(chunks)
cube = self.filter_bbox(**bbox, operation="weak_spatial_extent")
return cube._process("chunk_polygon", arguments={"geometries": geometries})

Expand Down
4 changes: 2 additions & 2 deletions tests/test_views_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3730,8 +3730,8 @@ def test_apply_polygon_with_vector_cube(api, tmp_path):
assert dummy.apply_polygon.call_count == 1
polygons = dummy.apply_polygon.call_args.kwargs["polygons"]
# TODO #288 instead of MultiPolygon, this should actually be a vector cube, feature collection or something equivalent
assert isinstance(polygons, shapely.geometry.MultiPolygon)
assert polygons.bounds == (4.45, 51.1, 4.52, 51.2)
assert isinstance(polygons, DriverVectorCube)
assert polygons.get_bounding_box() == (4.45, 51.1, 4.52, 51.2)


def test_fit_class_random_forest(api):
Expand Down