diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 319a512..a157d69 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -32,10 +32,10 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install "numpy<2" gdal[numpy]==3.9.2 scikit-image "h3==4.0.0b5" - python -m pip install pylint mypy pytest types-setuptools + python -m pip install pylint mypy pytest types-setuptools pytest-cov - name: Lint with pylint run: | python3 -m pylint yirgacheffe - name: Test with pytest run: | - python3 -m pytest -vv + python3 -m pytest --cov=yirgacheffe -vv diff --git a/README.md b/README.md index 902771f..34b3b60 100644 --- a/README.md +++ b/README.md @@ -2,82 +2,79 @@ ## Overview -Yirgacheffe is an attempt to wrap gdal datasets such that you can do computational work on them without having to worry about common tasks: +Yirgacheffe is an attempt to wrap raster and polygon geospatial datasets such that you can do computational work on them as a whole or at the pixel level, but without having to do a lot of the grunt work of working out where you need to be in rasters, or managing how much you can load into memory safely. + +Example common use-cases: * Do the datasets overlap? Yirgacheffe will let you define either the intersection or the union of a set of different datasets, scaling up or down the area as required. * Rasterisation of vector layers: if you have a vector dataset then you can add that to your computation and yirgaceffe will rasterize it on demand, so you never need to store more data in memory than necessary. * Do the raster layers get big and take up large amounts of memory? Yirgacheffe will let you do simple numerical operations with layers directly and then worry about the memory management behind the scenes for you. -## Basic layer usage +## Basic usage + +They main unit of data in Yirgacheffe is a "layer", which wraps either a raster dataset or polygon data, and then you can do work on layers without having to worry (unless you choose to) about how they align - Yirgacheffe will work out all the details around overlapping The motivation for Yirgacheffe layers is to make working with gdal data slightly easier - it just hides some common operations, such as incremental loading to save memory, or letting you align layers to generate either the intersection result or the union result. -For example, say we had three layers that overlapped and we wanted to know the +For example, if we wanted to do a simple [Area of Habitat](https://github.com/quantifyearth/aoh-calculator/) calculation, whereby we find the pixels where a species resides by combining its range polygon, its habitat preferences, and its elevation preferences, the code would be like this: ```python -from yirgacheffe.layer import Layer, UniformAreaLayer - -elevation_layer = RasterLayer.layer_from_file('elecation.tiff') -area_layer = UniformAreaLayer('area.tiff') -validity_layer = RasterLayer.layer_from_file('validity.tiff') - -# Work out the common subsection of all these and apply it to the layers -intersection = RasterLayer.find_intersection([elecation_layer, area_layer, validity_layer]) -elevation_layer.set_window_for_intersection(intersection) -area_layer.set_window_for_intersection(intersection) -validity_layer.set_window_for_intersection(intersection) - -# Create a layer that is just the size of the intersection to store the results -result = RasterLayer.empty_raster_layer( - intersection, - area_layer.pixel_scale, - area_layer.datatype, - "result.tif", - area_layer.projection -) - -# Work out the area where the data is valid and over 3000ft -def is_munro(data): - return numpy.where(data > 3000.0, 0.0, 1.0) -calc = validity_layer * area_layer * elevation_layer.numpy_apply(is_munro) +from yirgacheffe.layer import RasterLayer, VectorLayer -calc.save(result) -result.close() +habitat_map = RasterLayer.layer_from_file("habitats.tif") +elevation_map = RasterLayer.layer_from_file('elevation.tif') +range_polygon = VectorLayer.layer_from_file('species123.geojson', raster_like=habitat_map) +area_per_pixel_map = RasterLayer.layer_from_file('area_per_pixel.tif') + +refined_habitat = habitat_map.isin([...species habitat codes...]) +refined_elevation = (elevation_map >= species_min) && (elevation_map <= species_max) + +aoh = refined_habitat * refined_elevation * range_polygon * area_per_pixel_map + +print(f'area for species 123: {aoh.sum()}') ``` -If you want the union then you can simply swap do: +Similarly, you could save the result to a new raster layer: ```python -intersection = RasterLayer.find_union(elecation_layer, area_layer, validity_layer) -elevation_layer.set_window_for_union(intersection) -area_layer.set_window_for_union(intersection) -validity_layer.set_window_for_union(intersection) +with RasterLayer.empty_raster_layer_like(aoh, filename="result.tif") as result: + aoh.save(result) ``` -If you want to work on the data in a layer directly you can call `read_array`, as you would with gdal. The data you access will be relative to the specified window - that is, if you've called either `set_window_for_intersection` or `set_window_for_union` then `read_array` will be relative to that and Yirgacheffe will clip or expand the data with zero values as necessary. +Yirgacheffe will automatically infer if you want to do an intersection of maps or a union of the maps based on the operators you use (see below for a full table). You can explicitly override that if you want. + +### Lazy loading and evaluation + +Yirgacheffe uses a technique from computer science called "lazy evaluation", which means that only when you resolve a calculation will Yirgacheffe do any work. So in the first code example given, the work is only calculated when you call the `sum()` method. All the other intermediary results such as `refined_habitat` and `refined_elevation` are not calculated either until that final `sum()` is called. You could easily call sum on those intermediaries if you wanted and get their results, and that would cause them to be evaluated then. + +Similarly, when you load a layer, be it a raster layer or a vector layer from polygon data, the full data for the file isn't loaded until it's needed for a calculation, and even then only the part of the data necessary will be loaded or rasterized. Furthermore, Yirgacheffe will load the data in chunks, letting you work with rasters bigger than those that would otherwise fit within your computer's memory. -If you have set either the intersection window or union window on a layer and you wish to undo that restriction, then you can simply call `reset_window()` on the layer. -Layers also work with Python's `with` statement, which helps prevent you forgetting to close a layer, which is important both for ensuring results are written to disk when you want, and that memory used by layers is freed (Yirgacheffe doesn't cache data in memory, but it uses GDAL which does). +### Automatic expanding and contracting layers -So for example you can turn the above example to: +When you load raster layers that aren't of equal geographic area (that is, they have a different origin, size, or both)then Yirgacheffe will do all the math internally to ensure that it aligns the pixels geospatially when doing calculations. + +If size adjustments are needed, then Yirgacheffe will infer from the calculations you're doing if it needs to either crop or enlarge layers. For instance, if you're summing two rasters it'll expand them to be the union of their two areas before adding them, filling in the missing parts with zeros. If you're multiplying or doing a logical AND of pixels then it'll find the intersection between the two rasters (as areas missing in one would cause the other layer to result in zero anyway). + +Whilst it is hoped that the default behaviour makes sense in most cases, we can't anticipate all usages, and so if you want to be explicit about the result of any maps you can specify it yourself. + +For example, to tell Yirgacheffe to make a union of a set of layers you'd write: ```python -with RasterLayer.empty_raster_layer( - intersection, - area_layer.pixel_scale, - area_layer.datatype, - "result.tif", - area_layer.projection -) as result: - # Work out the area where the data is valid and over 3000ft - def is_munro(data): - return numpy.where(data > 3000.0, 0.0, 1.0) - calc = validity_layer * area_layer * elevation_layer.numpy_apply(is_munro) +layers = [habitat_map, elevation_map, range_polygon] +union_area = YirgacheffeLayer.find_union(layers) +for layer in layers: + layer.set_window_for_union(union_area) ``` -And you no longer need to explicitly call `result.close()` to ensure the result is written to disk. +There is a similar set of methods for using the intersection. + +If you have set either the intersection window or union window on a layer and you wish to undo that restriction, then you can simply call `reset_window()` on the layer. + +### Direct access to data + +If doing per-layer operations isn't applicable for your application, you can read the pixel values for all layers (including VectorLayers) by calling `read_array` similarly to how you would for gdal. The data you access will be relative to the specified window - that is, if you've called either `set_window_for_intersection` or `set_window_for_union` then `read_array` will be relative to that and Yirgacheffe will clip or expand the data with zero values as necessary. ### Todo but not supported diff --git a/pyproject.toml b/pyproject.toml index b61b6d4..a4b0cd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "yirgacheffe" -version = "0.9.4" +version = "1.0.0" description = "Abstraction of gdal datasets for doing basic math operations" readme = "README.md" authors = [{ name = "Michael Dales", email = "mwd24@cam.ac.uk" }] @@ -25,7 +25,7 @@ dependencies = [ requires-python = ">=3.10" [project.optional-dependencies] -dev = ["mypy", "pylint", "pytest", "h3"] +dev = ["mypy", "pylint", "pytest", "h3", "pytest-cov"] [project.urls] Homepage = "https://github.com/quantifyearth/yirgacheffe" diff --git a/tests/test_auto_windowing.py b/tests/test_auto_windowing.py new file mode 100644 index 0000000..4f2e1a4 --- /dev/null +++ b/tests/test_auto_windowing.py @@ -0,0 +1,250 @@ +import os +import tempfile + +import numpy as np + +from helpers import gdal_dataset_with_data, make_vectors_with_mutlile_ids +from yirgacheffe.layers import ConstantLayer, RasterLayer, VectorLayer, area +from yirgacheffe.window import Area + +def test_add_windows() -> None: + data1 = np.array([[1, 2], [3, 4]]) + data2 = np.array([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120], [130, 140, 150, 160]]) + + layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data1)) + layer2 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data2)) + + assert layer1.area != layer2.area + assert layer1.window != layer2.window + + calc = layer1 + layer2 + + assert calc.area == layer2.area + assert calc.window == layer2.window + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + + expected = np.array([[11, 22, 30, 40], [53, 64, 70, 80], [90, 100, 110, 120], [130, 140, 150, 160]]) + actual = result.read_array(0, 0, 4, 4) + assert (expected == actual).all() + +def test_multiply_windows() -> None: + data1 = np.array([[1, 2], [3, 4]]) + data2 = np.array([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120], [130, 140, 150, 160]]) + + layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data1)) + layer2 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data2)) + + assert layer1.area != layer2.area + assert layer1.window != layer2.window + + calc = layer1 * layer2 + + assert calc.area == layer1.area + assert calc.window == layer1.window + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + + expected = np.array([[10, 40], [150, 240]]) + actual = result.read_array(0, 0, 2, 2) + assert (expected == actual).all() + +def test_add_windows_offset() -> None: + data1 = np.array([[1, 2], [3, 4]]) + data2 = np.array([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120], [130, 140, 150, 160]]) + + layer1 = RasterLayer(gdal_dataset_with_data((-0.02, 0.02), 0.02, data1)) + layer2 = RasterLayer(gdal_dataset_with_data((-0.04, 0.04), 0.02, data2)) + + assert layer1.area != layer2.area + assert layer1.window != layer2.window + + calc = layer1 + layer2 + + assert calc.area == layer2.area + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + + expected = np.array([[10, 20, 30, 40], [50, 61, 72, 80], [90, 103, 114, 120], [130, 140, 150, 160]]) + actual = result.read_array(0, 0, 4, 4) + assert (expected == actual).all() + +def test_multiply_windows_offset() -> None: + data1 = np.array([[1, 2], [3, 4]]) + data2 = np.array([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120], [130, 140, 150, 160]]) + + layer1 = RasterLayer(gdal_dataset_with_data((-0.02, 0.02), 0.02, data1)) + layer2 = RasterLayer(gdal_dataset_with_data((-0.04, 0.04), 0.02, data2)) + + assert layer1.area != layer2.area + assert layer1.window != layer2.window + + calc = layer1 * layer2 + + assert calc.area == layer1.area + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + + expected = np.array([[60, 140], [300, 440]]) + actual = result.read_array(0, 0, 2, 2) + assert (expected == actual).all() + +def test_add_windows_sum() -> None: + data1 = np.array([[1, 2], [3, 4]]) + data2 = np.array([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120], [130, 140, 150, 160]]) + + layer1 = RasterLayer(gdal_dataset_with_data((-0.02, 0.02), 0.02, data1)) + layer2 = RasterLayer(gdal_dataset_with_data((-0.04, 0.04), 0.02, data2)) + + calc = layer1 + layer2 + total = calc.sum() + + expected = np.array([[10, 20, 30, 40], [50, 61, 72, 80], [90, 103, 114, 120], [130, 140, 150, 160]]) + assert total == np.sum(expected) + +def test_multiply_windows_sum() -> None: + data1 = np.array([[1, 2], [3, 4]]) + data2 = np.array([[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120], [130, 140, 150, 160]]) + + layer1 = RasterLayer(gdal_dataset_with_data((-0.02, 0.02), 0.02, data1)) + layer2 = RasterLayer(gdal_dataset_with_data((-0.04, 0.04), 0.02, data2)) + + calc = layer1 * layer2 + total = calc.sum() + + expected = np.array([[60, 140], [300, 440]]) + assert total == np.sum(expected) + +def test_constant_layer_result_rhs_add() -> None: + data1 = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) + layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data1)) + const_layer = ConstantLayer(1.0) + + calc = layer1 + const_layer + + assert calc.area == layer1.area + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + actual = result.read_array(0, 0, 4, 2) + + expected = 1.0 + data1 + + assert (expected == actual).all() + +def test_constant_layer_result_lhs_add() -> None: + data1 = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) + layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data1)) + const_layer = ConstantLayer(1.0) + result = RasterLayer.empty_raster_layer_like(layer1) + + intersection = RasterLayer.find_intersection([layer1, const_layer]) + const_layer.set_window_for_intersection(intersection) + layer1.set_window_for_intersection(intersection) + + calc = const_layer + layer1 + + assert calc.area == layer1.area + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + actual = result.read_array(0, 0, 4, 2) + + expected = 1.0 + data1 + + assert (expected == actual).all() + +def test_constant_layer_result_rhs_multiply() -> None: + data1 = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) + layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data1)) + const_layer = ConstantLayer(2.0) + + calc = layer1 * const_layer + + assert calc.area == layer1.area + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + actual = result.read_array(0, 0, 4, 2) + + expected = data1 * 2.0 + + assert (expected == actual).all() + +def test_constant_layer_result_lhs_multiply() -> None: + data1 = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) + layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data1)) + const_layer = ConstantLayer(2.0) + result = RasterLayer.empty_raster_layer_like(layer1) + + intersection = RasterLayer.find_intersection([layer1, const_layer]) + const_layer.set_window_for_intersection(intersection) + layer1.set_window_for_intersection(intersection) + + calc = const_layer * layer1 + + assert calc.area == layer1.area + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + actual = result.read_array(0, 0, 4, 2) + + expected = 2.0 * data1 + + assert (expected == actual).all() + +def test_vector_layers_add() -> None: + data1 = np.array([[1, 2], [3, 4]]) + layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 1.0, data1)) + + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, "test.gpkg") + areas = { + (Area(-10.0, 10.0, 0.0, 0.0), 42), + (Area(0.0, 0.0, 10, -10), 43) + } + make_vectors_with_mutlile_ids(areas, path) + + burn_value = 2 + layer2 = VectorLayer.layer_from_file(path, None, layer1.pixel_scale, layer1.projection, burn_value=burn_value) + layer2_total = layer2.sum() + assert layer2_total == ((layer2.window.xsize * layer2.window.ysize) / 2) * burn_value + + calc = layer1 + layer2 + + assert calc.area == layer2.area + + total = calc.sum() + assert total == layer2_total + np.sum(data1) + +def test_vector_layers_multiply() -> None: + data1 = np.array([[1, 2], [3, 4]]) + layer1 = RasterLayer(gdal_dataset_with_data((-1.0, 1.0), 1.0, data1)) + + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, "test.gpkg") + areas = { + (Area(-10.0, 10.0, 0.0, 0.0), 42), + (Area(0.0, 0.0, 10, -10), 43) + } + make_vectors_with_mutlile_ids(areas, path) + + burn_value = 2 + layer2 = VectorLayer.layer_from_file(path, None, layer1.pixel_scale, layer1.projection, burn_value=burn_value) + layer2_total = layer2.sum() + assert layer2_total == ((layer2.window.xsize * layer2.window.ysize) / 2) * burn_value + + calc = layer1 * layer2 + + assert calc.area == layer1.area + + result = RasterLayer.empty_raster_layer_like(calc) + calc.save(result) + actual = result.read_array(0, 0, 2, 2) + + expected = np.array([[2, 0], [0, 8]]) + assert (expected == actual).all() diff --git a/yirgacheffe/layers/area.py b/yirgacheffe/layers/area.py index 23d8ed4..ea146f2 100644 --- a/yirgacheffe/layers/area.py +++ b/yirgacheffe/layers/area.py @@ -84,8 +84,8 @@ def __init__(self, dataset, name: Optional[str] = None, band: int = 1): ) self._raster_xsize = self.window.xsize - def read_array(self, _xoffset, yoffset, _xsize, ysize) -> Any: + def read_array_with_window(self, xoffset: int, yoffset: int, xsize: int, ysize: int, window: Window) -> Any: if ysize <= 0: raise ValueError("Request dimensions must be positive and non-zero") - offset = self.window.yoff + yoffset + offset = window.yoff + yoffset return self.databand[offset:offset + ysize] diff --git a/yirgacheffe/layers/base.py b/yirgacheffe/layers/base.py index 64f85e1..52d985c 100644 --- a/yirgacheffe/layers/base.py +++ b/yirgacheffe/layers/base.py @@ -211,9 +211,30 @@ def offset_window_by_pixels(self, offset: int): self._window = new_window self._active_area = new_area - def read_array(self, _x: int, _y: int, _xsize: int, _ysize: int) -> Any: + def read_array_with_window(self, _x: int, _y: int, _xsize: int, _ysize: int, window: Window) -> Any: raise NotImplementedError("Must be overridden by subclass") + def read_array_for_area(self, target_area: Area, x: int, y: int, width: int, height: int) -> Any: + + target_window = Window( + xoff=round_down_pixels((target_area.left - self._underlying_area.left) / self._pixel_scale.xstep, + self._pixel_scale.xstep), + yoff=round_down_pixels((self._underlying_area.top - target_area.top) / (self._pixel_scale.ystep * -1.0), + self._pixel_scale.ystep * -1.0), + xsize=round_up_pixels( + (target_area.right - target_area.left) / self._pixel_scale.xstep, + self._pixel_scale.xstep + ), + ysize=round_up_pixels( + (target_area.top - target_area.bottom) / (self._pixel_scale.ystep * -1.0), + (self._pixel_scale.ystep * -1.0) + ), + ) + return self.read_array_with_window(x, y, width, height, target_window) + + def read_array(self, x: int, y: int, width: int, height: int) -> Any: + return self.read_array_with_window(x, y, width, height, self.window) + def latlng_for_pixel(self, x_coord: int, y_coord: int) -> Tuple[float,float]: """Get geo coords for pixel. This is relative to the set view window.""" if "WGS 84" not in self.projection: diff --git a/yirgacheffe/layers/constant.py b/yirgacheffe/layers/constant.py index d4fcced..bf568e4 100644 --- a/yirgacheffe/layers/constant.py +++ b/yirgacheffe/layers/constant.py @@ -3,7 +3,7 @@ import numpy as np from osgeo import gdal -from ..window import Area, PixelScale +from ..window import Area, PixelScale, Window from .base import YirgacheffeLayer from .. import WGS_84_PROJECTION @@ -31,5 +31,11 @@ def check_pixel_scale(self, _scale: PixelScale) -> bool: def set_window_for_intersection(self, _intersection: Area) -> None: pass - def read_array(self, _x: int, _y: int, xsize: int, ysize: int) -> Any: - return np.full((ysize, xsize), self.value) + def read_array(self, _x: int, _y: int, width: int, height: int) -> Any: + return np.full((height, width), self.value) + + def read_array_with_window(self, _x: int, _y: int, width: int, height: int, _window: Window) -> Any: + return np.full((height, width), self.value) + + def read_array_for_area(self, _target_area: Area, x: int, y: int, width: int, height: int) -> Any: + return self.read_array(x, y, width, height) diff --git a/yirgacheffe/layers/group.py b/yirgacheffe/layers/group.py index 86aae8e..eb9b430 100644 --- a/yirgacheffe/layers/group.py +++ b/yirgacheffe/layers/group.py @@ -89,7 +89,8 @@ def reset_window(self) -> None: except AttributeError: pass # called from Base constructor before we've added the extra field - def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: + def read_array_with_window(self, xoffset: int, yoffset: int, xsize: int, ysize: int, window: Window) -> Any: + if (xsize <= 0) or (ysize <= 0): raise ValueError("Request dimensions must be positive and non-zero") @@ -97,8 +98,8 @@ def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: assert scale is not None target_window = Window( - self.window.xoff + xoffset, - self.window.yoff + yoffset, + window.xoff + xoffset, + window.yoff + yoffset, xsize, ysize ) @@ -138,8 +139,8 @@ def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: intersection.xsize, intersection.ysize ) - result_x_offset = (intersection.xoff - xoffset) - self.window.xoff - result_y_offset = (intersection.yoff - yoffset) - self.window.yoff + result_x_offset = (intersection.xoff - xoffset) - window.xoff + result_y_offset = (intersection.yoff - yoffset) - window.yoff result[ result_y_offset:result_y_offset + intersection.ysize, result_x_offset:result_x_offset + intersection.xsize @@ -198,8 +199,7 @@ class TiledGroupLayer(GroupLayer): * You can have missing tiles, and it'll fill in zeros. * The tiles can overlap - e.g., JRC Annual Change tiles all overlap by a few pixels on all edges. """ - - def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: + def read_array_with_window(self, xoffset: int, yoffset: int, xsize: int, ysize: int, window: Window) -> Any: if (xsize <= 0) or (ysize <= 0): raise ValueError("Request dimensions must be positive and non-zero") @@ -207,8 +207,8 @@ def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: assert scale is not None target_window = Window( - self.window.xoff + xoffset, - self.window.yoff + yoffset, + window.xoff + xoffset, + window.yoff + yoffset, xsize, ysize ) @@ -233,8 +233,8 @@ def read_array(self, xoffset: int, yoffset: int, xsize: int, ysize: int) -> Any: intersection.xsize, intersection.ysize ) - result_x_offset = (intersection.xoff - xoffset) - self.window.xoff - result_y_offset = (intersection.yoff - yoffset) - self.window.yoff + result_x_offset = (intersection.xoff - xoffset) - window.xoff + result_y_offset = (intersection.yoff - yoffset) - window.yoff partials.append(TileData(data, result_x_offset, result_y_offset)) sorted_partials = sorted(partials) diff --git a/yirgacheffe/layers/h3layer.py b/yirgacheffe/layers/h3layer.py index 7cbed01..da0b24a 100644 --- a/yirgacheffe/layers/h3layer.py +++ b/yirgacheffe/layers/h3layer.py @@ -1,4 +1,5 @@ from math import ceil, floor +from typing import Any import h3 import numpy as np @@ -77,7 +78,7 @@ def __init__(self, cell_id: str, scale: PixelScale, projection: str): def datatype(self) -> int: return gdal.GDT_CFloat64 - def read_array(self, xoffset, yoffset, xsize, ysize): + def read_array_with_window(self, xoffset: int, yoffset: int, xsize: int, ysize: int, window: Window) -> Any: if (xsize <= 0) or (ysize <= 0): raise ValueError("Request dimensions must be positive and non-zero") @@ -87,8 +88,8 @@ def read_array(self, xoffset, yoffset, xsize, ysize): if max_width_projection < 180: target_window = Window( - self.window.xoff + xoffset, - self.window.yoff + yoffset, + window.xoff + xoffset, + window.yoff + yoffset, xsize, ysize ) diff --git a/yirgacheffe/layers/rasters.py b/yirgacheffe/layers/rasters.py index 13e31b5..cfc382c 100644 --- a/yirgacheffe/layers/rasters.py +++ b/yirgacheffe/layers/rasters.py @@ -81,7 +81,7 @@ def empty_raster_layer( @staticmethod def empty_raster_layer_like( - layer: YirgacheffeLayer, + layer: Any, filename: Optional[str]=None, area: Optional[Area]=None, datatype: Optional[int]=None, @@ -93,17 +93,19 @@ def empty_raster_layer_like( ) -> RasterLayerT: width = layer.window.xsize height = layer.window.ysize - geo_transform = layer.geo_transform - if area is not None: - scale = layer.pixel_scale - if scale is None: - raise ValueError("Can not work out area without explicit pixel scale") - abs_xstep, abs_ystep = abs(scale.xstep), abs(scale.ystep) - width = round_up_pixels((area.right - area.left) / abs_xstep, abs_xstep) - height = round_up_pixels((area.top - area.bottom) / abs_ystep, abs_ystep) - geo_transform = ( - area.left, scale.xstep, 0.0, area.top, 0.0, scale.ystep - ) + if area is None: + area = layer.area + assert area is not None + + scale = layer.pixel_scale + if scale is None: + raise ValueError("Can not work out area without explicit pixel scale") + abs_xstep, abs_ystep = abs(scale.xstep), abs(scale.ystep) + width = round_up_pixels((area.right - area.left) / abs_xstep, abs_xstep) + height = round_up_pixels((area.top - area.bottom) / abs_ystep, abs_ystep) + geo_transform = ( + area.left, scale.xstep, 0.0, area.top, 0.0, scale.ystep + ) options = [] if threads is not None: @@ -282,7 +284,7 @@ def datatype(self) -> int: assert self._dataset return self._dataset.GetRasterBand(1).DataType - def read_array(self, xoffset, yoffset, xsize, ysize) -> Any: + def read_array_with_window(self, xoffset: int, yoffset: int, xsize: int, ysize: int, window: Window) -> Any: if self._dataset is None: self._unpark() assert self._dataset @@ -293,8 +295,8 @@ def read_array(self, xoffset, yoffset, xsize, ysize) -> Any: # if we're dealing with an intersection, we can just read the data directly, # otherwise we need to read the data into another array with suitable padding target_window = Window( - self.window.xoff + xoffset, - self.window.yoff + yoffset, + window.xoff + xoffset, + window.yoff + yoffset, xsize, ysize ) @@ -320,12 +322,12 @@ def read_array(self, xoffset, yoffset, xsize, ysize) -> Any: subset, ( ( - (intersection.yoff - self.window.yoff) - yoffset, - (ysize - ((intersection.yoff - self.window.yoff) + intersection.ysize)) + yoffset, + (intersection.yoff - window.yoff) - yoffset, + (ysize - ((intersection.yoff - window.yoff) + intersection.ysize)) + yoffset, ), ( - (intersection.xoff - self.window.xoff) - xoffset, - xsize - ((intersection.xoff - self.window.xoff) + intersection.xsize) + xoffset, + (intersection.xoff - window.xoff) - xoffset, + xsize - ((intersection.xoff - window.xoff) + intersection.xsize) + xoffset, ) ), 'constant' diff --git a/yirgacheffe/layers/rescaled.py b/yirgacheffe/layers/rescaled.py index 610f5e6..0814eb9 100644 --- a/yirgacheffe/layers/rescaled.py +++ b/yirgacheffe/layers/rescaled.py @@ -3,7 +3,7 @@ from skimage import transform -from ..window import PixelScale +from ..window import PixelScale, Window from .rasters import RasterLayer, YirgacheffeLayer @@ -54,12 +54,13 @@ def _park(self): def _unpark(self): self._src._unpark() - def read_array(self, xoffset, yoffset, xsize, ysize) -> Any: + def read_array_with_window(self, xoffset: int, yoffset: int, xsize: int, ysize: int, window: Window) -> Any: + # to avoid aliasing issues, we try to scale to the nearest pixel # and recrop when scaling bigger - xoffset = xoffset + self.window.xoff - yoffset = yoffset + self.window.yoff + xoffset = xoffset + window.xoff + yoffset = yoffset + window.yoff src_x_offset = floor(xoffset / self._x_scale) src_y_offset = floor(yoffset / self._y_scale) diff --git a/yirgacheffe/layers/vectors.py b/yirgacheffe/layers/vectors.py index cc82292..9936a66 100644 --- a/yirgacheffe/layers/vectors.py +++ b/yirgacheffe/layers/vectors.py @@ -1,6 +1,7 @@ import os from math import ceil, floor from typing import Any, Optional, Tuple, Union +from typing_extensions import NotRequired from osgeo import gdal, ogr @@ -331,18 +332,18 @@ def _unpark(self): def datatype(self) -> int: return self._datatype - def read_array(self, xoffset, yoffset, xsize, ysize): + def read_array_for_area(self, target_area: Area, x: int, y: int, width: int, height: int) -> Any: if self._original is None: self._unpark() - if (xsize <= 0) or (ysize <= 0): + if (width <= 0) or (height <= 0): raise ValueError("Request dimensions must be positive and non-zero") # I did try recycling this object to save allocation/dealloction, but in practice it # seemed to only make things slower (particularly as you need to zero the memory each time yourself) dataset = gdal.GetDriverByName('mem').Create( 'mem', - xsize, - ysize, + width, + height, 1, self.datatype, [] @@ -352,10 +353,10 @@ def read_array(self, xoffset, yoffset, xsize, ysize): dataset.SetProjection(self._projection) dataset.SetGeoTransform([ - self._active_area.left + (xoffset * self._pixel_scale.xstep), + target_area.left + (x * self._pixel_scale.xstep), self._pixel_scale.xstep, 0.0, - self._active_area.top + (yoffset * self._pixel_scale.ystep), + target_area.top + (y * self._pixel_scale.ystep), 0.0, self._pixel_scale.ystep ]) @@ -366,5 +367,11 @@ def read_array(self, xoffset, yoffset, xsize, ysize): else: raise ValueError("Burn value for layer should be number or field name") - res = dataset.ReadAsArray(0, 0, xsize, ysize) + res = dataset.ReadAsArray(0, 0, width, height) return res + + def read_array_with_window(self, _x, _y, _width, _height, _window) -> Any: + assert NotRequired + + def read_array(self, x: int, y: int, width: int, height: int) -> Any: + return self.read_array_for_area(self._active_area, x, y, width, height) diff --git a/yirgacheffe/operators.py b/yirgacheffe/operators.py index 379e901..e0eb7ae 100644 --- a/yirgacheffe/operators.py +++ b/yirgacheffe/operators.py @@ -2,6 +2,7 @@ import time import multiprocessing import types +from enum import Enum from multiprocessing import Semaphore, Process from multiprocessing.managers import SharedMemoryManager @@ -10,8 +11,15 @@ from dill import dumps, loads from . import constants -from .window import Window +from .rounding import are_pixel_scales_equal_enough, round_up_pixels, round_down_pixels +from .window import Area, PixelScale, Window +class WindowOperation(Enum): + NONE = 1 + UNION = 2 + INTERSECTION = 3 + LEFT = 4 + RIGHT = 5 class LayerConstant: def __init__(self, val): @@ -20,62 +28,63 @@ def __init__(self, val): def __str__(self): return str(self.val) - def _eval(self, _index, _step): + def _eval(self, _area, _index, _step, _target_window): return self.val class LayerMathMixin: def __add__(self, other): - return LayerOperation(self, np.ndarray.__add__, other) + return LayerOperation(self, np.ndarray.__add__, other, window_op=WindowOperation.UNION) def __sub__(self, other): - return LayerOperation(self, np.ndarray.__sub__, other) + return LayerOperation(self, np.ndarray.__sub__, other, window_op=WindowOperation.UNION) def __mul__(self, other): - return LayerOperation(self, np.ndarray.__mul__, other) + return LayerOperation(self, np.ndarray.__mul__, other, window_op=WindowOperation.INTERSECTION) def __truediv__(self, other): - return LayerOperation(self, np.ndarray.__truediv__, other) + return LayerOperation(self, np.ndarray.__truediv__, other, window_op=WindowOperation.INTERSECTION) def __pow__(self, other): - return LayerOperation(self, np.ndarray.__pow__, other) + return LayerOperation(self, np.ndarray.__pow__, other, window_op=WindowOperation.UNION) def __eq__(self, other): - return LayerOperation(self, np.ndarray.__eq__, other) + return LayerOperation(self, np.ndarray.__eq__, other, window_op=WindowOperation.INTERSECTION) def __ne__(self, other): - return LayerOperation(self, np.ndarray.__ne__, other) + return LayerOperation(self, np.ndarray.__ne__, other, window_op=WindowOperation.UNION) def __lt__(self, other): - return LayerOperation(self, np.ndarray.__lt__, other) + return LayerOperation(self, np.ndarray.__lt__, other, window_op=WindowOperation.UNION) def __le__(self, other): - return LayerOperation(self, np.ndarray.__le__, other) + return LayerOperation(self, np.ndarray.__le__, other, window_op=WindowOperation.UNION) def __gt__(self, other): - return LayerOperation(self, np.ndarray.__gt__, other) + return LayerOperation(self, np.ndarray.__gt__, other, window_op=WindowOperation.UNION) def __ge__(self, other): - return LayerOperation(self, np.ndarray.__ge__, other) + return LayerOperation(self, np.ndarray.__ge__, other, window_op=WindowOperation.UNION) def __and__(self, other): - return LayerOperation(self, np.ndarray.__and__, other) + return LayerOperation(self, np.ndarray.__and__, other, window_op=WindowOperation.INTERSECTION) def __or__(self, other): - return LayerOperation(self, np.ndarray.__or__, other) + return LayerOperation(self, np.ndarray.__or__, other, window_op=WindowOperation.UNION) - def _eval(self, index, step, target_window=None): + def _eval(self, area, index, step, target_window=None): try: - window = self.window - return self.read_array(0, index, window.xsize, step) + window = self.window if target_window is None else target_window + return self.read_array_for_area(area, 0, index, window.xsize, step) except AttributeError: - return self.read_array(0, index, target_window.xsize if target_window else 1, step) + return self.read_array_for_area(area, 0, index, target_window.xsize if target_window else 1, step) def nan_to_num(self, nan=0, posinf=None, neginf=None): return LayerOperation( self, np.nan_to_num, + window_op=WindowOperation.NONE, copy=False, nan=nan, posinf=posinf, @@ -86,37 +95,43 @@ def isin(self, test_elements): return LayerOperation( self, np.isin, + window_op=WindowOperation.NONE, test_elements=test_elements, ) def log(self): return LayerOperation( self, - np.log + np.log, + window_op=WindowOperation.NONE, ) def log2(self): return LayerOperation( self, - np.log2 + np.log2, + window_op=WindowOperation.NONE, ) def log10(self): return LayerOperation( self, - np.log10 + np.log10, + window_op=WindowOperation.NONE, ) def exp(self): return LayerOperation( self, - np.exp + np.exp, + window_op=WindowOperation.NONE, ) def exp2(self): return LayerOperation( self, - np.exp2 + np.exp2, + window_op=WindowOperation.NONE, ) def clip(self, min=None, max=None): # pylint: disable=W0622 @@ -127,6 +142,7 @@ def clip(self, min=None, max=None): # pylint: disable=W0622 return LayerOperation( self, np.clip, + window_op=WindowOperation.NONE, a_min=min, a_max=max, ) @@ -170,6 +186,7 @@ def maximum(a, b): a, np.maximum, b, + window_op=WindowOperation.UNION, ) @staticmethod @@ -178,11 +195,13 @@ def minimum(a, b): a, np.minimum, rhs=b, + window_op=WindowOperation.UNION, ) - def __init__(self, lhs, operator=None, rhs=None, other=None, **kwargs): + def __init__(self, lhs, operator=None, rhs=None, other=None, window_op=WindowOperation.NONE, **kwargs): self.ystep = constants.YSTEP self.kwargs = kwargs + self.window_op = window_op if lhs is None: raise ValueError("LHS on operation should not be none") @@ -199,6 +218,8 @@ def __init__(self, lhs, operator=None, rhs=None, other=None, **kwargs): else: raise ValueError("Numpy arrays are no allowed") else: + if not are_pixel_scales_equal_enough([lhs.pixel_scale, rhs.pixel_scale]): + raise ValueError("Not all layers are at the same pixel scale") self.rhs = rhs else: self.rhs = None @@ -212,6 +233,8 @@ def __init__(self, lhs, operator=None, rhs=None, other=None, **kwargs): else: raise ValueError("Numpy arrays are no allowed") else: + if not are_pixel_scales_equal_enough([lhs.pixel_scale, other.pixel_scale]): + raise ValueError("Not all layers are at the same pixel scale") self.other = other else: self.other = None @@ -241,30 +264,109 @@ def __setstate__(self, state): del state['operator_dill'] self.__dict__.update(state) + @property + def area(self) -> Area: + # The type().__name__ here is to avoid a circular import dependancy + lhs_area = self.lhs.area if not type(self.lhs).__name__ == "ConstantLayer" else None + try: + rhs_area = self.rhs.area if not type(self.rhs).__name__ == "ConstantLayer" else None + except AttributeError: + rhs_area = None + try: + other_area = self.other.area if not type(self.other).__name__ == "ConstantLayer" else None + except AttributeError: + other_area = None + + all_areas = [] + if lhs_area is not None: + all_areas.append(lhs_area) + if rhs_area is not None: + all_areas.append(rhs_area) + if other_area is not None: + all_areas.append(other_area) + + match self.window_op: + case WindowOperation.NONE: + return all_areas[0] + case WindowOperation.LEFT: + return lhs_area + case WindowOperation.RIGHT: + assert rhs_area is not None + return rhs_area + case WindowOperation.INTERSECTION: + intersection = Area( + left=max(x.left for x in all_areas), + top=min(x.top for x in all_areas), + right=min(x.right for x in all_areas), + bottom=max(x.bottom for x in all_areas) + ) + if (intersection.left >= intersection.right) or (intersection.bottom >= intersection.top): + raise ValueError('No intersection possible') + return intersection + case WindowOperation.UNION: + return Area( + left=min(x.left for x in all_areas), + top=max(x.top for x in all_areas), + right=max(x.right for x in all_areas), + bottom=min(x.bottom for x in all_areas) + ) + + @property + def pixel_scale(self) -> PixelScale: + # Because we test at construction that pixel scales for RHS/other are roughly equal, + # I believe this should be sufficient... + try: + pixel_scale = self.lhs.pixel_scale + except AttributeError: + pixel_scale = None + + if pixel_scale is None: + return self.rhs.pixel_scale + return pixel_scale + @property def window(self) -> Window: + pixel_scale = self.pixel_scale + area = self.area + + return Window( + xoff=round_down_pixels(area.left / pixel_scale.xstep, pixel_scale.xstep), + yoff=round_down_pixels(area.top / (pixel_scale.ystep * -1.0), pixel_scale.ystep * -1.0), + xsize=round_up_pixels( + (area.right - area.left) / pixel_scale.xstep, pixel_scale.xstep + ), + ysize=round_up_pixels( + (area.top - area.bottom) / (pixel_scale.ystep * -1.0), + (pixel_scale.ystep * -1.0) + ), + ) + + @property + def datatype(self): + # TODO: Work out how to indicate type promotion via numpy + return self.lhs.datatype + + @property + def projection(self): try: - return self.lhs.window + return self.lhs.projection except AttributeError: - # If neither side had a window attribute then - # the operation doesn't have anything useful to - # say, so let the exception propagate up - return self.rhs.window + return self.rhs.projection - def _eval(self, index, step): # pylint: disable=W0221 - lhs_data = self.lhs._eval(index, step) + def _eval(self, area, index, step, target_window=None): + lhs_data = self.lhs._eval(area, index, step, target_window) if self.operator is None: return lhs_data if self.other is not None: assert self.rhs is not None - rhs_data = self.rhs._eval(index, step) - other_data = self.other._eval(index, step) + rhs_data = self.rhs._eval(area, index, step, target_window) + other_data = self.other._eval(area, index, step, target_window) return self.operator(lhs_data, rhs_data, other_data, **self.kwargs) if self.rhs is not None: - rhs_data = self.rhs._eval(index, step) + rhs_data = self.rhs._eval(area, index, step, target_window) return self.operator(lhs_data, rhs_data, **self.kwargs) return self.operator(lhs_data, **self.kwargs) @@ -280,7 +382,7 @@ def sum(self): step=self.ystep if yoffset+step > computation_window.ysize: step = computation_window.ysize - yoffset - chunk = self._eval(yoffset, step) + chunk = self._eval(self.area, yoffset, step, computation_window) res += np.sum(chunk.astype(np.float64)) return res @@ -291,7 +393,7 @@ def min(self): step=self.ystep if yoffset+step > computation_window.ysize: step = computation_window.ysize - yoffset - chunk = self._eval(yoffset, step) + chunk = self._eval(self.area, yoffset, step, computation_window) chunk_min = np.min(chunk) if (res is None) or (res > chunk_min): res = chunk_min @@ -304,7 +406,7 @@ def max(self): step=self.ystep if yoffset+step > computation_window.ysize: step = computation_window.ysize - yoffset - chunk = self._eval(yoffset, step) + chunk = self._eval(self.area, yoffset, step, computation_window) chunk_max = np.max(chunk) if (res is None) or (chunk_max > res): res = chunk_max @@ -338,7 +440,7 @@ def save(self, destination_layer, and_sum=False, callback=None, band=1): step=self.ystep if yoffset+step > computation_window.ysize: step = computation_window.ysize - yoffset - chunk = self._eval(yoffset, step) + chunk = self._eval(self.area, yoffset, step, computation_window) if isinstance(chunk, (float, int)): chunk = np.full((step, destination_window.xsize), chunk) band.WriteArray( @@ -372,7 +474,7 @@ def _parallel_worker(self, index, shared_mem, sem, np_dtype, width, input_queue, break yoffset, step = task - arr[:step] = self._eval(yoffset, step) + arr[:step] = self._eval(self.area, yoffset, step) output_queue.put((index, yoffset, step)) @@ -505,10 +607,12 @@ def parallel_save(self, destination_layer, and_sum=False, callback=None, paralle class ShaderStyleOperation(LayerOperation): - def _eval(self, index, step): - lhs_data = self.lhs._eval(index, step, self.window) + def _eval(self, area, index, step, target_window=None): + if target_window is None: + target_window = self.window + lhs_data = self.lhs._eval(area, index, step, target_window) if self.rhs is not None: - rhs_data = self.rhs._eval(index, step, self.window) + rhs_data = self.rhs._eval(area, index, step, target_window) else: rhs_data = None