Add GeoDataFrame support to Pipeline #173

Merged: 4 commits, Oct 4, 2024
2 changes: 1 addition & 1 deletion .github/environment.yml
@@ -9,4 +9,4 @@ dependencies:
- pdal
- pytest
- meshio
- pandas
- geopandas
22 changes: 20 additions & 2 deletions src/pdal/pipeline.py
@@ -17,6 +17,11 @@
except ModuleNotFoundError: # pragma: no cover
DataFrame = None

try:
from geopandas import GeoDataFrame, points_from_xy
except ModuleNotFoundError: # pragma: no cover
GeoDataFrame = points_from_xy = None

from . import drivers, libpdalpython

LogLevelToPDAL = {
@@ -45,7 +50,7 @@ def __init__(

# Convert our data frames to Numpy Structured Arrays
if dataframes:
arrays = [df.to_records() for df in dataframes]
arrays = [df.to_records() if "geometry" not in df.columns else df.drop(columns=["geometry"]).to_records() for df in dataframes]

super().__init__()
self._stages: List[Stage] = []
@@ -124,13 +129,26 @@ def get_meshio(self, idx: int) -> Optional[Mesh]:
[("triangle", np.stack((mesh["A"], mesh["B"], mesh["C"]), 1))],
)


def get_dataframe(self, idx: int) -> Optional[DataFrame]:
if DataFrame is None:
raise RuntimeError("Pandas support requires Pandas to be installed")

return DataFrame(self.arrays[idx])

def get_geodataframe(self, idx: int, xyz: bool=False, crs: Any=None) -> Optional[GeoDataFrame]:
if GeoDataFrame is None:
raise RuntimeError("GeoPandas support requires GeoPandas to be installed")
df = DataFrame(self.arrays[idx])
coords = [df["X"], df["Y"], df["Z"]] if xyz else [df["X"], df["Y"]]
geometry = points_from_xy(*coords)
gdf = GeoDataFrame(
df,
geometry=geometry,
crs=crs,
)
df = coords = geometry = None
return gdf

def _get_json(self) -> str:
return self.toJSON()

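The new surface area is small: GeoDataFrames can be passed to `Pipeline` through the existing `dataframes=` keyword (the `geometry` column is dropped before conversion to structured arrays), and results can be pulled back out with `get_geodataframe()`. A minimal usage sketch, assuming geopandas is installed, a local `autzen-utm.las` file, and an illustrative `EPSG:26910` CRS:

```python
import pdal

# Read a point cloud and execute the pipeline.
p = pdal.Reader("autzen-utm.las").pipeline()
p.execute()

# Pull the first result array back out as a GeoDataFrame. xyz=True builds
# 3D points from X/Y/Z; crs tags the frame with a coordinate reference
# system. EPSG:26910 is illustrative only; use the data's actual CRS.
gdf = p.get_geodataframe(0, xyz=True, crs="EPSG:26910")

# GeoDataFrames can also be fed back in: the geometry column is dropped
# before the frame is converted to a NumPy structured array.
filtered = pdal.Pipeline(
    '{"pipeline": [{"type": "filters.range", "limits": "Intensity[100:300)"}]}',
    dataframes=[gdf],
)
filtered.execute()
print(len(filtered.arrays[0]))
```
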
59 changes: 59 additions & 0 deletions test/test_pipeline.py
@@ -541,6 +541,65 @@ def test_load(self):
assert data["Intensity"].sum() == 57684


class TestGeoDataFrame:

@pytest.mark.skipif(
not pdal.pipeline.GeoDataFrame,
reason="geopandas is not available",
)
def test_fetch(self):
r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
p = r.pipeline()
p.execute()
record_count = p.arrays[0].shape[0]
dimension_count = len(p.arrays[0].dtype)
gdf = p.get_geodataframe(0)
gdf_xyz = p.get_geodataframe(0, xyz=True)
gdf_crs = p.get_geodataframe(0, crs="EPSG:4326")
assert len(gdf) == record_count
assert len(gdf.columns) == dimension_count + 1
assert isinstance(gdf, pdal.pipeline.GeoDataFrame)
assert gdf.geometry.is_valid.all()
assert not gdf.geometry.is_empty.any()
assert gdf.crs is None
assert gdf.geometry.z.isna().all()
assert not gdf_xyz.geometry.z.isna().any()
assert gdf_crs.crs.srs == "EPSG:4326"

@pytest.mark.skipif(
not pdal.pipeline.GeoDataFrame,
reason="geopandas is not available",
)
def test_load(self):
r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
p = r.pipeline()
p.execute()
data = p.arrays[0]
gdf = pdal.pipeline.GeoDataFrame(
data,
geometry=pdal.pipeline.points_from_xy(data["X"], data["Y"], data["Z"])
)
dataframes = [gdf, gdf, gdf]
filter_intensity = """{
"pipeline":[
{
"type":"filters.range",
"limits":"Intensity[100:300)"
}
]
}"""
p = pdal.Pipeline(filter_intensity, dataframes = dataframes)
p.execute()
arrays = p.arrays
assert len(arrays) == 3

# We copied the array three times. Sum the Intensity values
# post filtering to see if we had our intended effect
for data in arrays:
assert len(data) == 387
assert data["Intensity"].sum() == 57684


class TestPipelineIterator:
@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
def test_non_streamable(self, filename):