From 5066e81cc516865a60b26b94ea4bb139942df16d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 4 Feb 2025 22:03:26 +0100 Subject: [PATCH 1/8] gdal vector filter: add --where and --fields; add documentation --- apps/gdalalg_vector_filter.cpp | 274 +++++++++++++++++- apps/gdalalg_vector_filter.h | 2 + .../utilities/test_gdalalg_vector_filter.py | 110 ++++++- doc/source/conf.py | 7 + doc/source/programs/gdal_vector.rst | 1 + doc/source/programs/gdal_vector_filter.rst | 132 +++++++++ doc/source/programs/gdal_vector_pipeline.rst | 6 + doc/source/programs/index.rst | 2 + .../programs/migration_guide_to_gdal_cli.rst | 13 +- 9 files changed, 541 insertions(+), 6 deletions(-) create mode 100644 doc/source/programs/gdal_vector_filter.rst diff --git a/apps/gdalalg_vector_filter.cpp b/apps/gdalalg_vector_filter.cpp index 50adcc6ca489..414a34cf2373 100644 --- a/apps/gdalalg_vector_filter.cpp +++ b/apps/gdalalg_vector_filter.cpp @@ -14,6 +14,9 @@ #include "gdal_priv.h" #include "ogrsf_frmts.h" +#include "ogr_p.h" + +#include //! 
@cond Doxygen_Suppress @@ -30,8 +33,237 @@ GDALVectorFilterAlgorithm::GDALVectorFilterAlgorithm(bool standaloneStep) standaloneStep) { AddBBOXArg(&m_bbox); + AddArg("where", 0, + _("Attribute query in a restricted form of the queries used in the " + "SQL WHERE statement"), + &m_where) + .SetReadFromFileAtSyntaxAllowed() + .SetMetaVar("|@") + .SetRemoveSQLCommentsEnabled(); + AddArg("fields", 0, _("Selected fields"), &m_selectedFields); } +namespace +{ + +/************************************************************************/ +/* GDALVectorFilterAlgorithmDataset */ +/************************************************************************/ + +class GDALVectorFilterAlgorithmDataset final : public GDALDataset +{ + std::vector> m_layers{}; + + public: + GDALVectorFilterAlgorithmDataset() = default; + + void AddLayer(std::unique_ptr poLayer) + { + m_layers.push_back(std::move(poLayer)); + } + + int GetLayerCount() override + { + return static_cast(m_layers.size()); + } + + OGRLayer *GetLayer(int idx) override + { + return idx >= 0 && idx < GetLayerCount() ? 
m_layers[idx].get() + : nullptr; + } +}; + +/************************************************************************/ +/* GDALVectorFilterAlgorithmLayer */ +/************************************************************************/ + +class GDALVectorFilterAlgorithmLayer final : public OGRLayer +{ + private: + bool m_bIsOK = true; + OGRLayer *const m_poSrcLayer; + OGRFeatureDefn *const m_poFeatureDefn = nullptr; + std::vector m_anMapSrcFieldsToDstFields{}; + std::vector m_anMapDstGeomFieldsToSrcGeomFields{}; + + CPL_DISALLOW_COPY_ASSIGN(GDALVectorFilterAlgorithmLayer) + + std::unique_ptr TranslateFeature(OGRFeature *poSrcFeature) const + { + auto poFeature = std::make_unique(m_poFeatureDefn); + poFeature->SetFID(poSrcFeature->GetFID()); + const auto styleString = poSrcFeature->GetStyleString(); + if (styleString) + poFeature->SetStyleString(styleString); + poFeature->SetFieldsFrom( + poSrcFeature, m_anMapSrcFieldsToDstFields.data(), false, false); + int iDstGeomField = 0; + for (int nSrcGeomField : m_anMapDstGeomFieldsToSrcGeomFields) + { + poFeature->SetGeomFieldDirectly( + iDstGeomField, poSrcFeature->StealGeometry(nSrcGeomField)); + ++iDstGeomField; + } + return poFeature; + } + + public: + GDALVectorFilterAlgorithmLayer( + OGRLayer *poSrcLayer, const std::vector &selectedFields, + bool bStrict) + : m_poSrcLayer(poSrcLayer), + m_poFeatureDefn(new OGRFeatureDefn(poSrcLayer->GetName())) + { + SetDescription(poSrcLayer->GetDescription()); + m_poFeatureDefn->SetGeomType(wkbNone); + m_poFeatureDefn->Reference(); + + std::set oSetSelFields; + std::set oSetSelFieldsUC; + for (const std::string &osFieldName : selectedFields) + { + oSetSelFields.insert(osFieldName); + oSetSelFieldsUC.insert(CPLString(osFieldName).toupper()); + } + + std::set oSetUsedSetFieldsUC; + + const auto poSrcLayerDefn = poSrcLayer->GetLayerDefn(); + for (int i = 0; i < poSrcLayerDefn->GetFieldCount(); ++i) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetFieldDefn(i); + auto oIter = 
oSetSelFieldsUC.find( + CPLString(poSrcFieldDefn->GetNameRef()).toupper()); + if (oIter != oSetSelFieldsUC.end()) + { + m_anMapSrcFieldsToDstFields.push_back( + m_poFeatureDefn->GetFieldCount()); + OGRFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddFieldDefn(&oDstFieldDefn); + oSetUsedSetFieldsUC.insert(*oIter); + } + else + { + m_anMapSrcFieldsToDstFields.push_back(-1); + } + } + + for (int i = 0; i < poSrcLayerDefn->GetGeomFieldCount(); ++i) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(i); + auto oIter = oSetSelFieldsUC.find( + CPLString(poSrcFieldDefn->GetNameRef()).toupper()); + if (oIter != oSetSelFieldsUC.end()) + { + m_anMapDstGeomFieldsToSrcGeomFields.push_back(i); + OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn); + oSetUsedSetFieldsUC.insert(*oIter); + } + } + + auto oIter = oSetSelFieldsUC.find( + CPLString(OGR_GEOMETRY_DEFAULT_NON_EMPTY_NAME).toupper()); + if (m_poFeatureDefn->GetGeomFieldCount() == 0 && + oIter != oSetSelFieldsUC.end() && + poSrcLayerDefn->GetGeomFieldCount() == 1) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(0); + m_anMapDstGeomFieldsToSrcGeomFields.push_back(0); + OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn); + oSetUsedSetFieldsUC.insert(*oIter); + } + + if (oSetUsedSetFieldsUC.size() != oSetSelFields.size()) + { + for (const std::string &osName : oSetSelFields) + { + if (!cpl::contains(oSetUsedSetFieldsUC, + CPLString(osName).toupper())) + { + CPLError(bStrict ? CE_Failure : CE_Warning, CPLE_AppDefined, + "Field '%s' does not exist in layer '%s'.%s", + osName.c_str(), poSrcLayer->GetDescription(), + bStrict ? 
"" : " It will be ignored"); + if (bStrict) + m_bIsOK = false; + } + } + } + } + + ~GDALVectorFilterAlgorithmLayer() override + { + if (m_poFeatureDefn) + m_poFeatureDefn->Dereference(); + } + + bool IsOK() const + { + return m_bIsOK; + } + + OGRFeatureDefn *GetLayerDefn() override + { + return m_poFeatureDefn; + } + + GIntBig GetFeatureCount(int bForce) override + { + return m_poSrcLayer->GetFeatureCount(bForce); + } + + OGRErr GetExtent(OGREnvelope *psExtent, int bForce) override + { + return m_poSrcLayer->GetExtent(psExtent, bForce); + } + + OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce) override + { + return m_poSrcLayer->GetExtent(iGeomField, psExtent, bForce); + } + + void ResetReading() override + { + m_poSrcLayer->ResetReading(); + } + + OGRFeature *GetNextFeature() override + { + auto poSrcFeature = + std::unique_ptr(m_poSrcLayer->GetNextFeature()); + if (!poSrcFeature) + return nullptr; + return TranslateFeature(poSrcFeature.get()).release(); + } + + OGRFeature *GetFeature(GIntBig nFID) override + { + auto poSrcFeature = + std::unique_ptr(m_poSrcLayer->GetFeature(nFID)); + if (!poSrcFeature) + return nullptr; + return TranslateFeature(poSrcFeature.get()).release(); + } + + int TestCapability(const char *pszCap) override + { + if (EQUAL(pszCap, OLCRandomRead) || EQUAL(pszCap, OLCCurveGeometries) || + EQUAL(pszCap, OLCMeasuredGeometries) || + EQUAL(pszCap, OLCZGeometries) || + EQUAL(pszCap, OLCFastFeatureCount) || + EQUAL(pszCap, OLCFastGetExtent) || EQUAL(pszCap, OLCStringsAsUTF8)) + { + return m_poSrcLayer->TestCapability(pszCap); + } + return false; + } +}; + +} // namespace + /************************************************************************/ /* GDALVectorFilterAlgorithm::RunStep() */ /************************************************************************/ @@ -42,6 +274,9 @@ bool GDALVectorFilterAlgorithm::RunStep(GDALProgressFunc, void *) CPLAssert(m_outputDataset.GetName().empty()); 
CPLAssert(!m_outputDataset.GetDatasetRef()); + auto poSrcDS = m_inputDataset.GetDatasetRef(); + const int nLayerCount = poSrcDS->GetLayerCount(); + bool ret = true; if (m_bbox.size() == 4) { @@ -49,8 +284,6 @@ bool GDALVectorFilterAlgorithm::RunStep(GDALProgressFunc, void *) const double ymin = m_bbox[1]; const double xmax = m_bbox[2]; const double ymax = m_bbox[3]; - auto poSrcDS = m_inputDataset.GetDatasetRef(); - const int nLayerCount = poSrcDS->GetLayerCount(); for (int i = 0; i < nLayerCount; ++i) { auto poSrcLayer = poSrcDS->GetLayer(i); @@ -60,7 +293,42 @@ bool GDALVectorFilterAlgorithm::RunStep(GDALProgressFunc, void *) } } - if (ret) + if (ret && !m_where.empty()) + { + for (int i = 0; i < nLayerCount; ++i) + { + auto poSrcLayer = poSrcDS->GetLayer(i); + ret = ret && (poSrcLayer != nullptr); + if (ret) + ret = poSrcLayer->SetAttributeFilter(m_where.c_str()) == + OGRERR_NONE; + } + } + + if (ret && !m_selectedFields.empty()) + { + auto outDS = std::make_unique(); + outDS->SetDescription(poSrcDS->GetDescription()); + + for (int i = 0; i < nLayerCount; ++i) + { + auto poSrcLayer = poSrcDS->GetLayer(i); + ret = ret && (poSrcLayer != nullptr); + if (ret) + { + auto poLayer = std::make_unique( + poSrcLayer, m_selectedFields, /* bStrict = */ true); + ret = poLayer->IsOK(); + if (ret) + { + outDS->AddLayer(std::move(poLayer)); + } + } + } + + m_outputDataset.Set(std::move(outDS)); + } + else if (ret) { m_outputDataset.Set(m_inputDataset.GetDatasetRef()); } diff --git a/apps/gdalalg_vector_filter.h b/apps/gdalalg_vector_filter.h index f2df3ec32b25..b43bd0cf8ca3 100644 --- a/apps/gdalalg_vector_filter.h +++ b/apps/gdalalg_vector_filter.h @@ -41,6 +41,8 @@ class GDALVectorFilterAlgorithm /* non final */ bool RunStep(GDALProgressFunc pfnProgress, void *pProgressData) override; std::vector m_bbox{}; + std::string m_where{}; + std::vector m_selectedFields{}; }; /************************************************************************/ diff --git 
a/autotest/utilities/test_gdalalg_vector_filter.py b/autotest/utilities/test_gdalalg_vector_filter.py index 33b11a4239a3..6b7783b838ce 100755 --- a/autotest/utilities/test_gdalalg_vector_filter.py +++ b/autotest/utilities/test_gdalalg_vector_filter.py @@ -11,7 +11,9 @@ # SPDX-License-Identifier: MIT ############################################################################### -from osgeo import gdal +import pytest + +from osgeo import gdal, ogr def get_filter_alg(): @@ -34,7 +36,7 @@ def test_gdalalg_vector_filter_no_filter(tmp_vsimem): assert ds.GetLayer(0).GetFeatureCount() == 10 -def test_gdalalg_vector_filter_base(tmp_vsimem): +def test_gdalalg_vector_filter_bbox(tmp_vsimem): out_filename = str(tmp_vsimem / "out.shp") @@ -45,3 +47,107 @@ def test_gdalalg_vector_filter_base(tmp_vsimem): with gdal.OpenEx(out_filename) as ds: assert ds.GetLayer(0).GetFeatureCount() == 1 + + +def test_gdalalg_vector_filter_where_discard_all(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + filter_alg = get_filter_alg() + assert filter_alg.ParseRunAndFinalize( + ["--where=0=1", "../ogr/data/poly.shp", out_filename] + ) + + with gdal.OpenEx(out_filename) as ds: + assert ds.GetLayer(0).GetFeatureCount() == 0 + + +def test_gdalalg_vector_filter_where_accept_all(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + filter_alg = get_filter_alg() + assert filter_alg.ParseRunAndFinalize( + ["--where=1=1", "../ogr/data/poly.shp", out_filename] + ) + + with gdal.OpenEx(out_filename) as ds: + assert ds.GetLayer(0).GetFeatureCount() == 10 + + +def test_gdalalg_vector_filter_where_error(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + filter_alg = get_filter_alg() + with pytest.raises( + Exception, match='"invalid" not recognised as an available field.' 
+ ): + filter_alg.ParseRunAndFinalize( + ["--where=invalid", "../ogr/data/poly.shp", out_filename] + ) + + +def test_gdalalg_vector_fields(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + filter_alg = get_filter_alg() + assert filter_alg.ParseRunAndFinalize( + ["--fields=EAS_ID,_ogr_geometry_", "../ogr/data/poly.shp", out_filename] + ) + + with gdal.OpenEx(out_filename) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetFieldCount() == 1 + assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 + assert lyr.GetFeatureCount() == 10 + f = lyr.GetNextFeature() + assert f["EAS_ID"] == 168 + assert f.GetGeometryRef() is not None + lyr.ResetReading() + assert len([f for f in lyr]) == 10 + f = lyr.GetFeature(0) + assert f["EAS_ID"] == 168 + with pytest.raises(Exception): + lyr.GetFeature(10) + assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 1 + assert lyr.TestCapability(ogr.OLCRandomWrite) == 0 + assert lyr.GetExtent() == (478315.53125, 481645.3125, 4762880.5, 4765610.5) + assert lyr.GetExtent(0) == (478315.53125, 481645.3125, 4762880.5, 4765610.5) + + +def test_gdalalg_vector_fields_geom_named(tmp_vsimem): + + src_ds = gdal.GetDriverByName("Memory").Create("", 0, 0, 0, gdal.GDT_Unknown) + src_lyr = src_ds.CreateLayer("test", geom_type=ogr.wkbNone, srs=None) + src_lyr.CreateGeomField(ogr.GeomFieldDefn("geom_field")) + src_lyr.CreateGeomField(ogr.GeomFieldDefn("geom_field2")) + out_filename = str(tmp_vsimem / "out.shp") + + filter_alg = get_filter_alg() + filter_alg.GetArg("input").Get().SetDataset(src_ds) + filter_alg.GetArg("output").Set(out_filename) + assert filter_alg.ParseCommandLineArguments( + ["--of", "Memory", "--fields=geom_field2"] + ) + assert filter_alg.Run() + + ds = filter_alg.GetArg("output").Get().GetDataset() + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 + assert lyr.GetLayerDefn().GetGeomFieldDefn(0).GetName() == "geom_field2" + + +def test_gdalalg_vector_fields_non_existing(tmp_vsimem): + + 
out_filename = str(tmp_vsimem / "out.shp") + + filter_alg = get_filter_alg() + with pytest.raises( + Exception, + match="Field 'i_do_not_exist' does not exist in layer 'poly'.", + ): + filter_alg.ParseRunAndFinalize( + ["--fields=EAS_ID,i_do_not_exist", "../ogr/data/poly.shp", out_filename] + ) diff --git a/doc/source/conf.py b/doc/source/conf.py index 01247cffc559..281809a1987f 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -313,6 +313,13 @@ [author_evenr], 1, ), + ( + "programs/gdal_vector_filter", + "gdal-vector-filter", + "Filter a vector dataset", + [author_evenr], + 1, + ), ( "programs/gdal_vector_pipeline", "gdal-vector-pipeline", diff --git a/doc/source/programs/gdal_vector.rst b/doc/source/programs/gdal_vector.rst index 1ba9017f8620..c6b766f7f9b6 100644 --- a/doc/source/programs/gdal_vector.rst +++ b/doc/source/programs/gdal_vector.rst @@ -32,6 +32,7 @@ Available sub-commands - :ref:`gdal_vector_clip_subcommand` - :ref:`gdal_vector_convert_subcommand` +- :ref:`gdal_vector_filter_subcommand` - :ref:`gdal_vector_info_subcommand` - :ref:`gdal_vector_pipeline_subcommand` - :ref:`gdal_vector_sql_subcommand` diff --git a/doc/source/programs/gdal_vector_filter.rst b/doc/source/programs/gdal_vector_filter.rst new file mode 100644 index 000000000000..885b88921df7 --- /dev/null +++ b/doc/source/programs/gdal_vector_filter.rst @@ -0,0 +1,132 @@ +.. _gdal_vector_filter_subcommand: + +================================================================================ +"gdal vector filter" sub-command +================================================================================ + +.. versionadded:: 3.11 + +.. only:: html + + Filter a vector dataset. + +.. Index:: gdal vector filter + +Synopsis +-------- + +.. code-block:: + + Usage: gdal vector filter [OPTIONS] + + Filter a vector dataset. 
+ + Positional arguments: + -i, --input Input vector dataset [required] + -o, --output Output vector dataset [required] + + Common Options: + -h, --help Display help message and exit + --version Display GDAL version and exit + --json-usage Display usage as JSON document and exit + --drivers Display driver list as JSON document and exit + --config = Configuration option [may be repeated] + --progress Display progress bar + + Options: + -l, --layer, --input-layer Input layer name(s) [may be repeated] + -f, --of, --format, --output-format Output format + --co, --creation-option = Creation option [may be repeated] + --lco, --layer-creation-option = Layer creation option [may be repeated] + --overwrite Whether overwriting existing output is allowed + --update Whether to open existing dataset in update mode + --overwrite-layer Whether overwriting existing layer is allowed + --append Whether appending to existing layer is allowed + --output-layer Output layer name + --bbox Bounding box as xmin,ymin,xmax,ymax + --where |@ Attribute query in a restricted form of the queries used in the SQL WHERE statement + --fields Selected fields [may be repeated] + + Advanced Options: + --if, --input-format Input formats [may be repeated] + --oo, --open-option Open options [may be repeated] + + +Description +----------- + +:program:`gdal vector filter` can be used to filter a vector dataset by +spatial extent, a SQL WHERE clause or a subset of fields. + +``filter`` can also be used as a step of :ref:`gdal_vector_pipeline_subcommand`. + + +Standard options +++++++++++++++++ + +.. include:: gdal_options/of_vector.rst + +.. include:: gdal_options/co_vector.rst + +.. include:: gdal_options/overwrite.rst + +.. option:: --bbox ,,, + + Bounds to which to filter the dataset. They are assumed to be in the CRS of + the input dataset. + The X and Y axes are the "GIS friendly ones", that is X is longitude or easting, + and Y is latitude or northing. 
+ Note that filtering does not clip geometries to the bounding box. + +.. option:: --where |@ + + Attribute query (like SQL WHERE). + +.. option:: --fields + + Comma-separated list of fields from input layer to copy to the new layer. + + Field names with spaces, commas or double-quote + should be surrounded with a starting and ending double-quote character, and + double-quote characters in a field name should be escaped with backslash. + + Depending on the shell used, this might require further quoting. For example, + to select ``regular_field``, ``a_field_with space, and comma`` and + ``a field with " double quote`` with a Unix shell: + + .. code-block:: bash + + --fields "regular_field,\"a_field_with space, and comma\",\"a field with \\\" double quote\"" + + A field is only selected once, even if mentioned several times in the list. + + Geometry fields can also be specified in the list. If the source layer has + no explicit name for the geometry field, ``_ogr_geometry_`` must be used to + select the unique geometry field. + + Specifying a non-existing source field name results in an error. + + +Advanced options +++++++++++++++++ + +.. include:: gdal_options/oo.rst + +.. include:: gdal_options/if.rst + +Examples +-------- + +.. example:: + :title: Select features from a GeoPackage file that intersect the bounding box from longitude 2, latitude 49, to longitude 3, latitude 50 in WGS 84 + + .. code-block:: bash + + $ gdal vector filter --bbox=2,49,3,50 in.gpkg out.gpkg --overwrite + +.. example:: + :title: Select the EAS_ID field and the geometry field from a Shapefile + + .. 
code-block:: bash + + $ gdal vector filter --fields=EAS_ID,_ogr_geometry_ in.shp out.gpkg --overwrite diff --git a/doc/source/programs/gdal_vector_pipeline.rst b/doc/source/programs/gdal_vector_pipeline.rst index a0eba8db1950..e5cfc22bf3c4 100644 --- a/doc/source/programs/gdal_vector_pipeline.rst +++ b/doc/source/programs/gdal_vector_pipeline.rst @@ -82,6 +82,12 @@ Details for options can be found in :ref:`gdal_vector_clip_subcommand`. Options: --bbox Bounding box as xmin,ymin,xmax,ymax + --where |@ Attribute query in a restricted form of the queries used in the SQL WHERE statement + --fields Selected fields [may be repeated] + + +Details for options can be found in :ref:`gdal_vector_filter_subcommand`. + * reproject [OPTIONS] diff --git a/doc/source/programs/index.rst b/doc/source/programs/index.rst index 350ee14f05a1..cbedd8eb4591 100644 --- a/doc/source/programs/index.rst +++ b/doc/source/programs/index.rst @@ -46,6 +46,7 @@ single :program:`gdal` program that accepts commands and subcommands. gdal_vector_info gdal_vector_clip gdal_vector_convert + gdal_vector_filter gdal_vector_pipeline gdal_vector_sql @@ -71,6 +72,7 @@ single :program:`gdal` program that accepts commands and subcommands. 
- :ref:`gdal_vector_command`: Entry point for vector commands - :ref:`gdal_vector_info_subcommand`: Get information on a vector dataset - :ref:`gdal_vector_clip_subcommand`: Clip a vector dataset + - :ref:`gdal_vector_filter_subcommand`: Filter a vector dataset - :ref:`gdal_vector_convert_subcommand`: Convert a vector dataset - :ref:`gdal_vector_pipeline_subcommand`: Process a vector dataset - :ref:`gdal_vector_sql_subcommand`: Apply SQL statement(s) to a dataset diff --git a/doc/source/programs/migration_guide_to_gdal_cli.rst b/doc/source/programs/migration_guide_to_gdal_cli.rst index 31952e6065c0..4f47077db1a2 100644 --- a/doc/source/programs/migration_guide_to_gdal_cli.rst +++ b/doc/source/programs/migration_guide_to_gdal_cli.rst @@ -173,7 +173,7 @@ Vector commands gdal vector filter --bbox=2,49,3,50 in.gpkg out.gpkg -* Selecting features from a GeoPackage file intersecting a bounding box, but not clipping them to it and reprojecting +* Selecting features from a shapefile intersecting a bounding box, but not clipping them to it and reprojecting .. code-block:: @@ -182,3 +182,14 @@ Vector commands ==> gdal vector pipeline read in.gpkg ! filter --bbox=2,49,3,50 ! reproject --dst-crs=EPSG:32631 ! write out.gpkg + + +* Selecting features from a shapefile based on an attribute query, and restricting to a few fields + +.. 
code-block:: + + ogr2ogr -where "country='Greenland'" -select population,_ogr_geometry_ out.gpkg in.shp + + ==> + + gdal vector filter --where "country='Greenland'" --fields population,_ogr_geometry_ in.shp out.gpkg From b549bd82665148b6a21adf54a02e2d6046e91072 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 5 Feb 2025 22:22:09 +0100 Subject: [PATCH 2/8] gdal vector pipeline steps in standalone mode: allow --of=stream --- apps/gdalalg_vector_pipeline.cpp | 18 +++++++++++++----- gcore/gdalalgorithm.cpp | 27 ++++++++++++++++++--------- gcore/gdalalgorithm.h | 5 +++-- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/apps/gdalalg_vector_pipeline.cpp b/apps/gdalalg_vector_pipeline.cpp index 82764aa8f934..eb1680484668 100644 --- a/apps/gdalalg_vector_pipeline.cpp +++ b/apps/gdalalg_vector_pipeline.cpp @@ -76,7 +76,7 @@ void GDALVectorPipelineStepAlgorithm::AddInputArgs(bool hiddenForCLI) void GDALVectorPipelineStepAlgorithm::AddOutputArgs( bool hiddenForCLI, bool shortNameOutputLayerAllowed) { - AddOutputFormatArg(&m_format) + AddOutputFormatArg(&m_format, true) .AddMetadataItem(GAAMDI_REQUIRED_CAPABILITIES, {GDAL_DCAP_VECTOR, GDAL_DCAP_CREATE}) .SetHiddenForCLI(hiddenForCLI); @@ -145,13 +145,21 @@ bool GDALVectorPipelineStepAlgorithm::RunImpl(GDALProgressFunc pfnProgress, m_outputDataset.Set(nullptr); if (RunStep(nullptr, nullptr)) { - writeAlg.m_inputDataset.Set(m_outputDataset.GetDatasetRef()); - if (writeAlg.Run(pfnProgress, pProgressData)) + if (m_format == "stream") { - m_outputDataset.Set( - writeAlg.m_outputDataset.GetDatasetRef()); ret = true; } + else + { + writeAlg.m_inputDataset.Set( + m_outputDataset.GetDatasetRef()); + if (writeAlg.Run(pfnProgress, pProgressData)) + { + m_outputDataset.Set( + writeAlg.m_outputDataset.GetDatasetRef()); + ret = true; + } + } } } diff --git a/gcore/gdalalgorithm.cpp b/gcore/gdalalgorithm.cpp index 4695cd7031f4..8fa5377eb253 100644 --- a/gcore/gdalalgorithm.cpp +++ b/gcore/gdalalgorithm.cpp @@ -2348,12 
+2348,17 @@ GDALAlgorithm::AddOpenOptionsArg(std::vector *pValue) /* ValidateFormat() */ /************************************************************************/ -bool GDALAlgorithm::ValidateFormat(const GDALAlgorithmArg &arg) const +bool GDALAlgorithm::ValidateFormat(const GDALAlgorithmArg &arg, + bool bStreamAllowed) const { if (arg.GetChoices().empty()) { - const auto Validate = [this, &arg](const std::string &val) + const auto Validate = + [this, &arg, bStreamAllowed](const std::string &val) { + if (bStreamAllowed && val == "stream") + return true; + auto hDriver = GDALGetDriverByName(val.c_str()); if (!hDriver) { @@ -2471,7 +2476,8 @@ GDALAlgorithm::AddInputFormatsArg(std::vector *pValue) AddArg(GDAL_ARG_NAME_INPUT_FORMAT, 0, _("Input formats"), pValue) .AddAlias("if") .SetCategory(GAAC_ADVANCED); - arg.AddValidationAction([this, &arg]() { return ValidateFormat(arg); }); + arg.AddValidationAction([this, &arg]() + { return ValidateFormat(arg, false); }); arg.SetAutoCompleteFunction([&arg](const std::string &) { return FormatAutoCompleteFunction(arg); }); return arg; @@ -2482,13 +2488,16 @@ GDALAlgorithm::AddInputFormatsArg(std::vector *pValue) /************************************************************************/ GDALInConstructionAlgorithmArg & -GDALAlgorithm::AddOutputFormatArg(std::string *pValue) +GDALAlgorithm::AddOutputFormatArg(std::string *pValue, bool bStreamAllowed) { - auto &arg = - AddArg(GDAL_ARG_NAME_OUTPUT_FORMAT, 'f', _("Output format"), pValue) - .AddAlias("of") - .AddAlias("format"); - arg.AddValidationAction([this, &arg]() { return ValidateFormat(arg); }); + auto &arg = AddArg(GDAL_ARG_NAME_OUTPUT_FORMAT, 'f', + bStreamAllowed ? 
_("Output format (\"stream\" allowed)") + : _("Output format"), + pValue) + .AddAlias("of") + .AddAlias("format"); + arg.AddValidationAction([this, &arg, bStreamAllowed]() + { return ValidateFormat(arg, bStreamAllowed); }); arg.SetAutoCompleteFunction([&arg](const std::string &) { return FormatAutoCompleteFunction(arg); }); return arg; diff --git a/gcore/gdalalgorithm.h b/gcore/gdalalgorithm.h index 3c4dbf8255c7..2853d23bb350 100644 --- a/gcore/gdalalgorithm.h +++ b/gcore/gdalalgorithm.h @@ -2152,7 +2152,8 @@ class CPL_DLL GDALAlgorithmRegistry GDALInConstructionAlgorithmArg &AddOutputStringArg(std::string *pValue); /** Add output format argument. */ - GDALInConstructionAlgorithmArg &AddOutputFormatArg(std::string *pValue); + GDALInConstructionAlgorithmArg & + AddOutputFormatArg(std::string *pValue, bool bStreamAllowed = false); /** Add creation option(s) argument. */ GDALInConstructionAlgorithmArg & @@ -2234,7 +2235,7 @@ class CPL_DLL GDALAlgorithmRegistry std::vector, std::vector>> &inConstructionValues); - bool ValidateFormat(const GDALAlgorithmArg &arg) const; + bool ValidateFormat(const GDALAlgorithmArg &arg, bool bStreamAllowed) const; virtual bool RunImpl(GDALProgressFunc pfnProgress, void *pProgressData) = 0; From 3ab228794f8dec23e4221f51ab530b836f94300e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 5 Feb 2025 22:22:45 +0100 Subject: [PATCH 3/8] Test gdal vector filter --of=stream --- .../utilities/test_gdalalg_vector_filter.py | 51 ++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/autotest/utilities/test_gdalalg_vector_filter.py b/autotest/utilities/test_gdalalg_vector_filter.py index 6b7783b838ce..c9efc57ffa9e 100755 --- a/autotest/utilities/test_gdalalg_vector_filter.py +++ b/autotest/utilities/test_gdalalg_vector_filter.py @@ -88,33 +88,38 @@ def test_gdalalg_vector_filter_where_error(tmp_vsimem): ) -def test_gdalalg_vector_fields(tmp_vsimem): - - out_filename = str(tmp_vsimem / "out.shp") +def 
test_gdalalg_vector_fields(): filter_alg = get_filter_alg() - assert filter_alg.ParseRunAndFinalize( - ["--fields=EAS_ID,_ogr_geometry_", "../ogr/data/poly.shp", out_filename] + assert filter_alg.ParseCommandLineArguments( + [ + "--fields=EAS_ID,_ogr_geometry_", + "--of=stream", + "../ogr/data/poly.shp", + "streamed_output", + ] ) + assert filter_alg.Run() - with gdal.OpenEx(out_filename) as ds: - lyr = ds.GetLayer(0) - assert lyr.GetLayerDefn().GetFieldCount() == 1 - assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 - assert lyr.GetFeatureCount() == 10 - f = lyr.GetNextFeature() - assert f["EAS_ID"] == 168 - assert f.GetGeometryRef() is not None - lyr.ResetReading() - assert len([f for f in lyr]) == 10 - f = lyr.GetFeature(0) - assert f["EAS_ID"] == 168 - with pytest.raises(Exception): - lyr.GetFeature(10) - assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 1 - assert lyr.TestCapability(ogr.OLCRandomWrite) == 0 - assert lyr.GetExtent() == (478315.53125, 481645.3125, 4762880.5, 4765610.5) - assert lyr.GetExtent(0) == (478315.53125, 481645.3125, 4762880.5, 4765610.5) + ds = filter_alg.GetArg("output").Get().GetDataset() + + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetFieldCount() == 1 + assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 + assert lyr.GetFeatureCount() == 10 + f = lyr.GetNextFeature() + assert f["EAS_ID"] == 168 + assert f.GetGeometryRef() is not None + lyr.ResetReading() + assert len([f for f in lyr]) == 10 + f = lyr.GetFeature(0) + assert f["EAS_ID"] == 168 + with pytest.raises(Exception): + lyr.GetFeature(10) + assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 1 + assert lyr.TestCapability(ogr.OLCRandomWrite) == 0 + assert lyr.GetExtent() == (478315.53125, 481645.3125, 4762880.5, 4765610.5) + assert lyr.GetExtent(0) == (478315.53125, 481645.3125, 4762880.5, 4765610.5) def test_gdalalg_vector_fields_geom_named(tmp_vsimem): From c6c22c10c2b03c29cbc86666b7dffc4d525975fa Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Feb 2025 
15:45:12 +0100 Subject: [PATCH 4/8] gdal vector filter: fix taking into account attribute and spatial filter on result layer --- apps/gdalalg_vector_filter.cpp | 15 +++++++++++---- .../utilities/test_gdalalg_vector_filter.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/apps/gdalalg_vector_filter.cpp b/apps/gdalalg_vector_filter.cpp index 414a34cf2373..51f3c8edb842 100644 --- a/apps/gdalalg_vector_filter.cpp +++ b/apps/gdalalg_vector_filter.cpp @@ -78,7 +78,9 @@ class GDALVectorFilterAlgorithmDataset final : public GDALDataset /* GDALVectorFilterAlgorithmLayer */ /************************************************************************/ -class GDALVectorFilterAlgorithmLayer final : public OGRLayer +class GDALVectorFilterAlgorithmLayer final + : public OGRLayer, + public OGRGetNextFeatureThroughRaw { private: bool m_bIsOK = true; @@ -108,6 +110,8 @@ class GDALVectorFilterAlgorithmLayer final : public OGRLayer return poFeature; } + DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(GDALVectorFilterAlgorithmLayer) + public: GDALVectorFilterAlgorithmLayer( OGRLayer *poSrcLayer, const std::vector &selectedFields, @@ -212,7 +216,9 @@ class GDALVectorFilterAlgorithmLayer final : public OGRLayer GIntBig GetFeatureCount(int bForce) override { - return m_poSrcLayer->GetFeatureCount(bForce); + if (!m_poAttrQuery && !m_poFilterGeom) + return m_poSrcLayer->GetFeatureCount(bForce); + return OGRLayer::GetFeatureCount(bForce); } OGRErr GetExtent(OGREnvelope *psExtent, int bForce) override @@ -230,7 +236,7 @@ class GDALVectorFilterAlgorithmLayer final : public OGRLayer m_poSrcLayer->ResetReading(); } - OGRFeature *GetNextFeature() override + OGRFeature *GetNextRawFeature() { auto poSrcFeature = std::unique_ptr(m_poSrcLayer->GetNextFeature()); @@ -253,7 +259,8 @@ class GDALVectorFilterAlgorithmLayer final : public OGRLayer if (EQUAL(pszCap, OLCRandomRead) || EQUAL(pszCap, OLCCurveGeometries) || EQUAL(pszCap, OLCMeasuredGeometries) || EQUAL(pszCap, 
OLCZGeometries) || - EQUAL(pszCap, OLCFastFeatureCount) || + (EQUAL(pszCap, OLCFastFeatureCount) && !m_poAttrQuery && + !m_poFilterGeom) || EQUAL(pszCap, OLCFastGetExtent) || EQUAL(pszCap, OLCStringsAsUTF8)) { return m_poSrcLayer->TestCapability(pszCap); diff --git a/autotest/utilities/test_gdalalg_vector_filter.py b/autotest/utilities/test_gdalalg_vector_filter.py index c9efc57ffa9e..7a067a69aa60 100755 --- a/autotest/utilities/test_gdalalg_vector_filter.py +++ b/autotest/utilities/test_gdalalg_vector_filter.py @@ -121,6 +121,24 @@ def test_gdalalg_vector_fields(): assert lyr.GetExtent() == (478315.53125, 481645.3125, 4762880.5, 4765610.5) assert lyr.GetExtent(0) == (478315.53125, 481645.3125, 4762880.5, 4765610.5) + # Test attribute filter on result layer + lyr.SetAttributeFilter("EAS_ID = 170") + lyr.ResetReading() + f = lyr.GetNextFeature() + assert f["EAS_ID"] == 170 + assert lyr.GetNextFeature() is None + assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 0 + assert lyr.GetFeatureCount() == 1 + lyr.SetAttributeFilter(None) + + # Test spatial filter on result layer + lyr.SetSpatialFilterRect(-1, -1, -1, -1) + lyr.ResetReading() + assert lyr.GetNextFeature() is None + assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 0 + assert lyr.GetFeatureCount() == 0 + lyr.SetSpatialFilter(None) + def test_gdalalg_vector_fields_geom_named(tmp_vsimem): From 8e3dda47453a15f9f5e7f3fe0fd0a0ba1a834de6 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Feb 2025 15:48:58 +0100 Subject: [PATCH 5/8] gdal vector clip: fix taking into account attribute and spatial filter on result layer --- apps/gdalalg_vector_clip.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/apps/gdalalg_vector_clip.cpp b/apps/gdalalg_vector_clip.cpp index 8e529f1fda0d..2963ea620c1c 100644 --- a/apps/gdalalg_vector_clip.cpp +++ b/apps/gdalalg_vector_clip.cpp @@ -89,8 +89,13 @@ class GDALVectorClipAlgorithmDataset final : public GDALDataset } }; -class 
GDALVectorClipAlgorithmLayer final : public OGRLayer +class GDALVectorClipAlgorithmLayer final + : public OGRLayer, + public OGRGetNextFeatureThroughRaw { + + DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(GDALVectorClipAlgorithmLayer) + public: GDALVectorClipAlgorithmLayer(OGRLayer *poSrcLayer, std::unique_ptr poClipGeom) @@ -117,7 +122,7 @@ class GDALVectorClipAlgorithmLayer final : public OGRLayer m_idxInCurGeomColl = 0; } - OGRFeature *GetNextFeature() override + OGRFeature *GetNextRawFeature() { if (m_poSrcFeature && m_poCurGeomColl) { From 7e7be151ffe194b24596ac38b22d7b1d37506406 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Feb 2025 16:01:18 +0100 Subject: [PATCH 6/8] Factor out a GDALVectorPipelineOutputDataset class --- apps/gdalalg_vector_clip.cpp | 26 +-------------------- apps/gdalalg_vector_filter.cpp | 30 +----------------------- apps/gdalalg_vector_pipeline.h | 39 +++++++++++++++++++++++++++++++ apps/gdalalg_vector_read.cpp | 32 +------------------------ apps/gdalalg_vector_reproject.cpp | 33 +------------------------- 5 files changed, 43 insertions(+), 117 deletions(-) diff --git a/apps/gdalalg_vector_clip.cpp b/apps/gdalalg_vector_clip.cpp index 2963ea620c1c..ed5e9e714426 100644 --- a/apps/gdalalg_vector_clip.cpp +++ b/apps/gdalalg_vector_clip.cpp @@ -65,30 +65,6 @@ GDALVectorClipAlgorithm::GDALVectorClipAlgorithm(bool standaloneStep) namespace { -class GDALVectorClipAlgorithmDataset final : public GDALDataset -{ - std::vector> m_layers{}; - - public: - GDALVectorClipAlgorithmDataset() = default; - - void AddLayer(std::unique_ptr poLayer) - { - m_layers.push_back(std::move(poLayer)); - } - - int GetLayerCount() override - { - return static_cast(m_layers.size()); - } - - OGRLayer *GetLayer(int idx) override - { - return idx >= 0 && idx < GetLayerCount() ? 
m_layers[idx].get() - : nullptr; - } -}; - class GDALVectorClipAlgorithmLayer final : public OGRLayer, public OGRGetNextFeatureThroughRaw @@ -452,7 +428,7 @@ bool GDALVectorClipAlgorithm::RunStep(GDALProgressFunc, void *) return false; } - auto outDS = std::make_unique(); + auto outDS = std::make_unique(); outDS->SetDescription(poSrcDS->GetDescription()); bool ret = true; diff --git a/apps/gdalalg_vector_filter.cpp b/apps/gdalalg_vector_filter.cpp index 51f3c8edb842..f2a928143c8f 100644 --- a/apps/gdalalg_vector_filter.cpp +++ b/apps/gdalalg_vector_filter.cpp @@ -46,34 +46,6 @@ GDALVectorFilterAlgorithm::GDALVectorFilterAlgorithm(bool standaloneStep) namespace { -/************************************************************************/ -/* GDALVectorFilterAlgorithmDataset */ -/************************************************************************/ - -class GDALVectorFilterAlgorithmDataset final : public GDALDataset -{ - std::vector> m_layers{}; - - public: - GDALVectorFilterAlgorithmDataset() = default; - - void AddLayer(std::unique_ptr poLayer) - { - m_layers.push_back(std::move(poLayer)); - } - - int GetLayerCount() override - { - return static_cast(m_layers.size()); - } - - OGRLayer *GetLayer(int idx) override - { - return idx >= 0 && idx < GetLayerCount() ? 
m_layers[idx].get() - : nullptr; - } -}; - /************************************************************************/ /* GDALVectorFilterAlgorithmLayer */ /************************************************************************/ @@ -314,7 +286,7 @@ bool GDALVectorFilterAlgorithm::RunStep(GDALProgressFunc, void *) if (ret && !m_selectedFields.empty()) { - auto outDS = std::make_unique(); + auto outDS = std::make_unique(); outDS->SetDescription(poSrcDS->GetDescription()); for (int i = 0; i < nLayerCount; ++i) diff --git a/apps/gdalalg_vector_pipeline.h b/apps/gdalalg_vector_pipeline.h index 97564af4e04e..4bf66820e0f0 100644 --- a/apps/gdalalg_vector_pipeline.h +++ b/apps/gdalalg_vector_pipeline.h @@ -16,6 +16,8 @@ #include "gdalalgorithm.h" #include "gdalalg_abstract_pipeline.h" +#include "ogrsf_frmts.h" + //! @cond Doxygen_Suppress /************************************************************************/ @@ -106,6 +108,43 @@ class GDALVectorPipelineAlgorithm final } }; +/************************************************************************/ +/* GDALVectorPipelineOutputDataset */ +/************************************************************************/ + +/** Class used by vector pipeline steps to create an output on-the-fly + * dataset where they can store on-the-fly layers. + */ +class GDALVectorPipelineOutputDataset final : public GDALDataset +{ + std::vector> m_layersToDestroy{}; + std::vector m_layers{}; + + public: + GDALVectorPipelineOutputDataset() = default; + + void AddLayer(std::unique_ptr poLayer) + { + m_layersToDestroy.push_back(std::move(poLayer)); + m_layers.push_back(m_layersToDestroy.back().get()); + } + + void AddLayer(OGRLayer *poLayer) + { + m_layers.push_back(poLayer); + } + + int GetLayerCount() override + { + return static_cast(m_layers.size()); + } + + OGRLayer *GetLayer(int idx) override + { + return idx >= 0 && idx < GetLayerCount() ? m_layers[idx] : nullptr; + } +}; + //! 
@endcond #endif diff --git a/apps/gdalalg_vector_read.cpp b/apps/gdalalg_vector_read.cpp index ed8a4947d7bd..4af6c5e15d6e 100644 --- a/apps/gdalalg_vector_read.cpp +++ b/apps/gdalalg_vector_read.cpp @@ -32,36 +32,6 @@ GDALVectorReadAlgorithm::GDALVectorReadAlgorithm() AddInputArgs(/* hiddenForCLI = */ false); } -/************************************************************************/ -/* GDALVectorReadAlgorithmDataset */ -/************************************************************************/ - -namespace -{ -class GDALVectorReadAlgorithmDataset final : public GDALDataset -{ - std::vector m_srcLayers{}; - - public: - GDALVectorReadAlgorithmDataset() = default; - - void AddLayer(OGRLayer *poSrcLayer) - { - m_srcLayers.push_back(poSrcLayer); - } - - int GetLayerCount() override - { - return static_cast(m_srcLayers.size()); - } - - OGRLayer *GetLayer(int idx) override - { - return idx >= 0 && idx < GetLayerCount() ? m_srcLayers[idx] : nullptr; - } -}; -} // namespace - /************************************************************************/ /* GDALVectorReadAlgorithm::RunStep() */ /************************************************************************/ @@ -79,7 +49,7 @@ bool GDALVectorReadAlgorithm::RunStep(GDALProgressFunc, void *) else { auto poSrcDS = m_inputDataset.GetDatasetRef(); - auto poOutDS = std::make_unique(); + auto poOutDS = std::make_unique(); poOutDS->SetDescription(poSrcDS->GetDescription()); for (const auto &srcLayerName : m_inputLayerNames) { diff --git a/apps/gdalalg_vector_reproject.cpp b/apps/gdalalg_vector_reproject.cpp index 8d3fa8ef11c7..fccacbbe1f4b 100644 --- a/apps/gdalalg_vector_reproject.cpp +++ b/apps/gdalalg_vector_reproject.cpp @@ -40,37 +40,6 @@ GDALVectorReprojectAlgorithm::GDALVectorReprojectAlgorithm(bool standaloneStep) .AddHiddenAlias("t_srs"); } -/************************************************************************/ -/* GDALVectorReprojectAlgorithmDataset */ 
-/************************************************************************/ - -namespace -{ -class GDALVectorReprojectAlgorithmDataset final : public GDALDataset -{ - std::vector> m_layers{}; - - public: - GDALVectorReprojectAlgorithmDataset() = default; - - void AddLayer(std::unique_ptr poLayer) - { - m_layers.push_back(std::move(poLayer)); - } - - int GetLayerCount() override - { - return static_cast(m_layers.size()); - } - - OGRLayer *GetLayer(int idx) override - { - return idx >= 0 && idx < GetLayerCount() ? m_layers[idx].get() - : nullptr; - } -}; -} // namespace - /************************************************************************/ /* GDALVectorReprojectAlgorithm::RunStep() */ /************************************************************************/ @@ -96,7 +65,7 @@ bool GDALVectorReprojectAlgorithm::RunStep(GDALProgressFunc, void *) auto poSrcDS = m_inputDataset.GetDatasetRef(); auto reprojectedDataset = - std::make_unique(); + std::make_unique(); reprojectedDataset->SetDescription(poSrcDS->GetDescription()); const int nLayerCount = poSrcDS->GetLayerCount(); From c4a6fa75f60116017ccb724a3690ae0344c143eb Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 11 Feb 2025 01:44:42 +0100 Subject: [PATCH 7/8] Add 'gdal vector select' (extracting --fields functionality from 'gdal vector filter') --- apps/CMakeLists.txt | 1 + apps/gdalalg_vector.cpp | 2 + apps/gdalalg_vector_filter.cpp | 226 +-------------- apps/gdalalg_vector_filter.h | 4 +- apps/gdalalg_vector_pipeline.cpp | 2 + apps/gdalalg_vector_select.cpp | 274 ++++++++++++++++++ apps/gdalalg_vector_select.h | 63 ++++ .../utilities/test_gdalalg_vector_filter.py | 90 +----- .../utilities/test_gdalalg_vector_select.py | 129 +++++++++ doc/source/conf.py | 7 + doc/source/programs/gdal_vector.rst | 2 + doc/source/programs/gdal_vector_filter.rst | 33 +-- doc/source/programs/gdal_vector_pipeline.rst | 18 ++ doc/source/programs/gdal_vector_select.rst | 116 ++++++++ doc/source/programs/index.rst | 2 + 
.../programs/migration_guide_to_gdal_cli.rst | 2 +- 16 files changed, 621 insertions(+), 350 deletions(-) create mode 100644 apps/gdalalg_vector_select.cpp create mode 100644 apps/gdalalg_vector_select.h create mode 100755 autotest/utilities/test_gdalalg_vector_select.py create mode 100644 doc/source/programs/gdal_vector_select.rst diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 9f48f3be08f1..530bd8eecf07 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -31,6 +31,7 @@ add_library( gdalalg_vector_read.cpp gdalalg_vector_filter.cpp gdalalg_vector_reproject.cpp + gdalalg_vector_select.cpp gdalalg_vector_sql.cpp gdalalg_vector_write.cpp gdalinfo_lib.cpp diff --git a/apps/gdalalg_vector.cpp b/apps/gdalalg_vector.cpp index 6445cd808577..61f4a627f097 100644 --- a/apps/gdalalg_vector.cpp +++ b/apps/gdalalg_vector.cpp @@ -18,6 +18,7 @@ #include "gdalalg_vector_pipeline.h" #include "gdalalg_vector_filter.h" #include "gdalalg_vector_reproject.h" +#include "gdalalg_vector_select.h" #include "gdalalg_vector_sql.h" /************************************************************************/ @@ -44,6 +45,7 @@ class GDALVectorAlgorithm final : public GDALAlgorithm RegisterSubAlgorithm(); RegisterSubAlgorithm(); RegisterSubAlgorithm(); + RegisterSubAlgorithm(); RegisterSubAlgorithm(); } diff --git a/apps/gdalalg_vector_filter.cpp b/apps/gdalalg_vector_filter.cpp index f2a928143c8f..6f3212c234fa 100644 --- a/apps/gdalalg_vector_filter.cpp +++ b/apps/gdalalg_vector_filter.cpp @@ -40,209 +40,8 @@ GDALVectorFilterAlgorithm::GDALVectorFilterAlgorithm(bool standaloneStep) .SetReadFromFileAtSyntaxAllowed() .SetMetaVar("|@") .SetRemoveSQLCommentsEnabled(); - AddArg("fields", 0, _("Selected fields"), &m_selectedFields); } -namespace -{ - -/************************************************************************/ -/* GDALVectorFilterAlgorithmLayer */ -/************************************************************************/ - -class GDALVectorFilterAlgorithmLayer 
final - : public OGRLayer, - public OGRGetNextFeatureThroughRaw -{ - private: - bool m_bIsOK = true; - OGRLayer *const m_poSrcLayer; - OGRFeatureDefn *const m_poFeatureDefn = nullptr; - std::vector m_anMapSrcFieldsToDstFields{}; - std::vector m_anMapDstGeomFieldsToSrcGeomFields{}; - - CPL_DISALLOW_COPY_ASSIGN(GDALVectorFilterAlgorithmLayer) - - std::unique_ptr TranslateFeature(OGRFeature *poSrcFeature) const - { - auto poFeature = std::make_unique(m_poFeatureDefn); - poFeature->SetFID(poSrcFeature->GetFID()); - const auto styleString = poSrcFeature->GetStyleString(); - if (styleString) - poFeature->SetStyleString(styleString); - poFeature->SetFieldsFrom( - poSrcFeature, m_anMapSrcFieldsToDstFields.data(), false, false); - int iDstGeomField = 0; - for (int nSrcGeomField : m_anMapDstGeomFieldsToSrcGeomFields) - { - poFeature->SetGeomFieldDirectly( - iDstGeomField, poSrcFeature->StealGeometry(nSrcGeomField)); - ++iDstGeomField; - } - return poFeature; - } - - DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(GDALVectorFilterAlgorithmLayer) - - public: - GDALVectorFilterAlgorithmLayer( - OGRLayer *poSrcLayer, const std::vector &selectedFields, - bool bStrict) - : m_poSrcLayer(poSrcLayer), - m_poFeatureDefn(new OGRFeatureDefn(poSrcLayer->GetName())) - { - SetDescription(poSrcLayer->GetDescription()); - m_poFeatureDefn->SetGeomType(wkbNone); - m_poFeatureDefn->Reference(); - - std::set oSetSelFields; - std::set oSetSelFieldsUC; - for (const std::string &osFieldName : selectedFields) - { - oSetSelFields.insert(osFieldName); - oSetSelFieldsUC.insert(CPLString(osFieldName).toupper()); - } - - std::set oSetUsedSetFieldsUC; - - const auto poSrcLayerDefn = poSrcLayer->GetLayerDefn(); - for (int i = 0; i < poSrcLayerDefn->GetFieldCount(); ++i) - { - const auto poSrcFieldDefn = poSrcLayerDefn->GetFieldDefn(i); - auto oIter = oSetSelFieldsUC.find( - CPLString(poSrcFieldDefn->GetNameRef()).toupper()); - if (oIter != oSetSelFieldsUC.end()) - { - m_anMapSrcFieldsToDstFields.push_back( - 
m_poFeatureDefn->GetFieldCount()); - OGRFieldDefn oDstFieldDefn(*poSrcFieldDefn); - m_poFeatureDefn->AddFieldDefn(&oDstFieldDefn); - oSetUsedSetFieldsUC.insert(*oIter); - } - else - { - m_anMapSrcFieldsToDstFields.push_back(-1); - } - } - - for (int i = 0; i < poSrcLayerDefn->GetGeomFieldCount(); ++i) - { - const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(i); - auto oIter = oSetSelFieldsUC.find( - CPLString(poSrcFieldDefn->GetNameRef()).toupper()); - if (oIter != oSetSelFieldsUC.end()) - { - m_anMapDstGeomFieldsToSrcGeomFields.push_back(i); - OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn); - m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn); - oSetUsedSetFieldsUC.insert(*oIter); - } - } - - auto oIter = oSetSelFieldsUC.find( - CPLString(OGR_GEOMETRY_DEFAULT_NON_EMPTY_NAME).toupper()); - if (m_poFeatureDefn->GetGeomFieldCount() == 0 && - oIter != oSetSelFieldsUC.end() && - poSrcLayerDefn->GetGeomFieldCount() == 1) - { - const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(0); - m_anMapDstGeomFieldsToSrcGeomFields.push_back(0); - OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn); - m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn); - oSetUsedSetFieldsUC.insert(*oIter); - } - - if (oSetUsedSetFieldsUC.size() != oSetSelFields.size()) - { - for (const std::string &osName : oSetSelFields) - { - if (!cpl::contains(oSetUsedSetFieldsUC, - CPLString(osName).toupper())) - { - CPLError(bStrict ? CE_Failure : CE_Warning, CPLE_AppDefined, - "Field '%s' does not exist in layer '%s'.%s", - osName.c_str(), poSrcLayer->GetDescription(), - bStrict ? 
"" : " It will be ignored"); - if (bStrict) - m_bIsOK = false; - } - } - } - } - - ~GDALVectorFilterAlgorithmLayer() override - { - if (m_poFeatureDefn) - m_poFeatureDefn->Dereference(); - } - - bool IsOK() const - { - return m_bIsOK; - } - - OGRFeatureDefn *GetLayerDefn() override - { - return m_poFeatureDefn; - } - - GIntBig GetFeatureCount(int bForce) override - { - if (!m_poAttrQuery && !m_poFilterGeom) - return m_poSrcLayer->GetFeatureCount(bForce); - return OGRLayer::GetFeatureCount(bForce); - } - - OGRErr GetExtent(OGREnvelope *psExtent, int bForce) override - { - return m_poSrcLayer->GetExtent(psExtent, bForce); - } - - OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce) override - { - return m_poSrcLayer->GetExtent(iGeomField, psExtent, bForce); - } - - void ResetReading() override - { - m_poSrcLayer->ResetReading(); - } - - OGRFeature *GetNextRawFeature() - { - auto poSrcFeature = - std::unique_ptr(m_poSrcLayer->GetNextFeature()); - if (!poSrcFeature) - return nullptr; - return TranslateFeature(poSrcFeature.get()).release(); - } - - OGRFeature *GetFeature(GIntBig nFID) override - { - auto poSrcFeature = - std::unique_ptr(m_poSrcLayer->GetFeature(nFID)); - if (!poSrcFeature) - return nullptr; - return TranslateFeature(poSrcFeature.get()).release(); - } - - int TestCapability(const char *pszCap) override - { - if (EQUAL(pszCap, OLCRandomRead) || EQUAL(pszCap, OLCCurveGeometries) || - EQUAL(pszCap, OLCMeasuredGeometries) || - EQUAL(pszCap, OLCZGeometries) || - (EQUAL(pszCap, OLCFastFeatureCount) && !m_poAttrQuery && - !m_poFilterGeom) || - EQUAL(pszCap, OLCFastGetExtent) || EQUAL(pszCap, OLCStringsAsUTF8)) - { - return m_poSrcLayer->TestCapability(pszCap); - } - return false; - } -}; - -} // namespace - /************************************************************************/ /* GDALVectorFilterAlgorithm::RunStep() */ /************************************************************************/ @@ -284,30 +83,7 @@ bool 
GDALVectorFilterAlgorithm::RunStep(GDALProgressFunc, void *) } } - if (ret && !m_selectedFields.empty()) - { - auto outDS = std::make_unique(); - outDS->SetDescription(poSrcDS->GetDescription()); - - for (int i = 0; i < nLayerCount; ++i) - { - auto poSrcLayer = poSrcDS->GetLayer(i); - ret = ret && (poSrcLayer != nullptr); - if (ret) - { - auto poLayer = std::make_unique( - poSrcLayer, m_selectedFields, /* bStrict = */ true); - ret = poLayer->IsOK(); - if (ret) - { - outDS->AddLayer(std::move(poLayer)); - } - } - } - - m_outputDataset.Set(std::move(outDS)); - } - else if (ret) + if (ret) { m_outputDataset.Set(m_inputDataset.GetDatasetRef()); } diff --git a/apps/gdalalg_vector_filter.h b/apps/gdalalg_vector_filter.h index b43bd0cf8ca3..57c07ca99c15 100644 --- a/apps/gdalalg_vector_filter.h +++ b/apps/gdalalg_vector_filter.h @@ -27,8 +27,7 @@ class GDALVectorFilterAlgorithm /* non final */ public: static constexpr const char *NAME = "filter"; static constexpr const char *DESCRIPTION = "Filter a vector dataset."; - static constexpr const char *HELP_URL = - "/programs/gdal_vector_pipeline.html"; + static constexpr const char *HELP_URL = "/programs/gdal_vector_filter.html"; static std::vector GetAliases() { @@ -42,7 +41,6 @@ class GDALVectorFilterAlgorithm /* non final */ std::vector m_bbox{}; std::string m_where{}; - std::vector m_selectedFields{}; }; /************************************************************************/ diff --git a/apps/gdalalg_vector_pipeline.cpp b/apps/gdalalg_vector_pipeline.cpp index eb1680484668..cade33a94b40 100644 --- a/apps/gdalalg_vector_pipeline.cpp +++ b/apps/gdalalg_vector_pipeline.cpp @@ -15,6 +15,7 @@ #include "gdalalg_vector_clip.h" #include "gdalalg_vector_filter.h" #include "gdalalg_vector_reproject.h" +#include "gdalalg_vector_select.h" #include "gdalalg_vector_sql.h" #include "gdalalg_vector_write.h" @@ -193,6 +194,7 @@ GDALVectorPipelineAlgorithm::GDALVectorPipelineAlgorithm() m_stepRegistry.Register(); 
m_stepRegistry.Register(); m_stepRegistry.Register(); + m_stepRegistry.Register(); m_stepRegistry.Register(); } diff --git a/apps/gdalalg_vector_select.cpp b/apps/gdalalg_vector_select.cpp new file mode 100644 index 000000000000..bdf50024115d --- /dev/null +++ b/apps/gdalalg_vector_select.cpp @@ -0,0 +1,274 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: "select" step of "vector pipeline" + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2024, Even Rouault + * + * SPDX-License-Identifier: MIT + ****************************************************************************/ + +#include "gdalalg_vector_select.h" + +#include "gdal_priv.h" +#include "ogrsf_frmts.h" +#include "ogr_p.h" + +#include + +//! @cond Doxygen_Suppress + +#ifndef _ +#define _(x) (x) +#endif + +/************************************************************************/ +/* GDALVectorSelectAlgorithm::GDALVectorSelectAlgorithm() */ +/************************************************************************/ + +GDALVectorSelectAlgorithm::GDALVectorSelectAlgorithm(bool standaloneStep) + : GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL, + standaloneStep) +{ + AddArg("fields", 0, _("Selected fields"), &m_selectedFields) + .SetPositional() + .SetRequired(); + AddArg("ignore-missing-fields", 0, _("Ignore missing fields"), + &m_ignoreMissingFields); +} + +namespace +{ + +/************************************************************************/ +/* GDALVectorSelectAlgorithmLayer */ +/************************************************************************/ + +class GDALVectorSelectAlgorithmLayer final + : public OGRLayer, + public OGRGetNextFeatureThroughRaw +{ + private: + bool m_bIsOK = true; + OGRLayer &m_oSrcLayer; + OGRFeatureDefn *const m_poFeatureDefn = nullptr; + std::vector m_anMapSrcFieldsToDstFields{}; + std::vector 
m_anMapDstGeomFieldsToSrcGeomFields{}; + + CPL_DISALLOW_COPY_ASSIGN(GDALVectorSelectAlgorithmLayer) + + std::unique_ptr TranslateFeature(OGRFeature *poSrcFeature) const + { + auto poFeature = std::make_unique(m_poFeatureDefn); + poFeature->SetFID(poSrcFeature->GetFID()); + const auto styleString = poSrcFeature->GetStyleString(); + if (styleString) + poFeature->SetStyleString(styleString); + poFeature->SetFieldsFrom( + poSrcFeature, m_anMapSrcFieldsToDstFields.data(), false, false); + int iDstGeomField = 0; + for (int nSrcGeomField : m_anMapDstGeomFieldsToSrcGeomFields) + { + poFeature->SetGeomFieldDirectly( + iDstGeomField, poSrcFeature->StealGeometry(nSrcGeomField)); + ++iDstGeomField; + } + return poFeature; + } + + DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(GDALVectorSelectAlgorithmLayer) + + public: + GDALVectorSelectAlgorithmLayer( + OGRLayer &oSrcLayer, const std::vector &selectedFields, + bool bStrict) + : m_oSrcLayer(oSrcLayer), + m_poFeatureDefn(new OGRFeatureDefn(oSrcLayer.GetName())) + { + SetDescription(oSrcLayer.GetDescription()); + m_poFeatureDefn->SetGeomType(wkbNone); + m_poFeatureDefn->Reference(); + + std::set oSetSelFields; + std::set oSetSelFieldsUC; + for (const std::string &osFieldName : selectedFields) + { + oSetSelFields.insert(osFieldName); + oSetSelFieldsUC.insert(CPLString(osFieldName).toupper()); + } + + std::set oSetUsedSetFieldsUC; + + const auto poSrcLayerDefn = oSrcLayer.GetLayerDefn(); + for (int i = 0; i < poSrcLayerDefn->GetFieldCount(); ++i) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetFieldDefn(i); + auto oIter = oSetSelFieldsUC.find( + CPLString(poSrcFieldDefn->GetNameRef()).toupper()); + if (oIter != oSetSelFieldsUC.end()) + { + m_anMapSrcFieldsToDstFields.push_back( + m_poFeatureDefn->GetFieldCount()); + OGRFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddFieldDefn(&oDstFieldDefn); + oSetUsedSetFieldsUC.insert(*oIter); + } + else + { + m_anMapSrcFieldsToDstFields.push_back(-1); + } + } + + for (int i = 0; i < 
poSrcLayerDefn->GetGeomFieldCount(); ++i) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(i); + auto oIter = oSetSelFieldsUC.find( + CPLString(poSrcFieldDefn->GetNameRef()).toupper()); + if (oIter != oSetSelFieldsUC.end()) + { + m_anMapDstGeomFieldsToSrcGeomFields.push_back(i); + OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn); + oSetUsedSetFieldsUC.insert(*oIter); + } + } + + auto oIter = oSetSelFieldsUC.find( + CPLString(OGR_GEOMETRY_DEFAULT_NON_EMPTY_NAME).toupper()); + if (m_poFeatureDefn->GetGeomFieldCount() == 0 && + oIter != oSetSelFieldsUC.end() && + poSrcLayerDefn->GetGeomFieldCount() == 1) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(0); + m_anMapDstGeomFieldsToSrcGeomFields.push_back(0); + OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn); + oSetUsedSetFieldsUC.insert(*oIter); + } + + if (oSetUsedSetFieldsUC.size() != oSetSelFields.size()) + { + for (const std::string &osName : oSetSelFields) + { + if (!cpl::contains(oSetUsedSetFieldsUC, + CPLString(osName).toupper())) + { + CPLError(bStrict ? CE_Failure : CE_Warning, CPLE_AppDefined, + "Field '%s' does not exist in layer '%s'.%s", + osName.c_str(), oSrcLayer.GetDescription(), + bStrict ? 
" You may specify " + "--ignore-missing-fields to skip it" + : " It will be ignored"); + if (bStrict) + m_bIsOK = false; + } + } + } + } + + ~GDALVectorSelectAlgorithmLayer() override + { + if (m_poFeatureDefn) + m_poFeatureDefn->Dereference(); + } + + bool IsOK() const + { + return m_bIsOK; + } + + OGRFeatureDefn *GetLayerDefn() override + { + return m_poFeatureDefn; + } + + GIntBig GetFeatureCount(int bForce) override + { + if (!m_poAttrQuery && !m_poFilterGeom) + return m_oSrcLayer.GetFeatureCount(bForce); + return OGRLayer::GetFeatureCount(bForce); + } + + OGRErr GetExtent(OGREnvelope *psExtent, int bForce) override + { + return m_oSrcLayer.GetExtent(psExtent, bForce); + } + + OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce) override + { + return m_oSrcLayer.GetExtent(iGeomField, psExtent, bForce); + } + + void ResetReading() override + { + m_oSrcLayer.ResetReading(); + } + + OGRFeature *GetNextRawFeature() + { + auto poSrcFeature = + std::unique_ptr(m_oSrcLayer.GetNextFeature()); + if (!poSrcFeature) + return nullptr; + return TranslateFeature(poSrcFeature.get()).release(); + } + + OGRFeature *GetFeature(GIntBig nFID) override + { + auto poSrcFeature = + std::unique_ptr(m_oSrcLayer.GetFeature(nFID)); + if (!poSrcFeature) + return nullptr; + return TranslateFeature(poSrcFeature.get()).release(); + } + + int TestCapability(const char *pszCap) override + { + if (EQUAL(pszCap, OLCRandomRead) || EQUAL(pszCap, OLCCurveGeometries) || + EQUAL(pszCap, OLCMeasuredGeometries) || + EQUAL(pszCap, OLCZGeometries) || + (EQUAL(pszCap, OLCFastFeatureCount) && !m_poAttrQuery && + !m_poFilterGeom) || + EQUAL(pszCap, OLCFastGetExtent) || EQUAL(pszCap, OLCStringsAsUTF8)) + { + return m_oSrcLayer.TestCapability(pszCap); + } + return false; + } +}; + +} // namespace + +/************************************************************************/ +/* GDALVectorSelectAlgorithm::RunStep() */ +/************************************************************************/ + 
+bool GDALVectorSelectAlgorithm::RunStep(GDALProgressFunc, void *) +{ + CPLAssert(m_inputDataset.GetDatasetRef()); + CPLAssert(m_outputDataset.GetName().empty()); + CPLAssert(!m_outputDataset.GetDatasetRef()); + + auto poSrcDS = m_inputDataset.GetDatasetRef(); + + auto outDS = std::make_unique(); + outDS->SetDescription(poSrcDS->GetDescription()); + + for (auto &&poSrcLayer : poSrcDS->GetLayers()) + { + auto poLayer = std::make_unique( + *poSrcLayer, m_selectedFields, + /* bStrict = */ !m_ignoreMissingFields); + if (!poLayer->IsOK()) + return false; + outDS->AddLayer(std::move(poLayer)); + } + + m_outputDataset.Set(std::move(outDS)); + + return true; +} + +//! @endcond diff --git a/apps/gdalalg_vector_select.h b/apps/gdalalg_vector_select.h new file mode 100644 index 000000000000..c90859a4f8ea --- /dev/null +++ b/apps/gdalalg_vector_select.h @@ -0,0 +1,63 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: "select" step of "vector pipeline" + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2024, Even Rouault + * + * SPDX-License-Identifier: MIT + ****************************************************************************/ + +#ifndef GDALALG_VECTOR_SELECT_INCLUDED +#define GDALALG_VECTOR_SELECT_INCLUDED + +#include "gdalalg_vector_pipeline.h" + +//! 
@cond Doxygen_Suppress + +/************************************************************************/ +/* GDALVectorSelectAlgorithm */ +/************************************************************************/ + +class GDALVectorSelectAlgorithm /* non final */ + : public GDALVectorPipelineStepAlgorithm +{ + public: + static constexpr const char *NAME = "select"; + static constexpr const char *DESCRIPTION = + "Select a subset of fields from a vector dataset."; + static constexpr const char *HELP_URL = "/programs/gdal_vector_select.html"; + + static std::vector GetAliases() + { + return {}; + } + + explicit GDALVectorSelectAlgorithm(bool standaloneStep = false); + + private: + bool RunStep(GDALProgressFunc pfnProgress, void *pProgressData) override; + + std::vector m_selectedFields{}; + bool m_ignoreMissingFields = false; +}; + +/************************************************************************/ +/* GDALVectorSelectAlgorithmStandalone */ +/************************************************************************/ + +class GDALVectorSelectAlgorithmStandalone final + : public GDALVectorSelectAlgorithm +{ + public: + GDALVectorSelectAlgorithmStandalone() + : GDALVectorSelectAlgorithm(/* standaloneStep = */ true) + { + } +}; + +//! 
@endcond + +#endif /* GDALALG_VECTOR_SELECT_INCLUDED */ diff --git a/autotest/utilities/test_gdalalg_vector_filter.py b/autotest/utilities/test_gdalalg_vector_filter.py index 7a067a69aa60..c1f7a4912a0f 100755 --- a/autotest/utilities/test_gdalalg_vector_filter.py +++ b/autotest/utilities/test_gdalalg_vector_filter.py @@ -13,7 +13,7 @@ import pytest -from osgeo import gdal, ogr +from osgeo import gdal def get_filter_alg(): @@ -86,91 +86,3 @@ def test_gdalalg_vector_filter_where_error(tmp_vsimem): filter_alg.ParseRunAndFinalize( ["--where=invalid", "../ogr/data/poly.shp", out_filename] ) - - -def test_gdalalg_vector_fields(): - - filter_alg = get_filter_alg() - assert filter_alg.ParseCommandLineArguments( - [ - "--fields=EAS_ID,_ogr_geometry_", - "--of=stream", - "../ogr/data/poly.shp", - "streamed_output", - ] - ) - assert filter_alg.Run() - - ds = filter_alg.GetArg("output").Get().GetDataset() - - lyr = ds.GetLayer(0) - assert lyr.GetLayerDefn().GetFieldCount() == 1 - assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 - assert lyr.GetFeatureCount() == 10 - f = lyr.GetNextFeature() - assert f["EAS_ID"] == 168 - assert f.GetGeometryRef() is not None - lyr.ResetReading() - assert len([f for f in lyr]) == 10 - f = lyr.GetFeature(0) - assert f["EAS_ID"] == 168 - with pytest.raises(Exception): - lyr.GetFeature(10) - assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 1 - assert lyr.TestCapability(ogr.OLCRandomWrite) == 0 - assert lyr.GetExtent() == (478315.53125, 481645.3125, 4762880.5, 4765610.5) - assert lyr.GetExtent(0) == (478315.53125, 481645.3125, 4762880.5, 4765610.5) - - # Test attribute filter on result layer - lyr.SetAttributeFilter("EAS_ID = 170") - lyr.ResetReading() - f = lyr.GetNextFeature() - assert f["EAS_ID"] == 170 - assert lyr.GetNextFeature() is None - assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 0 - assert lyr.GetFeatureCount() == 1 - lyr.SetAttributeFilter(None) - - # Test spatial filter on reslt layer - lyr.SetSpatialFilterRect(-1, -1, 
-1, -1) - lyr.ResetReading() - assert lyr.GetNextFeature() is None - assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 0 - assert lyr.GetFeatureCount() == 0 - lyr.SetSpatialFilter(None) - - -def test_gdalalg_vector_fields_geom_named(tmp_vsimem): - - src_ds = gdal.GetDriverByName("Memory").Create("", 0, 0, 0, gdal.GDT_Unknown) - src_lyr = src_ds.CreateLayer("test", geom_type=ogr.wkbNone, srs=None) - src_lyr.CreateGeomField(ogr.GeomFieldDefn("geom_field")) - src_lyr.CreateGeomField(ogr.GeomFieldDefn("geom_field2")) - out_filename = str(tmp_vsimem / "out.shp") - - filter_alg = get_filter_alg() - filter_alg.GetArg("input").Get().SetDataset(src_ds) - filter_alg.GetArg("output").Set(out_filename) - assert filter_alg.ParseCommandLineArguments( - ["--of", "Memory", "--fields=geom_field2"] - ) - assert filter_alg.Run() - - ds = filter_alg.GetArg("output").Get().GetDataset() - lyr = ds.GetLayer(0) - assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 - assert lyr.GetLayerDefn().GetGeomFieldDefn(0).GetName() == "geom_field2" - - -def test_gdalalg_vector_fields_non_existing(tmp_vsimem): - - out_filename = str(tmp_vsimem / "out.shp") - - filter_alg = get_filter_alg() - with pytest.raises( - Exception, - match="Field 'i_do_not_exist' does not exist in layer 'poly'.", - ): - filter_alg.ParseRunAndFinalize( - ["--fields=EAS_ID,i_do_not_exist", "../ogr/data/poly.shp", out_filename] - ) diff --git a/autotest/utilities/test_gdalalg_vector_select.py b/autotest/utilities/test_gdalalg_vector_select.py new file mode 100755 index 000000000000..1c21941ead63 --- /dev/null +++ b/autotest/utilities/test_gdalalg_vector_select.py @@ -0,0 +1,129 @@ +#!/usr/bin/env pytest +# -*- coding: utf-8 -*- +############################################################################### +# Project: GDAL/OGR Test Suite +# Purpose: 'gdal vector select' testing +# Author: Even Rouault +# +############################################################################### +# Copyright (c) 2025, Even Rouault +# 
+# SPDX-License-Identifier: MIT +############################################################################### + +import gdaltest +import pytest + +from osgeo import gdal, ogr + + +def get_select_alg(): + return gdal.GetGlobalAlgorithmRegistry()["vector"]["select"] + + +def test_gdalalg_vector_select_fields(): + + select_alg = get_select_alg() + assert select_alg.ParseCommandLineArguments( + [ + "--fields=EAS_ID,_ogr_geometry_", + "--of=stream", + "../ogr/data/poly.shp", + "streamed_output", + ] + ) + assert select_alg.Run() + + ds = select_alg.GetArg("output").Get().GetDataset() + + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetFieldCount() == 1 + assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 + assert lyr.GetFeatureCount() == 10 + f = lyr.GetNextFeature() + assert f["EAS_ID"] == 168 + assert f.GetGeometryRef() is not None + lyr.ResetReading() + assert len([f for f in lyr]) == 10 + f = lyr.GetFeature(0) + assert f["EAS_ID"] == 168 + with pytest.raises(Exception): + lyr.GetFeature(10) + assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 1 + assert lyr.TestCapability(ogr.OLCRandomWrite) == 0 + assert lyr.GetExtent() == (478315.53125, 481645.3125, 4762880.5, 4765610.5) + assert lyr.GetExtent(0) == (478315.53125, 481645.3125, 4762880.5, 4765610.5) + + # Test attribute select on result layer + lyr.SetAttributeFilter("EAS_ID = 170") + lyr.ResetReading() + f = lyr.GetNextFeature() + assert f["EAS_ID"] == 170 + assert lyr.GetNextFeature() is None + assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 0 + assert lyr.GetFeatureCount() == 1 + lyr.SetAttributeFilter(None) + + # Test spatial select on result layer + lyr.SetSpatialFilterRect(-1, -1, -1, -1) + lyr.ResetReading() + assert lyr.GetNextFeature() is None + assert lyr.TestCapability(ogr.OLCFastFeatureCount) == 0 + assert lyr.GetFeatureCount() == 0 + lyr.SetSpatialFilter(None) + + +def test_gdalalg_vector_select_fields_geom_named(tmp_vsimem): + + src_ds = gdal.GetDriverByName("Memory").Create("", 0, 0, 
0, gdal.GDT_Unknown) + src_lyr = src_ds.CreateLayer("test", geom_type=ogr.wkbNone, srs=None) + src_lyr.CreateGeomField(ogr.GeomFieldDefn("geom_field")) + src_lyr.CreateGeomField(ogr.GeomFieldDefn("geom_field2")) + out_filename = str(tmp_vsimem / "out.shp") + + select_alg = get_select_alg() + select_alg["input"] = src_ds + select_alg["output"] = out_filename + select_alg["fields"] = ["geom_field2"] + assert select_alg.ParseCommandLineArguments(["--of", "Memory"]) + assert select_alg.Run() + + ds = select_alg["output"].GetDataset() + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 + assert lyr.GetLayerDefn().GetGeomFieldDefn(0).GetName() == "geom_field2" + + +def test_gdalalg_vector_select_fields_non_existing(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + select_alg = get_select_alg() + with pytest.raises( + Exception, + match="Field 'i_do_not_exist' does not exist in layer 'poly'. You may specify --ignore-missing-fields to skip it", + ): + select_alg.ParseRunAndFinalize( + ["--fields=EAS_ID,i_do_not_exist", "../ogr/data/poly.shp", out_filename] + ) + + +def test_gdalalg_vector_select_fields_non_existing_ignore_missing_fields(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + select_alg = get_select_alg() + with gdaltest.error_handler(): + assert select_alg.ParseRunAndFinalize( + [ + "--ignore-missing-fields", + "--fields=EAS_ID,_ogr_geometry_,i_do_not_exist", + "../ogr/data/poly.shp", + out_filename, + ] + ) + + with gdal.OpenEx(out_filename) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetLayerDefn().GetFieldCount() == 1 + assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 diff --git a/doc/source/conf.py b/doc/source/conf.py index 281809a1987f..0ef43b9f837b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -327,6 +327,13 @@ [author_evenr], 1, ), + ( + "programs/gdal_vector_select", + "gdal-vector-select", + "Select a subset of fields from a vector dataset", + [author_evenr], + 1, + ), ( 
"programs/gdal_vector_sql", "gdal-vector-sql", diff --git a/doc/source/programs/gdal_vector.rst b/doc/source/programs/gdal_vector.rst index c6b766f7f9b6..a104263981aa 100644 --- a/doc/source/programs/gdal_vector.rst +++ b/doc/source/programs/gdal_vector.rst @@ -25,6 +25,7 @@ Synopsis - info: Return information on a vector dataset. - pipeline: Process a vector dataset. - reproject: Reproject a vector dataset. + - select: Select a subset of fields from a vector dataset. - sql: Apply SQL statement(s) to a dataset. Available sub-commands @@ -35,6 +36,7 @@ Available sub-commands - :ref:`gdal_vector_filter_subcommand` - :ref:`gdal_vector_info_subcommand` - :ref:`gdal_vector_pipeline_subcommand` +- :ref:`gdal_vector_select_subcommand` - :ref:`gdal_vector_sql_subcommand` Examples diff --git a/doc/source/programs/gdal_vector_filter.rst b/doc/source/programs/gdal_vector_filter.rst index 885b88921df7..f8bac444d42f 100644 --- a/doc/source/programs/gdal_vector_filter.rst +++ b/doc/source/programs/gdal_vector_filter.rst @@ -56,7 +56,7 @@ Description ----------- :program:`gdal vector filter` can be used to filter a vector dataset from -their spatial extent, a SQL WHERE clause or a subset of fields. +their spatial extent or a SQL WHERE clause. ``filter`` can also be used as a step of :ref:`gdal_vector_pipeline_subcommand`. @@ -82,30 +82,6 @@ Standard options Attribute query (like SQL WHERE). -.. option:: --fields - - Comma-separated list of fields from input layer to copy to the new layer. - - Field names with spaces, commas or double-quote - should be surrounded with a starting and ending double-quote character, and - double-quote characters in a field name should be escaped with backslash. - - Depending on the shell used, this might require further quoting. For example, - to select ``regular_field``, ``a_field_with space, and comma`` and - ``a field with " double quote`` with a Unix shell: - - .. 
code-block:: bash - - --fields "regular_field,\"a_field_with space, and comma\",\"a field with \\\" double quote\"" - - A field is only selected once, even if mentioned several times in the list. - - Geometry fields can also be specified in the list. If the source layer has - no explicit name for the geometry field, ``_ogr_geometry_`` must be used to - select the unique geometry field. - - Specifying a non-existing source field name results in an error. - Advanced options ++++++++++++++++ @@ -123,10 +99,3 @@ Examples .. code-block:: bash $ gdal vector filter --bbox=2,49,3,50 in.gpkg out.gpkg --overwrite - -.. example:: - :title: Select the EAS_ID field and the geometry field from a Shapefile - - .. code-block:: bash - - $ gdal vector filter --fields=EAS_ID,_ogr_geometry_ in.shp out.gpkg --overwrite diff --git a/doc/source/programs/gdal_vector_pipeline.rst b/doc/source/programs/gdal_vector_pipeline.rst index e5cfc22bf3c4..ce0128afceba 100644 --- a/doc/source/programs/gdal_vector_pipeline.rst +++ b/doc/source/programs/gdal_vector_pipeline.rst @@ -99,6 +99,24 @@ Details for options can be found in :ref:`gdal_vector_filter_subcommand`. -s, --src-crs Source CRS -d, --dst-crs Destination CRS [required] + +* select [OPTIONS] + +.. code-block:: + + Select a subset of fields from a vector dataset. + + Positional arguments: + --fields Selected fields [may be repeated] [required] + + Options: + --ignore-missing-fields Ignore missing fields + + + +Details for options can be found in :ref:`gdal_vector_select_subcommand`. + + * sql [OPTIONS] .. code-block:: diff --git a/doc/source/programs/gdal_vector_select.rst b/doc/source/programs/gdal_vector_select.rst new file mode 100644 index 000000000000..1dd2f7703328 --- /dev/null +++ b/doc/source/programs/gdal_vector_select.rst @@ -0,0 +1,116 @@ +.. 
_gdal_vector_select_subcommand: + +================================================================================ +"gdal vector select" sub-command +================================================================================ + +.. versionadded:: 3.11 + +.. only:: html + + Select a subset of fields from a vector dataset. + +.. Index:: gdal vector select + +Synopsis +-------- + +.. code-block:: + + Usage: gdal vector select [OPTIONS] + + Positional arguments: + -i, --input Input vector dataset [required] + -o, --output Output vector dataset [required] + --fields Selected fields [may be repeated] [required] + + Common Options: + -h, --help Display help message and exit + --version Display GDAL version and exit + --json-usage Display usage as JSON document and exit + --drivers Display driver list as JSON document and exit + --config = Configuration option [may be repeated] + --progress Display progress bar + + Options: + -l, --layer, --input-layer Input layer name(s) [may be repeated] + -f, --of, --format, --output-format Output format ("stream" allowed) + --co, --creation-option = Creation option [may be repeated] + --lco, --layer-creation-option = Layer creation option [may be repeated] + --overwrite Whether overwriting existing output is allowed + --update Whether to open existing dataset in update mode + --overwrite-layer Whether overwriting existing layer is allowed + --append Whether appending to existing layer is allowed + --output-layer Output layer name + --fields Selected fields [may be repeated] + --ignore-missing-fields Ignore missing fields + + Advanced Options: + --if, --input-format Input formats [may be repeated] + --oo, --open-option Open options [may be repeated] + + +Description +----------- + +:program:`gdal vector select` can be used to select a subset of fields. + +``select`` can also be used as a step of :ref:`gdal_vector_pipeline_subcommand`. + +Standard options +++++++++++++++++ + +.. include:: gdal_options/of_vector.rst + +.. 
include:: gdal_options/co_vector.rst + +.. include:: gdal_options/overwrite.rst + +.. option:: --fields + + Comma-separated list of fields from input layer to copy to the new layer. + + Field names with spaces, commas or double-quote + should be surrounded with a starting and ending double-quote character, and + double-quote characters in a field name should be escaped with backslash. + + Depending on the shell used, this might require further quoting. For example, + to select ``regular_field``, ``a_field_with space, and comma`` and + ``a field with " double quote`` with a Unix shell: + + .. code-block:: bash + + --fields "regular_field,\"a_field_with space, and comma\",\"a field with \\\" double quote\"" + + A field is only selected once, even if mentioned several times in the list. + + Geometry fields can also be specified in the list. If the source layer has + no explicit name for the geometry field, ``_ogr_geometry_`` must be used to + select the unique geometry field. + + Specifying a non-existing source field name results in an error. + +.. option:: --ignore-missing-fields + + By default, if a field specified by :option:`--fields` does not exist in the input + layer(s), an error is emitted and the processing is stopped. + When specifying :option:`--ignore-missing-fields`, only a warning is + emitted and the non existing fields are just ignored. + + +Advanced options +++++++++++++++++ + +.. include:: gdal_options/oo.rst + +.. include:: gdal_options/if.rst + +Examples +-------- + +.. example:: + :title: Select the EAS_ID field and the geometry field from a Shapefile + + .. code-block:: bash + + $ gdal vector select in.shp out.gpkg "EAS_ID,_ogr_geometry_" --overwrite diff --git a/doc/source/programs/index.rst b/doc/source/programs/index.rst index cbedd8eb4591..23afc8cb9aab 100644 --- a/doc/source/programs/index.rst +++ b/doc/source/programs/index.rst @@ -48,6 +48,7 @@ single :program:`gdal` program that accepts commands and subcommands. 
gdal_vector_convert gdal_vector_filter gdal_vector_pipeline + gdal_vector_select gdal_vector_sql .. only:: html @@ -75,6 +76,7 @@ single :program:`gdal` program that accepts commands and subcommands. - :ref:`gdal_vector_filter_subcommand`: Filter a vector dataset - :ref:`gdal_vector_convert_subcommand`: Convert a vector dataset - :ref:`gdal_vector_pipeline_subcommand`: Process a vector dataset + - :ref:`gdal_vector_select_subcommand`: Select a subset of fields from a vector dataset - :ref:`gdal_vector_sql_subcommand`: Apply SQL statement(s) to a dataset diff --git a/doc/source/programs/migration_guide_to_gdal_cli.rst b/doc/source/programs/migration_guide_to_gdal_cli.rst index 4f47077db1a2..3f2923806ea4 100644 --- a/doc/source/programs/migration_guide_to_gdal_cli.rst +++ b/doc/source/programs/migration_guide_to_gdal_cli.rst @@ -192,4 +192,4 @@ Vector commands ==> - gdal vector filter --where "country='Greenland'" --fields population,_ogr_geometry_ in.shp out.gpkg + gdal vector pipeline ! read in.shp ! filter --where "country='Greenland'" ! select --fields population,_ogr_geometry_ ! 
write out.gpkg From 32d4ef0c70d753ab2e0df07c6563e841810a1859 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 11 Feb 2025 11:45:00 +0100 Subject: [PATCH 8/8] 'gdal vector select': add a --exclude switch --- apps/gdalalg_vector_select.cpp | 103 ++++++++++++++---- apps/gdalalg_vector_select.h | 3 +- .../utilities/test_gdalalg_vector_select.py | 85 +++++++++++++++ doc/source/programs/gdal_vector_pipeline.rst | 6 +- doc/source/programs/gdal_vector_select.rst | 22 +++- 5 files changed, 192 insertions(+), 27 deletions(-) diff --git a/apps/gdalalg_vector_select.cpp b/apps/gdalalg_vector_select.cpp index bdf50024115d..73dec61fc0e1 100644 --- a/apps/gdalalg_vector_select.cpp +++ b/apps/gdalalg_vector_select.cpp @@ -32,11 +32,15 @@ GDALVectorSelectAlgorithm::GDALVectorSelectAlgorithm(bool standaloneStep) : GDALVectorPipelineStepAlgorithm(NAME, DESCRIPTION, HELP_URL, standaloneStep) { - AddArg("fields", 0, _("Selected fields"), &m_selectedFields) + AddArg("fields", 0, _("Fields to select (or exclude if --exclude)"), + &m_fields) .SetPositional() .SetRequired(); + AddArg("exclude", 0, _("Exclude specified fields"), &m_exclude) + .SetMutualExclusionGroup("exclude-ignore"); AddArg("ignore-missing-fields", 0, _("Ignore missing fields"), - &m_ignoreMissingFields); + &m_ignoreMissingFields) + .SetMutualExclusionGroup("exclude-ignore"); } namespace @@ -51,7 +55,6 @@ class GDALVectorSelectAlgorithmLayer final public OGRGetNextFeatureThroughRaw { private: - bool m_bIsOK = true; OGRLayer &m_oSrcLayer; OGRFeatureDefn *const m_poFeatureDefn = nullptr; std::vector m_anMapSrcFieldsToDstFields{}; @@ -81,16 +84,24 @@ class GDALVectorSelectAlgorithmLayer final DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(GDALVectorSelectAlgorithmLayer) public: - GDALVectorSelectAlgorithmLayer( - OGRLayer &oSrcLayer, const std::vector &selectedFields, - bool bStrict) + explicit GDALVectorSelectAlgorithmLayer(OGRLayer &oSrcLayer) : m_oSrcLayer(oSrcLayer), m_poFeatureDefn(new OGRFeatureDefn(oSrcLayer.GetName())) { 
SetDescription(oSrcLayer.GetDescription()); m_poFeatureDefn->SetGeomType(wkbNone); m_poFeatureDefn->Reference(); + } + + ~GDALVectorSelectAlgorithmLayer() override + { + if (m_poFeatureDefn) + m_poFeatureDefn->Dereference(); + } + bool IncludeFields(const std::vector &selectedFields, + bool bStrict) + { std::set oSetSelFields; std::set oSetSelFieldsUC; for (const std::string &osFieldName : selectedFields) @@ -101,7 +112,7 @@ class GDALVectorSelectAlgorithmLayer final std::set oSetUsedSetFieldsUC; - const auto poSrcLayerDefn = oSrcLayer.GetLayerDefn(); + const auto poSrcLayerDefn = m_oSrcLayer.GetLayerDefn(); for (int i = 0; i < poSrcLayerDefn->GetFieldCount(); ++i) { const auto poSrcFieldDefn = poSrcLayerDefn->GetFieldDefn(i); @@ -157,26 +168,70 @@ class GDALVectorSelectAlgorithmLayer final { CPLError(bStrict ? CE_Failure : CE_Warning, CPLE_AppDefined, "Field '%s' does not exist in layer '%s'.%s", - osName.c_str(), oSrcLayer.GetDescription(), + osName.c_str(), m_oSrcLayer.GetDescription(), bStrict ? 
" You may specify " "--ignore-missing-fields to skip it" : " It will be ignored"); if (bStrict) - m_bIsOK = false; + return false; } } } - } - ~GDALVectorSelectAlgorithmLayer() override - { - if (m_poFeatureDefn) - m_poFeatureDefn->Dereference(); + return true; } - bool IsOK() const + void ExcludeFields(const std::vector &fields) { - return m_bIsOK; + std::set oSetSelFields; + std::set oSetSelFieldsUC; + for (const std::string &osFieldName : fields) + { + oSetSelFields.insert(osFieldName); + oSetSelFieldsUC.insert(CPLString(osFieldName).toupper()); + } + + const auto poSrcLayerDefn = m_oSrcLayer.GetLayerDefn(); + for (int i = 0; i < poSrcLayerDefn->GetFieldCount(); ++i) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetFieldDefn(i); + auto oIter = oSetSelFieldsUC.find( + CPLString(poSrcFieldDefn->GetNameRef()).toupper()); + if (oIter != oSetSelFieldsUC.end()) + { + m_anMapSrcFieldsToDstFields.push_back(-1); + } + else + { + m_anMapSrcFieldsToDstFields.push_back( + m_poFeatureDefn->GetFieldCount()); + OGRFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddFieldDefn(&oDstFieldDefn); + } + } + + if (oSetSelFieldsUC.find( + CPLString(OGR_GEOMETRY_DEFAULT_NON_EMPTY_NAME).toupper()) != + oSetSelFieldsUC.end() && + poSrcLayerDefn->GetGeomFieldCount() == 1) + { + // exclude default geometry field + } + else + { + for (int i = 0; i < poSrcLayerDefn->GetGeomFieldCount(); ++i) + { + const auto poSrcFieldDefn = poSrcLayerDefn->GetGeomFieldDefn(i); + auto oIter = oSetSelFieldsUC.find( + CPLString(poSrcFieldDefn->GetNameRef()).toupper()); + if (oIter == oSetSelFieldsUC.end()) + { + m_anMapDstGeomFieldsToSrcGeomFields.push_back(i); + OGRGeomFieldDefn oDstFieldDefn(*poSrcFieldDefn); + m_poFeatureDefn->AddGeomFieldDefn(&oDstFieldDefn); + } + } + } } OGRFeatureDefn *GetLayerDefn() override @@ -258,11 +313,17 @@ bool GDALVectorSelectAlgorithm::RunStep(GDALProgressFunc, void *) for (auto &&poSrcLayer : poSrcDS->GetLayers()) { - auto poLayer = std::make_unique( - 
*poSrcLayer, m_selectedFields, - /* bStrict = */ !m_ignoreMissingFields); - if (!poLayer->IsOK()) - return false; + auto poLayer = + std::make_unique(*poSrcLayer); + if (m_exclude) + { + poLayer->ExcludeFields(m_fields); + } + else + { + if (!poLayer->IncludeFields(m_fields, !m_ignoreMissingFields)) + return false; + } outDS->AddLayer(std::move(poLayer)); } diff --git a/apps/gdalalg_vector_select.h b/apps/gdalalg_vector_select.h index c90859a4f8ea..7da5e4cfd38c 100644 --- a/apps/gdalalg_vector_select.h +++ b/apps/gdalalg_vector_select.h @@ -40,8 +40,9 @@ class GDALVectorSelectAlgorithm /* non final */ private: bool RunStep(GDALProgressFunc pfnProgress, void *pProgressData) override; - std::vector m_selectedFields{}; + std::vector m_fields{}; bool m_ignoreMissingFields = false; + bool m_exclude = false; }; /************************************************************************/ diff --git a/autotest/utilities/test_gdalalg_vector_select.py b/autotest/utilities/test_gdalalg_vector_select.py index 1c21941ead63..4fd882fd399b 100755 --- a/autotest/utilities/test_gdalalg_vector_select.py +++ b/autotest/utilities/test_gdalalg_vector_select.py @@ -127,3 +127,88 @@ def test_gdalalg_vector_select_fields_non_existing_ignore_missing_fields(tmp_vsi lyr = ds.GetLayer(0) assert lyr.GetLayerDefn().GetFieldCount() == 1 assert lyr.GetLayerDefn().GetGeomFieldCount() == 1 + + +def test_gdalalg_vector_select_fields_exclude(tmp_vsimem): + + out_filename = str(tmp_vsimem / "out.shp") + + select_alg = get_select_alg() + assert select_alg.ParseRunAndFinalize( + [ + "--exclude", + "--fields=EAS_ID,i_do_not_exist", + "../ogr/data/poly.shp", + out_filename, + ] + ) + + with gdal.OpenEx(out_filename) as ds: + lyr = ds.GetLayer(0) + lyr_defn = lyr.GetLayerDefn() + assert [ + lyr_defn.GetFieldDefn(i).GetName() for i in range(lyr_defn.GetFieldCount()) + ] == ["AREA", "PRFEDEA"] + assert lyr_defn.GetGeomFieldCount() == 1 + + +def test_gdalalg_vector_select_fields_exclude_ogr_geometry(tmp_vsimem): 
+ + out_filename = str(tmp_vsimem / "out.dbf") + + select_alg = get_select_alg() + assert select_alg.ParseRunAndFinalize( + ["--exclude", "--fields=_ogr_geometry_", "../ogr/data/poly.shp", out_filename] + ) + + with gdal.OpenEx(out_filename) as ds: + lyr = ds.GetLayer(0) + lyr_defn = lyr.GetLayerDefn() + assert [ + lyr_defn.GetFieldDefn(i).GetName() for i in range(lyr_defn.GetFieldCount()) + ] == ["AREA", "EAS_ID", "PRFEDEA"] + assert lyr_defn.GetGeomFieldCount() == 0 + + +@pytest.mark.require_driver("GPKG") +def test_gdalalg_vector_select_fields_exclude_name_geom_fields(tmp_vsimem): + + tmp_filename = str(tmp_vsimem / "tmp.gpkg") + out_filename = str(tmp_vsimem / "out.dbf") + + gdal.VectorTranslate(tmp_filename, "../ogr/data/poly.shp") + + select_alg = get_select_alg() + assert select_alg.ParseRunAndFinalize( + ["--exclude", "--fields=geom", tmp_filename, out_filename] + ) + + with gdal.OpenEx(out_filename) as ds: + lyr = ds.GetLayer(0) + lyr_defn = lyr.GetLayerDefn() + assert [ + lyr_defn.GetFieldDefn(i).GetName() for i in range(lyr_defn.GetFieldCount()) + ] == ["AREA", "EAS_ID", "PRFEDEA"] + assert lyr_defn.GetGeomFieldCount() == 0 + + +@pytest.mark.require_driver("GPKG") +def test_gdalalg_vector_select_fields_exclude_name_geom_fields_not_excluded(tmp_vsimem): + + tmp_filename = str(tmp_vsimem / "tmp.gpkg") + out_filename = str(tmp_vsimem / "out.dbf") + + gdal.VectorTranslate(tmp_filename, "../ogr/data/poly.shp") + + select_alg = get_select_alg() + assert select_alg.ParseRunAndFinalize( + ["--exclude", "--fields=i_do_not_exist", tmp_filename, out_filename] + ) + + with gdal.OpenEx(out_filename) as ds: + lyr = ds.GetLayer(0) + lyr_defn = lyr.GetLayerDefn() + assert [ + lyr_defn.GetFieldDefn(i).GetName() for i in range(lyr_defn.GetFieldCount()) + ] == ["AREA", "EAS_ID", "PRFEDEA"] + assert lyr_defn.GetGeomFieldCount() == 1 diff --git a/doc/source/programs/gdal_vector_pipeline.rst b/doc/source/programs/gdal_vector_pipeline.rst index ce0128afceba..57121485eb9a 
100644 --- a/doc/source/programs/gdal_vector_pipeline.rst +++ b/doc/source/programs/gdal_vector_pipeline.rst @@ -107,11 +107,13 @@ Details for options can be found in :ref:`gdal_vector_filter_subcommand`. Select a subset of fields from a vector dataset. Positional arguments: - --fields Selected fields [may be repeated] [required] + --fields Fields to select (or exclude if --exclude) [may be repeated] [required] Options: + --exclude Exclude specified fields + Mutually exclusive with --ignore-missing-fields --ignore-missing-fields Ignore missing fields - + Mutually exclusive with --exclude Details for options can be found in :ref:`gdal_vector_select_subcommand`. diff --git a/doc/source/programs/gdal_vector_select.rst b/doc/source/programs/gdal_vector_select.rst index 1dd2f7703328..5d6f8d5a09b6 100644 --- a/doc/source/programs/gdal_vector_select.rst +++ b/doc/source/programs/gdal_vector_select.rst @@ -22,7 +22,7 @@ Synopsis Positional arguments: -i, --input Input vector dataset [required] -o, --output Output vector dataset [required] - --fields Selected fields [may be repeated] [required] + --fields Fields to select (or exclude if --exclude) [may be repeated] [required] Common Options: -h, --help Display help message and exit @@ -42,8 +42,10 @@ Synopsis --overwrite-layer Whether overwriting existing layer is allowed --append Whether appending to existing layer is allowed --output-layer Output layer name - --fields Selected fields [may be repeated] + --exclude Exclude specified fields + Mutually exclusive with --ignore-missing-fields --ignore-missing-fields Ignore missing fields + Mutually exclusive with --exclude Advanced Options: --if, --input-format Input formats [may be repeated] @@ -68,7 +70,8 @@ Standard options .. option:: --fields - Comma-separated list of fields from input layer to copy to the new layer. 
+ Comma-separated list of fields from input layer to copy to the new layer + (or to exclude if :option:`--exclude` is specified). Field names with spaces, commas or double-quote should be surrounded with a starting and ending double-quote character, and @@ -97,6 +100,11 @@ Standard options When specifying :option:`--ignore-missing-fields`, only a warning is emitted and the non existing fields are just ignored. +.. option:: --exclude + + Modifies the behavior of the algorithm such that all fields are selected, + except the ones mentioned by :option:`--fields`. + Advanced options ++++++++++++++++ @@ -114,3 +122,11 @@ Examples .. code-block:: bash $ gdal vector select in.shp out.gpkg "EAS_ID,_ogr_geometry_" --overwrite + + +.. example:: + :title: Remove sensitive fields from a layer + + .. code-block:: bash + + $ gdal vector select in.shp out.gpkg --exclude --fields "name,surname,address" --overwrite