From 876f7eedac51d55ef02a21e286dce8fda428ae3a Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 9 May 2024 23:28:03 +0200 Subject: [PATCH 001/191] STACIT: add an OVERLAP_STRATEGY opening option to determine how to handle sources fully overlapping others, and taking into account nodata values ``` - .. oo:: OVERLAP_STRATEGY :choices: REMOVE_IF_NO_NODATA, USE_ALL, USE_MOST_RECENT :default: REMOVE_IF_NO_NODATA :since: 3.9.1 Strategy to use when the ItemCollections contains overlapping items, and that some items are fully covered by other items that are more recent. Starting with GDAL 3.9.1, the ``REMOVE_IF_NO_NODATA`` strategy is applied by default. The STACIT virtual mosaic will omit fully covered items, only if no band declares a nodata value. (Note that the determination whether a band has a nodata value or not is done by opening one of the items, and assuming it is representative of the characteristics of the others in the collection). This strategy can be forced in all cases by selecting the ``USE_MOST_RECENT`` strategy (this was the strategy applied prior to 3.9.1) The ``USE_ALL`` strategy always causes all items to be listed in the virtual mosaic, with the most recent ones being rendered on top of the less recent ones. 
``` --- autotest/gdrivers/data/byte_nodata_0.tif | Bin 0 -> 770 bytes .../overlapping_sources_with_nodata.json | 103 ++++++++++++++++++ autotest/gdrivers/stacit.py | 78 ++++++++++++- doc/source/drivers/raster/stacit.rst | 26 ++++- frmts/stacit/stacitdataset.cpp | 26 ++++- 5 files changed, 224 insertions(+), 9 deletions(-) create mode 100644 autotest/gdrivers/data/byte_nodata_0.tif create mode 100644 autotest/gdrivers/data/stacit/overlapping_sources_with_nodata.json diff --git a/autotest/gdrivers/data/byte_nodata_0.tif b/autotest/gdrivers/data/byte_nodata_0.tif new file mode 100644 index 0000000000000000000000000000000000000000..c10d7f2aabd7d0e81d122d0d185b35bc03781b0a GIT binary patch literal 770 zcmZutJ&P1U5Utr=)(aL&&2c&OisK_{DMgvK`2nLGV2nxV3S4+I&6$? z!bUcjaKp=Ff5!ays$qFY4Om@1$4l`JleP!>)OE$N3J2yuND$6Nd_Uw8h8j<4>Wc>ULp=Y4iu^{=@>G}&TU?`xyUYB~4*-fB6e zIZg2?6@5FTnddj}CVjqtxHx(G_(}b+e)aOK{`BGf+xqBeao!itiPgj`ni=C7tK=j| zNm_}l8cfj(S-??LqL_jw!otZ^F|2~il338R>|!FKNn15$%epQlB}teQ%fUxBd|6VA z26X}5mlOrVSrJ+W`JkZ-@MF?jRJDMbZ;mm|$GvhwBNmi&8xaNP`*_ p@Hx4I1Gv>^FdVbBFkF8B*%(Jn*0u>>$+%sYQ_i_F|8YMO_%B#0;otxO literal 0 HcmV?d00001 diff --git a/autotest/gdrivers/data/stacit/overlapping_sources_with_nodata.json b/autotest/gdrivers/data/stacit/overlapping_sources_with_nodata.json new file mode 100644 index 000000000000..6986e6945141 --- /dev/null +++ b/autotest/gdrivers/data/stacit/overlapping_sources_with_nodata.json @@ -0,0 +1,103 @@ +{ + "type": "FeatureCollection", + "stac_version": "1.0.0-beta.2", + "stac_extensions": [], + "features": [ + { + "type": "Feature", + "stac_version": "1.0.0-beta.2", + "stac_extensions": [ + "eo", + "proj" + ], + "id": "byte", + "geometry": null, + "properties": { + "datetime": "2021-07-25T00:00:00Z", + "proj:epsg": 26711, + }, + "collection": "my_collection", + "assets": { + "B01": { + "title": "Band 1 (coastal)", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "roles": [ + "data" + ], + "eo:bands": [ + { + "name": "B01", + 
"common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + } + ], + "href": "data/byte_nodata_0.tif", + "proj:bbox": [ + 440720.000, 3750120.000, + 441920.000, 3751320.000 + ], + "proj:transform": [ + 60, + 0, + 440720, + 0, + -60, + 3751320, + 0, + 0, + 1 + ] + } + } + }, + { + "type": "Feature", + "stac_version": "1.0.0-beta.2", + "stac_extensions": [ + "eo", + "proj" + ], + "id": "under", + "geometry": null, + "properties": { + "datetime": "2021-07-19T10:57:30Z", + "proj:epsg": 26711, + }, + "collection": "my_collection", + "assets": { + "B01": { + "title": "Band 1 (coastal)", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "roles": [ + "data" + ], + "eo:bands": [ + { + "name": "B01", + "common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + } + ], + "href": "data/byte.tif", + "proj:bbox": [ + 440720.000, 3750120.000, + 441920.000, 3751320.000 + ], + "proj:transform": [ + 60, + 0, + 440720, + 0, + -60, + 3751320, + 0, + 0, + 1 + ] + } + } + } + ] +} diff --git a/autotest/gdrivers/stacit.py b/autotest/gdrivers/stacit.py index c0e5054d751c..6b0da1308bf3 100755 --- a/autotest/gdrivers/stacit.py +++ b/autotest/gdrivers/stacit.py @@ -156,7 +156,7 @@ def test_stacit_overlapping_sources(): # Check that the source covered by another one is not listed vrt = ds.GetMetadata("xml:VRT")[0] - placement_vrt = """ + only_one_simple_source = """ Gray data/byte.tif @@ -166,4 +166,78 @@ def test_stacit_overlapping_sources(): """ # print(vrt) - assert placement_vrt in vrt + assert only_one_simple_source in vrt + + ds = gdal.OpenEx( + "data/stacit/overlapping_sources.json", + open_options=["OVERLAP_STRATEGY=REMOVE_IF_NO_NODATA"], + ) + assert ds is not None + vrt = ds.GetMetadata("xml:VRT")[0] + assert only_one_simple_source in vrt + + ds = gdal.OpenEx( + "data/stacit/overlapping_sources.json", + open_options=["OVERLAP_STRATEGY=USE_MOST_RECENT"], + ) + assert ds is not None + vrt = 
ds.GetMetadata("xml:VRT")[0] + assert only_one_simple_source in vrt + + ds = gdal.OpenEx( + "data/stacit/overlapping_sources.json", + open_options=["OVERLAP_STRATEGY=USE_ALL"], + ) + assert ds is not None + assert len(ds.GetFileList()) == 4 + vrt = ds.GetMetadata("xml:VRT")[0] + + +@pytest.mark.require_geos +def test_stacit_overlapping_sources_with_nodata(): + + ds = gdal.Open("data/stacit/overlapping_sources_with_nodata.json") + assert ds is not None + assert len(ds.GetFileList()) == 3 + vrt = ds.GetMetadata("xml:VRT")[0] + # print(vrt) + two_sources = """ + data/byte.tif + 1 + + + 0 + + + data/byte_nodata_0.tif + 1 + + + 0 + """ + assert two_sources in vrt + + ds = gdal.OpenEx( + "data/stacit/overlapping_sources_with_nodata.json", + open_options=["OVERLAP_STRATEGY=REMOVE_IF_NO_NODATA"], + ) + assert ds is not None + vrt = ds.GetMetadata("xml:VRT")[0] + assert len(ds.GetFileList()) == 3 + assert two_sources in vrt + + ds = gdal.OpenEx( + "data/stacit/overlapping_sources_with_nodata.json", + open_options=["OVERLAP_STRATEGY=USE_MOST_RECENT"], + ) + assert ds is not None + assert len(ds.GetFileList()) == 2 + + ds = gdal.OpenEx( + "data/stacit/overlapping_sources_with_nodata.json", + open_options=["OVERLAP_STRATEGY=USE_ALL"], + ) + assert ds is not None + vrt = ds.GetMetadata("xml:VRT")[0] + assert len(ds.GetFileList()) == 3 + assert two_sources in vrt diff --git a/doc/source/drivers/raster/stacit.rst b/doc/source/drivers/raster/stacit.rst index 65299adde151..0c1e0d012e76 100644 --- a/doc/source/drivers/raster/stacit.rst +++ b/doc/source/drivers/raster/stacit.rst @@ -18,10 +18,6 @@ Thus, translating it into VRT will result in a VRT file that directly references Note that `STAC API ItemCollections `_ are not the same as `STAC Collections `_. STAC API ItemCollections are GeoJSON FeatureCollections enhanced with STAC entities. 
-Note that when the ItemCollections contains overlapping items, and that some items -are fully covered by other items that are more recent, the STACIT virtual mosaic will -not list those fully covered items not participating to the pixel values of the mosaic. - Open syntax ----------- @@ -67,6 +63,28 @@ The following open options are supported: Strategy to use to determine dataset resolution. +- .. oo:: OVERLAP_STRATEGY + :choices: REMOVE_IF_NO_NODATA, USE_ALL, USE_MOST_RECENT + :default: REMOVE_IF_NO_NODATA + :since: 3.9.1 + + Strategy to use when the ItemCollections contains overlapping items, and + that some items are fully covered by other items that are more recent. + + Starting with GDAL 3.9.1, the ``REMOVE_IF_NO_NODATA`` strategy is applied + by default. The STACIT virtual mosaic will omit fully covered items, + only if no band declares a nodata value. + (Note that the determination whether a band has a nodata value or not is + done by opening one of the items, and assuming it is representative of + the characteristics of the others in the collection). + + This strategy can be forced in all cases by selecting the ``USE_MOST_RECENT`` + strategy (this was the strategy applied prior to 3.9.1) + + The ``USE_ALL`` strategy always causes all items to be listed in the virtual + mosaic, with the most recent ones being rendered on top of the less recent ones. 
+ + Subdatasets ----------- diff --git a/frmts/stacit/stacitdataset.cpp b/frmts/stacit/stacitdataset.cpp index 50374a73ec30..069fd2273575 100644 --- a/frmts/stacit/stacitdataset.cpp +++ b/frmts/stacit/stacitdataset.cpp @@ -550,6 +550,7 @@ bool STACITDataset::SetupDataset( }); // Create VRT bands and add sources + bool bAtLeastOneBandHasNoData = false; for (int i = 0; i < poItemDS->GetRasterCount(); i++) { auto poItemBand = poItemDS->GetRasterBand(i + 1); @@ -559,7 +560,10 @@ bool STACITDataset::SetupDataset( int bHasNoData = FALSE; const double dfNoData = poItemBand->GetNoDataValue(&bHasNoData); if (bHasNoData) + { + bAtLeastOneBandHasNoData = true; poVRTBand->SetNoDataValue(dfNoData); + } const auto eInterp = poItemBand->GetColorInterpretation(); if (eInterp != GCI_Undefined) @@ -627,9 +631,17 @@ bool STACITDataset::SetupDataset( } } - const char *apszOptions[] = {"EMIT_ERROR_IF_GEOS_NOT_AVAILABLE=NO", - nullptr}; - poVRTBand->RemoveCoveredSources(apszOptions); + const char *pszOverlapStrategy = + CSLFetchNameValueDef(poOpenInfo->papszOpenOptions, + "OVERLAP_STRATEGY", "REMOVE_IF_NO_NODATA"); + if ((EQUAL(pszOverlapStrategy, "REMOVE_IF_NO_NODATA") && + !bAtLeastOneBandHasNoData) || + EQUAL(pszOverlapStrategy, "USE_MOST_RECENT")) + { + const char *const apszOptions[] = { + "EMIT_ERROR_IF_GEOS_NOT_AVAILABLE=NO", nullptr}; + poVRTBand->RemoveCoveredSources(apszOptions); + } } return true; } @@ -940,6 +952,14 @@ void GDALRegister_STACIT() " HIGHEST" " LOWEST" " " + " " ""); poDriver->pfnOpen = STACITDataset::OpenStatic; From 5b28d32de4abd0aa68ee2d0aea334b980457b4a2 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 00:45:27 +0200 Subject: [PATCH 002/191] GDALNoDataMaskBand::IRasterIO(): fix crash on memory allocation failure The method had a fallback path when allocating a temporary buffer of the size of the one passed to the method, that intended to use a less memory intensive block-based approach, but this was buggy. 
Fix this, and add testing for the fallback. Also, when attempting the fallback, emits a CE_Warning instead of a CE_Failure. Fixes https://github.com/rasterio/rasterio/discussions/3028 --- autotest/gcore/mask.py | 55 ++++++++++++++++----- gcore/gdalnodatamaskband.cpp | 94 ++++++++++++++++++++++++++---------- 2 files changed, 110 insertions(+), 39 deletions(-) diff --git a/autotest/gcore/mask.py b/autotest/gcore/mask.py index 2ff7fd5073cb..180521e11b65 100755 --- a/autotest/gcore/mask.py +++ b/autotest/gcore/mask.py @@ -996,7 +996,10 @@ def test_mask_27(): @pytest.mark.parametrize("dt", [gdal.GDT_Byte, gdal.GDT_Int64, gdal.GDT_UInt64]) -def test_mask_setting_nodata(dt): +@pytest.mark.parametrize( + "GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND", [None, "YES", "ALWAYS"] +) +def test_mask_setting_nodata(dt, GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND): def set_nodata_value(ds, val): if dt == gdal.GDT_Byte: ds.GetRasterBand(1).SetNoDataValue(val) @@ -1005,15 +1008,41 @@ def set_nodata_value(ds, val): else: ds.GetRasterBand(1).SetNoDataValueAsUInt64(val) - ds = gdal.GetDriverByName("MEM").Create("", 1, 1, 1, dt) - assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 255) - assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 255) - set_nodata_value(ds, 0) - assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 0) - assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 0) - set_nodata_value(ds, 1) - assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 255) - set_nodata_value(ds, 0) - assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 0) - ds.GetRasterBand(1).DeleteNoDataValue() - assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 255) + def test(): + ds = gdal.GetDriverByName("MEM").Create("__debug__", 1, 1, 1, dt) + assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 255) + assert 
ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 255) + set_nodata_value(ds, 0) + got = ds.GetRasterBand(1).GetMaskBand().ReadRaster() + if ( + GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND == "ALWAYS" + and dt != gdal.GDT_Byte + ): + assert got is None + assert gdal.GetLastErrorType() == gdal.CE_Failure + else: + if ( + GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND == "YES" + and dt != gdal.GDT_Byte + ): + assert gdal.GetLastErrorType() == gdal.CE_Warning + assert got == struct.pack("B", 0) + assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 0) + set_nodata_value(ds, 1) + assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack( + "B", 255 + ) + set_nodata_value(ds, 0) + assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 0) + + ds.GetRasterBand(1).DeleteNoDataValue() + assert ds.GetRasterBand(1).GetMaskBand().ReadRaster() == struct.pack("B", 255) + + if GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND: + with gdal.quiet_errors(), gdal.config_option( + "GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND", + GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND, + ): + test() + else: + test() diff --git a/gcore/gdalnodatamaskband.cpp b/gcore/gdalnodatamaskband.cpp index 635f936e7e4d..135679647b25 100644 --- a/gcore/gdalnodatamaskband.cpp +++ b/gcore/gdalnodatamaskband.cpp @@ -33,6 +33,7 @@ #include #include +#include #include "cpl_conv.h" #include "cpl_error.h" @@ -166,7 +167,6 @@ bool GDALNoDataMaskBand::IsNoDataInRange(double dfNoDataValue, { return GDALIsValueInRange(dfNoDataValue); } - case GDT_Int32: { return GDALIsValueInRange(dfNoDataValue); @@ -279,19 +279,65 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, return CE_None; } - if (eBufType == GDT_Byte) + const auto AllocTempBufferOrFallback = + [this, eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, + nBufYSize, eBufType, nPixelSpace, nLineSpace, + psExtraArg](int nWrkDTSize) -> std::pair { - const int 
nWrkDTSize = GDALGetDataTypeSizeBytes(eWrkDT); - void *pTemp = VSI_MALLOC3_VERBOSE(nWrkDTSize, nBufXSize, nBufYSize); - if (pTemp == nullptr) + auto poParentDS = m_poParent->GetDataset(); + // Check if we must simulate a memory allocation failure + // Before checking the env variable, which is slightly expensive, + // check first for a special dataset name, which is a cheap test. + const char *pszOptVal = + poParentDS && strcmp(poParentDS->GetDescription(), "__debug__") == 0 + ? CPLGetConfigOption( + "GDAL_SIMUL_MEM_ALLOC_FAILURE_NODATA_MASK_BAND", "NO") + : "NO"; + const bool bSimulMemAllocFailure = + EQUAL(pszOptVal, "ALWAYS") || + (CPLTestBool(pszOptVal) && + GDALMajorObject::GetMetadataItem(__func__, "__INTERNAL__") == + nullptr); + void *pTemp = nullptr; + if (!bSimulMemAllocFailure) + { + CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler); + pTemp = VSI_MALLOC3_VERBOSE(nWrkDTSize, nBufXSize, nBufYSize); + } + if (!pTemp) { - return GDALRasterBand::IRasterIO( - eRWFlag, nXOff, nYOff, nXSize, nYSize, pTemp, nBufXSize, - nBufYSize, eWrkDT, nWrkDTSize, - static_cast(nBufXSize) * nWrkDTSize, psExtraArg); + const bool bAllocHasAlreadyFailed = + GDALMajorObject::GetMetadataItem(__func__, "__INTERNAL__") != + nullptr; + CPLError(bAllocHasAlreadyFailed ? CE_Failure : CE_Warning, + CPLE_OutOfMemory, + "GDALNoDataMaskBand::IRasterIO(): cannot allocate %d x %d " + "x %d bytes%s", + nBufXSize, nBufYSize, nWrkDTSize, + bAllocHasAlreadyFailed + ? "" + : ". 
Falling back to block-based approach"); + if (bAllocHasAlreadyFailed) + return std::pair(CE_Failure, nullptr); + // Sets a metadata item to prevent potential infinite recursion + GDALMajorObject::SetMetadataItem(__func__, "IN", "__INTERNAL__"); + const CPLErr eErr = GDALRasterBand::IRasterIO( + eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, + nBufYSize, eBufType, nPixelSpace, nLineSpace, psExtraArg); + GDALMajorObject::SetMetadataItem(__func__, nullptr, "__INTERNAL__"); + return std::pair(eErr, nullptr); } + return std::pair(CE_None, pTemp); + }; - const CPLErr eErr = m_poParent->RasterIO( + if (eBufType == GDT_Byte) + { + const int nWrkDTSize = GDALGetDataTypeSizeBytes(eWrkDT); + auto [eErr, pTemp] = AllocTempBufferOrFallback(nWrkDTSize); + if (!pTemp) + return eErr; + + eErr = m_poParent->RasterIO( GF_Read, nXOff, nYOff, nXSize, nYSize, pTemp, nBufXSize, nBufYSize, eWrkDT, nWrkDTSize, static_cast(nBufXSize) * nWrkDTSize, psExtraArg); @@ -453,30 +499,26 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, // Output buffer is non-Byte. 
Ask for Byte and expand to user requested // type - GByte *pabyBuf = - static_cast(VSI_MALLOC2_VERBOSE(nBufXSize, nBufYSize)); - if (pabyBuf == nullptr) - { - return GDALRasterBand::IRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, - pData, nBufXSize, nBufYSize, eBufType, - nPixelSpace, nLineSpace, psExtraArg); - } - const CPLErr eErr = - IRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pabyBuf, nBufXSize, - nBufYSize, GDT_Byte, 1, nBufXSize, psExtraArg); + auto [eErr, pTemp] = AllocTempBufferOrFallback(sizeof(GByte)); + if (!pTemp) + return eErr; + + eErr = IRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pTemp, nBufXSize, + nBufYSize, GDT_Byte, 1, nBufXSize, psExtraArg); if (eErr != CE_None) { - VSIFree(pabyBuf); + VSIFree(pTemp); return eErr; } for (int iY = 0; iY < nBufYSize; iY++) { - GDALCopyWords(pabyBuf + static_cast(iY) * nBufXSize, GDT_Byte, - 1, static_cast(pData) + iY * nLineSpace, - eBufType, static_cast(nPixelSpace), nBufXSize); + GDALCopyWords( + static_cast(pTemp) + static_cast(iY) * nBufXSize, + GDT_Byte, 1, static_cast(pData) + iY * nLineSpace, + eBufType, static_cast(nPixelSpace), nBufXSize); } - VSIFree(pabyBuf); + VSIFree(pTemp); return CE_None; } From f1764a1488a8d6dd886eb1566305bdc0b5f1fe84 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 03:08:42 +0200 Subject: [PATCH 003/191] GetNextArrowArray() generic implementation: avoid calling VSI_MALLOC_ALIGNED_AUTO_VERBOSE() with a zero size --- ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp | 39 ++++++++++++----------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp index 23a2e5ef7c8a..4717db14cdd0 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp @@ -889,7 +889,7 @@ static inline bool IsValidField(const OGRField *psRawField) static uint8_t *AllocValidityBitmap(size_t nSize) { auto pabyValidity = static_cast( - 
VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nSize + 7) / 8)); + VSI_MALLOC_ALIGNED_AUTO_VERBOSE((1 + nSize + 7) / 8)); if (pabyValidity) { // All valid initially @@ -912,7 +912,7 @@ static bool FillArray(struct ArrowArray *psChild, psChild->buffers = static_cast(CPLCalloc(2, sizeof(void *))); uint8_t *pabyValidity = nullptr; T *panValues = static_cast( - VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * nFeatureCountLimit)); + VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (1 + nFeatureCountLimit))); if (panValues == nullptr) return false; psChild->buffers[1] = panValues; @@ -959,7 +959,7 @@ static bool FillBoolArray(struct ArrowArray *psChild, psChild->buffers = static_cast(CPLCalloc(2, sizeof(void *))); uint8_t *pabyValidity = nullptr; uint8_t *panValues = static_cast( - VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7) / 8)); + VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 7 + 1) / 8)); if (panValues == nullptr) return false; memset(panValues, 0, (nFeatureCountLimit + 7) / 8); @@ -1094,8 +1094,8 @@ FillListArray(struct ArrowArray *psChild, psValueChild->buffers = static_cast(CPLCalloc(2, sizeof(void *))); psValueChild->length = nOffset; - T *panValues = - static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * nOffset)); + T *panValues = static_cast( + VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(T) * (nOffset + 1))); if (panValues == nullptr) return 0; psValueChild->buffers[1] = panValues; @@ -1188,7 +1188,7 @@ FillListArrayBool(struct ArrowArray *psChild, static_cast(CPLCalloc(2, sizeof(void *))); psValueChild->length = nOffset; uint8_t *panValues = static_cast( - VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7) / 8)); + VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nOffset + 7 + 1) / 8)); if (panValues == nullptr) return 0; memset(panValues, 0, (nOffset + 7) / 8); @@ -1269,7 +1269,7 @@ FillStringArray(struct ArrowArray *psChild, panOffsets[nFeatCount] = static_cast(nOffset); char *pachValues = - static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset)); + 
static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1)); if (pachValues == nullptr) return 0; psChild->buffers[2] = pachValues; @@ -1378,7 +1378,7 @@ FillStringListArray(struct ArrowArray *psChild, psValueChild->buffers[1] = panChildOffsets; char *pachValues = - static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars)); + static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars + 1)); if (pachValues == nullptr) return 0; psValueChild->buffers[2] = pachValues; @@ -1461,7 +1461,7 @@ FillBinaryArray(struct ArrowArray *psChild, panOffsets[nFeatCount] = nOffset; GByte *pabyValues = - static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset)); + static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1)); if (pabyValues == nullptr) return 0; psChild->buffers[2] = pabyValues; @@ -1496,9 +1496,10 @@ FillFixedWidthBinaryArray(struct ArrowArray *psChild, psChild->buffers = static_cast(CPLCalloc(3, sizeof(void *))); uint8_t *pabyValidity = nullptr; - assert(nFeatureCountLimit <= std::numeric_limits::max() / nWidth); + assert(nFeatureCountLimit + 1 <= + std::numeric_limits::max() / nWidth); GByte *pabyValues = static_cast( - VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nFeatureCountLimit * nWidth)); + VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nFeatureCountLimit + 1) * nWidth)); if (pabyValues == nullptr) return false; psChild->buffers[1] = pabyValues; @@ -1614,7 +1615,7 @@ FillWKBGeometryArray(struct ArrowArray *psChild, panOffsets[nFeatCount] = static_cast(nOffset); GByte *pabyValues = - static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset)); + static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nOffset + 1)); if (pabyValues == nullptr) return 0; psChild->buffers[2] = pabyValues; @@ -1653,8 +1654,8 @@ static bool FillDateArray(struct ArrowArray *psChild, psChild->n_buffers = 2; psChild->buffers = static_cast(CPLCalloc(2, sizeof(void *))); uint8_t *pabyValidity = nullptr; - int32_t *panValues = static_cast( - VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int32_t) * nFeatureCountLimit)); + int32_t *panValues = 
static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE( + sizeof(int32_t) * (nFeatureCountLimit + 1))); if (panValues == nullptr) return false; psChild->buffers[1] = panValues; @@ -1705,8 +1706,8 @@ static bool FillTimeArray(struct ArrowArray *psChild, psChild->n_buffers = 2; psChild->buffers = static_cast(CPLCalloc(2, sizeof(void *))); uint8_t *pabyValidity = nullptr; - int32_t *panValues = static_cast( - VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int32_t) * nFeatureCountLimit)); + int32_t *panValues = static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE( + sizeof(int32_t) * (nFeatureCountLimit + 1))); if (panValues == nullptr) return false; psChild->buffers[1] = panValues; @@ -1755,8 +1756,8 @@ FillDateTimeArray(struct ArrowArray *psChild, psChild->n_buffers = 2; psChild->buffers = static_cast(CPLCalloc(2, sizeof(void *))); uint8_t *pabyValidity = nullptr; - int64_t *panValues = static_cast( - VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * nFeatureCountLimit)); + int64_t *panValues = static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE( + sizeof(int64_t) * (nFeatureCountLimit + 1))); if (panValues == nullptr) return false; psChild->buffers[1] = panValues; @@ -1933,7 +1934,7 @@ int OGRLayer::GetNextArrowArray(struct ArrowArrayStream *stream, static_cast(CPLCalloc(2, sizeof(void *))); int64_t *panValues = static_cast(VSI_MALLOC_ALIGNED_AUTO_VERBOSE( - sizeof(int64_t) * oFeatureQueue.size())); + sizeof(int64_t) * (oFeatureQueue.size() + 1))); if (panValues == nullptr) goto error; psChild->buffers[1] = panValues; From 2299f2f6776a31259e8be3d25e77abf89391986c Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 02:21:01 +0200 Subject: [PATCH 004/191] VSIMallocAligned(): make behaviour more predictable when nSize == 0 also remove comment suggesting using std::aligned_storage since it is deprecated in C++23 --- port/cpl_vsisimple.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/port/cpl_vsisimple.cpp b/port/cpl_vsisimple.cpp index 1aa8e3077d03..d265a91519b5 
100644 --- a/port/cpl_vsisimple.cpp +++ b/port/cpl_vsisimple.cpp @@ -907,6 +907,11 @@ void VSIFree(void *pData) void *VSIMallocAligned(size_t nAlignment, size_t nSize) { + // In particular for posix_memalign() where behaviour when passing + // nSize == 0 is technically implementation defined (Valgrind complains), + // so let's always return NULL. + if (nSize == 0) + return nullptr; #if defined(HAVE_POSIX_MEMALIGN) && !defined(DEBUG_VSIMALLOC) void *pRet = nullptr; if (posix_memalign(&pRet, nAlignment, nSize) != 0) @@ -924,7 +929,6 @@ void *VSIMallocAligned(size_t nAlignment, size_t nSize) // Detect overflow. if (nSize + nAlignment < nSize) return nullptr; - // TODO(schwehr): C++11 has std::aligned_storage, alignas, and related. GByte *pabyData = static_cast(VSIMalloc(nSize + nAlignment)); if (pabyData == nullptr) return nullptr; From 93703e7d6a201807c48bd0951359457c75fcf028 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 01:36:24 +0200 Subject: [PATCH 005/191] GDALNoDataMaskBand::IRasterIO(): speed optimization, and reduce copy&paste --- gcore/gdalnodatamaskband.cpp | 188 +++++++++++++++++++++-------------- 1 file changed, 114 insertions(+), 74 deletions(-) diff --git a/gcore/gdalnodatamaskband.cpp b/gcore/gdalnodatamaskband.cpp index 635f936e7e4d..615336e6468e 100644 --- a/gcore/gdalnodatamaskband.cpp +++ b/gcore/gdalnodatamaskband.cpp @@ -111,13 +111,19 @@ static GDALDataType GetWorkDataType(GDALDataType eDataType) eWrkDT = GDT_Byte; break; + case GDT_Int16: + eWrkDT = GDT_Int16; + break; + case GDT_UInt16: + eWrkDT = GDT_UInt16; + break; + case GDT_UInt32: eWrkDT = GDT_UInt32; break; case GDT_Int8: - case GDT_Int16: case GDT_Int32: case GDT_CInt16: case GDT_CInt32: @@ -162,6 +168,16 @@ bool GDALNoDataMaskBand::IsNoDataInRange(double dfNoDataValue, return GDALIsValueInRange(dfNoDataValue); } + case GDT_Int16: + { + return GDALIsValueInRange(dfNoDataValue); + } + + case GDT_UInt16: + { + return GDALIsValueInRange(dfNoDataValue); + } + case 
GDT_UInt32: { return GDALIsValueInRange(dfNoDataValue); @@ -224,6 +240,70 @@ CPLErr GDALNoDataMaskBand::IReadBlock(int nXBlockOff, int nYBlockOff, nBlockXSize, &sExtraArg); } +/************************************************************************/ +/* SetZeroOr255() */ +/************************************************************************/ + +#if (defined(__GNUC__) && !defined(__clang__)) +__attribute__((optimize("tree-vectorize"))) +#endif +static void +SetZeroOr255(GByte *pabyDestAndSrc, size_t nBufSize, GByte byNoData) +{ + for (size_t i = 0; i < nBufSize; ++i) + { + pabyDestAndSrc[i] = (pabyDestAndSrc[i] == byNoData) ? 0 : 255; + } +} + +template +#if (defined(__GNUC__) && !defined(__clang__)) +__attribute__((optimize("tree-vectorize"))) +#endif +static void +SetZeroOr255(GByte *pabyDest, const T *panSrc, size_t nBufSize, T nNoData) +{ + for (size_t i = 0; i < nBufSize; ++i) + { + pabyDest[i] = (panSrc[i] == nNoData) ? 0 : 255; + } +} + +template +static void SetZeroOr255(GByte *pabyDest, const T *panSrc, int nBufXSize, + int nBufYSize, GSpacing nPixelSpace, + GSpacing nLineSpace, T nNoData) +{ + if (nPixelSpace == 1 && nLineSpace == nBufXSize) + { + const size_t nBufSize = static_cast(nBufXSize) * nBufYSize; + SetZeroOr255(pabyDest, panSrc, nBufSize, nNoData); + } + else if (nPixelSpace == 1) + { + for (int iY = 0; iY < nBufYSize; iY++) + { + SetZeroOr255(pabyDest, panSrc, nBufXSize, nNoData); + pabyDest += nLineSpace; + panSrc += nBufXSize; + } + } + else + { + size_t i = 0; + for (int iY = 0; iY < nBufYSize; iY++) + { + GByte *pabyLineDest = pabyDest + iY * nLineSpace; + for (int iX = 0; iX < nBufXSize; iX++) + { + *pabyLineDest = (panSrc[i] == nNoData) ? 
0 : 255; + ++i; + pabyLineDest += nPixelSpace; + } + } + } +} + /************************************************************************/ /* IRasterIO() */ /************************************************************************/ @@ -259,22 +339,12 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, if (nPixelSpace == 1 && nLineSpace == nBufXSize) { const size_t nBufSize = static_cast(nBufXSize) * nBufYSize; - for (size_t i = 0; i < nBufSize; ++i) - { - pabyData[i] = pabyData[i] == byNoData ? 0 : 255; - } + SetZeroOr255(pabyData, nBufSize, byNoData); } else { - for (int iY = 0; iY < nBufYSize; iY++) - { - GByte *pabyLine = pabyData + iY * nLineSpace; - for (int iX = 0; iX < nBufXSize; iX++) - { - *pabyLine = *pabyLine == byNoData ? 0 : 255; - pabyLine += nPixelSpace; - } - } + SetZeroOr255(pabyData, pabyData, nBufXSize, nBufYSize, nPixelSpace, + nLineSpace, byNoData); } return CE_None; } @@ -311,41 +381,39 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, */ switch (eWrkDT) { - case GDT_UInt32: + case GDT_Int16: { - const GUInt32 nNoData = static_cast(m_dfNoDataValue); - const GUInt32 *panSrc = static_cast(pTemp); + const auto nNoData = static_cast(m_dfNoDataValue); + const auto *panSrc = static_cast(pTemp); + SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize, + nPixelSpace, nLineSpace, nNoData); + } + break; - size_t i = 0; - for (int iY = 0; iY < nBufYSize; iY++) - { - GByte *pabyLineDest = pabyDest + iY * nLineSpace; - for (int iX = 0; iX < nBufXSize; iX++) - { - *pabyLineDest = panSrc[i] == nNoData ? 
0 : 255; - ++i; - pabyLineDest += nPixelSpace; - } - } + case GDT_UInt16: + { + const auto nNoData = static_cast(m_dfNoDataValue); + const auto *panSrc = static_cast(pTemp); + SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize, + nPixelSpace, nLineSpace, nNoData); } break; - case GDT_Int32: + case GDT_UInt32: { - const GInt32 nNoData = static_cast(m_dfNoDataValue); - const GInt32 *panSrc = static_cast(pTemp); + const auto nNoData = static_cast(m_dfNoDataValue); + const auto *panSrc = static_cast(pTemp); + SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize, + nPixelSpace, nLineSpace, nNoData); + } + break; - size_t i = 0; - for (int iY = 0; iY < nBufYSize; iY++) - { - GByte *pabyLineDest = pabyDest + iY * nLineSpace; - for (int iX = 0; iX < nBufXSize; iX++) - { - *pabyLineDest = panSrc[i] == nNoData ? 0 : 255; - ++i; - pabyLineDest += nPixelSpace; - } - } + case GDT_Int32: + { + const auto nNoData = static_cast(m_dfNoDataValue); + const auto *panSrc = static_cast(pTemp); + SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize, + nPixelSpace, nLineSpace, nNoData); } break; @@ -401,44 +469,16 @@ CPLErr GDALNoDataMaskBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, case GDT_Int64: { const auto *panSrc = static_cast(pTemp); - - size_t i = 0; - for (int iY = 0; iY < nBufYSize; iY++) - { - GByte *pabyLineDest = pabyDest + iY * nLineSpace; - for (int iX = 0; iX < nBufXSize; iX++) - { - const auto nVal = panSrc[i]; - if (nVal == m_nNoDataValueInt64) - *pabyLineDest = 0; - else - *pabyLineDest = 255; - ++i; - pabyLineDest += nPixelSpace; - } - } + SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize, + nPixelSpace, nLineSpace, m_nNoDataValueInt64); } break; case GDT_UInt64: { const auto *panSrc = static_cast(pTemp); - - size_t i = 0; - for (int iY = 0; iY < nBufYSize; iY++) - { - GByte *pabyLineDest = pabyDest + iY * nLineSpace; - for (int iX = 0; iX < nBufXSize; iX++) - { - const auto nVal = panSrc[i]; - if (nVal == m_nNoDataValueUInt64) - *pabyLineDest = 0; - else 
- *pabyLineDest = 255; - ++i; - pabyLineDest += nPixelSpace; - } - } + SetZeroOr255(pabyDest, panSrc, nBufXSize, nBufYSize, + nPixelSpace, nLineSpace, m_nNoDataValueUInt64); } break; From 95b723fec5d94be15c75195503329370cbac4db9 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 19:46:35 +0200 Subject: [PATCH 006/191] Parquet dataset (multi-file typically): enable use of bounding box columns for spatial filter --- autotest/ogr/ogr_parquet.py | 24 ++ .../arrow_common/ograrrowlayer.hpp | 7 +- ogr/ogrsf_frmts/parquet/ogr_parquet.h | 29 ++- .../parquet/ogrparquetdatasetlayer.cpp | 238 +++++++++++++++++- ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp | 53 +--- ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp | 21 +- 6 files changed, 299 insertions(+), 73 deletions(-) diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index a7679ad7d3a5..16efed6dae6b 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -1643,10 +1643,34 @@ def test_ogr_parquet_read_partitioned_geo(): assert lyr.GetExtent() == (1, 3, 2, 4) assert lyr.GetExtent() == (1, 3, 2, 4) + assert lyr.GetLayerDefn().GetFieldCount() == 0 + + assert lyr.TestCapability(ogr.OLCFastSpatialFilter) == 1 + lyr.SetSpatialFilterRect(0, 0, 10, 10) lyr.ResetReading() assert lyr.GetFeatureCount() == 2 + lyr.SetSpatialFilterRect(0.9, 1.9, 1.1, 2.1) + lyr.ResetReading() + assert lyr.GetFeatureCount() == 1 + + lyr.SetSpatialFilterRect(0.9, 1.9, 0.95, 2.1) + lyr.ResetReading() + assert lyr.GetFeatureCount() == 0 + + lyr.SetSpatialFilterRect(1.05, 1.9, 1.1, 2.1) + lyr.ResetReading() + assert lyr.GetFeatureCount() == 0 + + lyr.SetSpatialFilterRect(0.9, 1.9, 1.1, 1.95) + lyr.ResetReading() + assert lyr.GetFeatureCount() == 0 + + lyr.SetSpatialFilterRect(0.9, 2.05, 1.1, 2.1) + lyr.ResetReading() + assert lyr.GetFeatureCount() == 0 + lyr.SetSpatialFilterRect(-100, -100, -100, -100) lyr.ResetReading() assert lyr.GetNextFeature() is None diff --git 
a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index d1ee6ec52263..94f7bd405663 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -4813,13 +4813,8 @@ inline void OGRArrowLayer::SetSpatialFilter(int iGeomField, OGRGeometry *poGeomIn) { - if (iGeomField < 0 || (iGeomField >= GetLayerDefn()->GetGeomFieldCount() && - !(iGeomField == 0 && poGeomIn == nullptr))) - { - CPLError(CE_Failure, CPLE_AppDefined, - "Invalid geometry field index : %d", iGeomField); + if (!ValidateGeometryFieldIndexForSetSpatialFilter(iGeomField, poGeomIn)) return; - } // When changing filters, we need to invalidate cached batches, as // PostFilterArrowArray() has potentially modified array contents diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h index 8b6c84c08c65..75ff9cdb6031 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h @@ -31,6 +31,8 @@ #include "ogrsf_frmts.h" +#include "cpl_json.h" + #include #include @@ -63,6 +65,13 @@ class OGRParquetLayerBase CPL_NON_FINAL : public OGRArrowLayer int iFieldIdx, const std::shared_ptr &field, std::function computeGeometryTypeFun); + static bool ParseGeometryColumnCovering(const CPLJSONObject &oJSONDef, + std::string &osBBOXColumn, + std::string &osXMin, + std::string &osYMin, + std::string &osXMax, + std::string &osYMax); + public: int TestCapability(const char *) override; @@ -224,9 +233,17 @@ class OGRParquetLayer final : public OGRParquetLayerBase class OGRParquetDatasetLayer final : public OGRParquetLayerBase { + bool m_bIsVSI = false; + bool m_bRebuildScanner = true; + std::shared_ptr m_poDataset{}; std::shared_ptr m_poScanner{}; void EstablishFeatureDefn(); + void + ProcessGeometryColumnCovering(const std::shared_ptr &field, + const CPLJSONObject &oJSONGeometryColumn); + + void BuildScanner(); protected: std::string GetDriverUCName() const 
override @@ -242,9 +259,8 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase public: OGRParquetDatasetLayer( - OGRParquetDataset *poDS, const char *pszLayerName, - const std::shared_ptr &scanner, - const std::shared_ptr &schema, + OGRParquetDataset *poDS, const char *pszLayerName, bool bIsVSI, + const std::shared_ptr &dataset, CSLConstList papszOpenOptions); GIntBig GetFeatureCount(int bForce) override; @@ -252,6 +268,13 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce = TRUE) override; + void SetSpatialFilter(OGRGeometry *poGeom) override + { + SetSpatialFilter(0, poGeom); + } + + void SetSpatialFilter(int iGeomField, OGRGeometry *poGeom) override; + // TODO std::unique_ptr BuildDomain(const std::string & /*osDomainName*/, diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index da81f47b0f44..171afd25fb32 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -5,7 +5,7 @@ * Author: Even Rouault, * ****************************************************************************** - * Copyright (c) 2022, Planet Labs + * Copyright (c) 2022-2024, Planet Labs * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,6 +29,7 @@ #include "ogrsf_frmts.h" #include +#include #include #include #include @@ -43,18 +44,79 @@ /************************************************************************/ OGRParquetDatasetLayer::OGRParquetDatasetLayer( - OGRParquetDataset *poDS, const char *pszLayerName, - const std::shared_ptr &scanner, - const std::shared_ptr &schema, CSLConstList papszOpenOptions) + OGRParquetDataset *poDS, const char *pszLayerName, bool bIsVSI, + const std::shared_ptr &dataset, + CSLConstList papszOpenOptions) : 
OGRParquetLayerBase(poDS, pszLayerName, papszOpenOptions), - m_poScanner(scanner) + m_bIsVSI(bIsVSI), m_poDataset(dataset) { - m_poSchema = schema; + m_poSchema = m_poDataset->schema(); EstablishFeatureDefn(); CPLAssert(static_cast(m_aeGeomEncoding.size()) == m_poFeatureDefn->GetGeomFieldCount()); } +/************************************************************************/ +/* ProcessGeometryColumnCovering() */ +/************************************************************************/ + +/** Process GeoParquet JSON geometry field object to extract information about + * its bounding box column, and appropriately fill m_oMapGeomFieldIndexToGeomColBBOX + * member with information on that bounding box column. + */ +void OGRParquetDatasetLayer::ProcessGeometryColumnCovering( + const std::shared_ptr &field, + const CPLJSONObject &oJSONGeometryColumn) +{ + std::string osBBOXColumn; + std::string osXMin, osYMin, osXMax, osYMax; + if (ParseGeometryColumnCovering(oJSONGeometryColumn, osBBOXColumn, osXMin, + osYMin, osXMax, osYMax)) + { + OGRArrowLayer::GeomColBBOX sDesc; + sDesc.iArrowCol = m_poSchema->GetFieldIndex(osBBOXColumn); + const auto fieldBBOX = m_poSchema->GetFieldByName(osBBOXColumn); + if (sDesc.iArrowCol >= 0 && fieldBBOX && + fieldBBOX->type()->id() == arrow::Type::STRUCT) + { + const auto fieldBBOXStruct = + std::static_pointer_cast(fieldBBOX->type()); + const auto fieldXMin = fieldBBOXStruct->GetFieldByName(osXMin); + const auto fieldYMin = fieldBBOXStruct->GetFieldByName(osYMin); + const auto fieldXMax = fieldBBOXStruct->GetFieldByName(osXMax); + const auto fieldYMax = fieldBBOXStruct->GetFieldByName(osYMax); + const int nXMinIdx = fieldBBOXStruct->GetFieldIndex(osXMin); + const int nYMinIdx = fieldBBOXStruct->GetFieldIndex(osYMin); + const int nXMaxIdx = fieldBBOXStruct->GetFieldIndex(osXMax); + const int nYMaxIdx = fieldBBOXStruct->GetFieldIndex(osYMax); + if (nXMinIdx >= 0 && nYMinIdx >= 0 && nXMaxIdx >= 0 && + nYMaxIdx >= 0 && fieldXMin && fieldYMin 
&& fieldXMax && + fieldYMax && + (fieldXMin->type()->id() == arrow::Type::FLOAT || + fieldXMin->type()->id() == arrow::Type::DOUBLE) && + fieldXMin->type()->id() == fieldYMin->type()->id() && + fieldXMin->type()->id() == fieldXMax->type()->id() && + fieldXMin->type()->id() == fieldYMax->type()->id()) + { + CPLDebug("PARQUET", + "Bounding box column '%s' detected for " + "geometry column '%s'", + osBBOXColumn.c_str(), field->name().c_str()); + sDesc.iArrowSubfieldXMin = nXMinIdx; + sDesc.iArrowSubfieldYMin = nYMinIdx; + sDesc.iArrowSubfieldXMax = nXMaxIdx; + sDesc.iArrowSubfieldYMax = nYMaxIdx; + sDesc.bIsFloat = + (fieldXMin->type()->id() == arrow::Type::FLOAT); + + m_oMapGeomFieldIndexToGeomColBBOX + [m_poFeatureDefn->GetGeomFieldCount() - 1] = + std::move(sDesc); + } + } + } +} + /************************************************************************/ /* EstablishFeatureDefn() */ /************************************************************************/ @@ -69,6 +131,26 @@ void OGRParquetDatasetLayer::EstablishFeatureDefn() LoadGDALMetadata(kv_metadata.get()); + const bool bUseBBOX = + CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_BBOX", "YES")); + + // Keep track of declared bounding box columns in GeoParquet JSON metadata, + // in order not to expose them as regular fields. 
+ std::set oSetBBOXColumns; + if (bUseBBOX) + { + for (const auto &iter : m_oMapGeometryColumns) + { + std::string osBBOXColumn; + std::string osXMin, osYMin, osXMax, osYMax; + if (ParseGeometryColumnCovering(iter.second, osBBOXColumn, osXMin, + osYMin, osXMax, osYMax)) + { + oSetBBOXColumns.insert(osBBOXColumn); + } + } + } + const auto &fields = m_poSchema->fields(); for (int i = 0; i < m_poSchema->num_fields(); ++i) { @@ -80,9 +162,23 @@ void OGRParquetDatasetLayer::EstablishFeatureDefn() continue; } + if (oSetBBOXColumns.find(field->name()) != oSetBBOXColumns.end()) + { + m_oSetBBoxArrowColumns.insert(i); + continue; + } + const bool bGeometryField = DealWithGeometryColumn(i, field, []() { return wkbUnknown; }); - if (!bGeometryField) + if (bGeometryField) + { + const auto oIter = m_oMapGeometryColumns.find(field->name()); + if (bUseBBOX && oIter != m_oMapGeometryColumns.end()) + { + ProcessGeometryColumnCovering(field, oIter->second); + } + } + else { CreateFieldFromSchema(field, {i}, oMapFieldNameToGDALSchemaFieldDefn); @@ -95,16 +191,121 @@ void OGRParquetDatasetLayer::EstablishFeatureDefn() m_poFeatureDefn->GetGeomFieldCount()); } +/************************************************************************/ +/* BuildScanner() */ +/************************************************************************/ + +void OGRParquetDatasetLayer::BuildScanner() +{ + m_bRebuildScanner = false; + + try + { + std::shared_ptr scannerBuilder; + PARQUET_ASSIGN_OR_THROW(scannerBuilder, m_poDataset->NewScan()); + assert(scannerBuilder); + + // We cannot use the shared memory pool. Otherwise we get random + // crashes in multi-threaded arrow code (apparently some cleanup code), + // that may used the memory pool after it has been destroyed. 
+ // At least this was true with some older libarrow version + // PARQUET_THROW_NOT_OK(scannerBuilder->Pool(m_poMemoryPool)); + + if (m_bIsVSI) + { + const int nFragmentReadAhead = atoi( + CPLGetConfigOption("OGR_PARQUET_FRAGMENT_READ_AHEAD", "2")); + PARQUET_THROW_NOT_OK( + scannerBuilder->FragmentReadahead(nFragmentReadAhead)); + } + + const char *pszBatchSize = + CPLGetConfigOption("OGR_PARQUET_BATCH_SIZE", nullptr); + if (pszBatchSize) + { + PARQUET_THROW_NOT_OK( + scannerBuilder->BatchSize(CPLAtoGIntBig(pszBatchSize))); + } + + const char *pszUseThreads = + CPLGetConfigOption("OGR_PARQUET_USE_THREADS", nullptr); + if (pszUseThreads) + { + PARQUET_THROW_NOT_OK( + scannerBuilder->UseThreads(CPLTestBool(pszUseThreads))); + } + +#if PARQUET_VERSION_MAJOR >= 10 + const char *pszBatchReadAhead = + CPLGetConfigOption("OGR_PARQUET_BATCH_READ_AHEAD", nullptr); + if (pszBatchReadAhead) + { + PARQUET_THROW_NOT_OK( + scannerBuilder->BatchReadahead(atoi(pszBatchReadAhead))); + } +#endif + + namespace cp = ::arrow::compute; + cp::Expression expression; + if (m_poFilterGeom && + CPLTestBool(CPLGetConfigOption( + "OGR_PARQUET_OPTIMIZED_SPATIAL_FILTER", "YES"))) + { + const auto oIter = + m_oMapGeomFieldIndexToGeomColBBOX.find(m_iGeomFieldFilter); + if (oIter != m_oMapGeomFieldIndexToGeomColBBOX.end()) + { + // This actually requires Arrow >= 15 (https://github.com/apache/arrow/issues/39064) + // to be more efficient. 
+ const auto &oBBOXDef = oIter->second; + expression = cp::and_( + {cp::less_equal( + cp::field_ref(arrow::FieldRef( + oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldXMin)), + cp::literal(m_sFilterEnvelope.MaxX)), + cp::less_equal( + cp::field_ref(arrow::FieldRef( + oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldYMin)), + cp::literal(m_sFilterEnvelope.MaxY)), + cp::greater_equal( + cp::field_ref(arrow::FieldRef( + oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldXMax)), + cp::literal(m_sFilterEnvelope.MinX)), + cp::greater_equal( + cp::field_ref(arrow::FieldRef( + oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldYMax)), + cp::literal(m_sFilterEnvelope.MinY))}); + } + } + if (expression.is_valid()) + { + PARQUET_THROW_NOT_OK(scannerBuilder->Filter(expression)); + } + + PARQUET_ASSIGN_OR_THROW(m_poScanner, scannerBuilder->Finish()); + } + catch (const std::exception &e) + { + CPLError(CE_Failure, CPLE_AppDefined, "Arrow/Parquet exception: %s", + e.what()); + } +} + /************************************************************************/ /* ReadNextBatch() */ /************************************************************************/ bool OGRParquetDatasetLayer::ReadNextBatch() { + if (m_bRebuildScanner) + BuildScanner(); + m_nIdxInBatch = 0; if (m_poRecordBatchReader == nullptr) { + if (!m_poScanner) + return false; auto result = m_poScanner->ToRecordBatchReader(); if (!result.ok()) { @@ -138,6 +339,8 @@ bool OGRParquetDatasetLayer::ReadNextBatch() } } while (poNextBatch->num_rows() == 0); + // CPLDebug("PARQUET", "Current batch has %d rows", int(poNextBatch->num_rows())); + SetBatch(poNextBatch); return true; @@ -160,6 +363,10 @@ GIntBig OGRParquetDatasetLayer::GetFeatureCount(int bForce) { if (m_poAttrQuery == nullptr && m_poFilterGeom == nullptr) { + if (m_bRebuildScanner) + BuildScanner(); + if (!m_poScanner) + return -1; auto status = m_poScanner->CountRows(); if (status.ok()) return *status; @@ -222,7 +429,7 @@ OGRErr OGRParquetDatasetLayer::GetExtent(int iGeomField, OGREnvelope 
*psExtent, auto oIter = m_oMapGeometryColumns.find(pszGeomFieldName); if (oIter != m_oMapGeometryColumns.end()) { - auto statusFragments = m_poScanner->dataset()->GetFragments(); + auto statusFragments = m_poDataset->GetFragments(); if (statusFragments.ok()) { *psExtent = OGREnvelope(); @@ -272,3 +479,18 @@ OGRErr OGRParquetDatasetLayer::GetExtent(int iGeomField, OGREnvelope *psExtent, return OGRParquetLayerBase::GetExtent(iGeomField, psExtent, bForce); } + +/************************************************************************/ +/* SetSpatialFilter() */ +/************************************************************************/ + +void OGRParquetDatasetLayer::SetSpatialFilter(int iGeomField, + OGRGeometry *poGeomIn) + +{ + OGRParquetLayerBase::SetSpatialFilter(iGeomField, poGeomIn); + m_bRebuildScanner = true; + + // Full invalidation + InvalidateCachedBatches(); +} diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp index 96719ea79db0..305942529ae0 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp @@ -246,43 +246,13 @@ static GDALDataset *OpenFromDatasetFactory( std::shared_ptr dataset; PARQUET_ASSIGN_OR_THROW(dataset, factory->Finish()); - std::shared_ptr scannerBuilder; - PARQUET_ASSIGN_OR_THROW(scannerBuilder, dataset->NewScan()); - auto poMemoryPool = std::shared_ptr( arrow::MemoryPool::CreateDefault().release()); - // We cannot use the above shared memory pool. Otherwise we get random - // crashes in multi-threaded arrow code (apparently some cleanup code), - // that may used the memory pool after it has been destroyed. 
- // PARQUET_THROW_NOT_OK(scannerBuilder->Pool(poMemoryPool.get())); - const bool bIsVSI = STARTS_WITH(osBasePath.c_str(), "/vsi"); - if (bIsVSI) + const char *pszNumThreads = CPLGetConfigOption("GDAL_NUM_THREADS", nullptr); + if (bIsVSI || pszNumThreads) { - const int nFragmentReadAhead = - atoi(CPLGetConfigOption("OGR_PARQUET_FRAGMENT_READ_AHEAD", "2")); - PARQUET_THROW_NOT_OK( - scannerBuilder->FragmentReadahead(nFragmentReadAhead)); - - const char *pszBatchSize = - CPLGetConfigOption("OGR_PARQUET_BATCH_SIZE", nullptr); - if (pszBatchSize) - { - PARQUET_THROW_NOT_OK( - scannerBuilder->BatchSize(CPLAtoGIntBig(pszBatchSize))); - } - - const char *pszUseThreads = - CPLGetConfigOption("OGR_PARQUET_USE_THREADS", nullptr); - if (pszUseThreads) - { - PARQUET_THROW_NOT_OK( - scannerBuilder->UseThreads(CPLTestBool(pszUseThreads))); - } - - const char *pszNumThreads = - CPLGetConfigOption("GDAL_NUM_THREADS", nullptr); int nNumThreads = 0; if (pszNumThreads == nullptr) nNumThreads = std::min(4, CPLGetNumCPUs()); @@ -290,29 +260,16 @@ static GDALDataset *OpenFromDatasetFactory( nNumThreads = EQUAL(pszNumThreads, "ALL_CPUS") ? 
CPLGetNumCPUs() : atoi(pszNumThreads); - if (nNumThreads > 1) + if (nNumThreads >= 1) { CPL_IGNORE_RET_VAL(arrow::SetCpuThreadPoolCapacity(nNumThreads)); } - -#if PARQUET_VERSION_MAJOR >= 10 - const char *pszBatchReadAhead = - CPLGetConfigOption("OGR_PARQUET_BATCH_READ_AHEAD", nullptr); - if (pszBatchReadAhead) - { - PARQUET_THROW_NOT_OK( - scannerBuilder->BatchReadahead(atoi(pszBatchReadAhead))); - } -#endif } - std::shared_ptr scanner; - PARQUET_ASSIGN_OR_THROW(scanner, scannerBuilder->Finish()); - auto poDS = std::make_unique(poMemoryPool); auto poLayer = std::make_unique( - poDS.get(), CPLGetBasename(osBasePath.c_str()), scanner, - scannerBuilder->schema(), papszOpenOptions); + poDS.get(), CPLGetBasename(osBasePath.c_str()), bIsVSI, dataset, + papszOpenOptions); poDS->SetLayer(std::move(poLayer)); return poDS.release(); } diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index 67938e383e1c..ad9ffd6dbb0c 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -135,12 +135,11 @@ void OGRParquetLayerBase::LoadGeoMetadata( /************************************************************************/ //! 
Parse bounding box column definition -static bool ParseGeometryColumnCovering(const CPLJSONObject &oJSONDef, - std::string &osBBOXColumn, - std::string &osXMin, - std::string &osYMin, - std::string &osXMax, - std::string &osYMax) +/*static */ +bool OGRParquetLayerBase::ParseGeometryColumnCovering( + const CPLJSONObject &oJSONDef, std::string &osBBOXColumn, + std::string &osXMin, std::string &osYMin, std::string &osXMax, + std::string &osYMax) { const auto oCovering = oJSONDef["covering"]; if (oCovering.IsValid() && @@ -470,6 +469,12 @@ int OGRParquetLayerBase::TestCapability(const char *pszCap) if (EQUAL(pszCap, OLCFastSetNextByIndex)) return true; + if (EQUAL(pszCap, OLCFastSpatialFilter)) + { + return m_oMapGeomFieldIndexToGeomColBBOX.find(m_iGeomFieldFilter) != + m_oMapGeomFieldIndexToGeomColBBOX.end(); + } + return OGRArrowLayer::TestCapability(pszCap); } @@ -529,8 +534,8 @@ void OGRParquetLayer::EstablishFeatureDefn() return; } - const bool bUseBBOX = CPLTestBool(CPLGetConfigOption( - ("OGR_" + GetDriverUCName() + "_USE_BBOX").c_str(), "YES")); + const bool bUseBBOX = + CPLTestBool(CPLGetConfigOption("OGR_PARQUET_USE_BBOX", "YES")); // Keep track of declared bounding box columns in GeoParquet JSON metadata, // in order not to expose them as regular fields. 
From 30bbfa7b828af0f649fdc63fb26d25dc97fe3df0 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 14 May 2024 16:01:10 +0200 Subject: [PATCH 007/191] Parquet: harmonize thread handling between non-dataset and dataset mode --- ogr/ogrsf_frmts/parquet/ogr_parquet.h | 2 + .../parquet/ogrparquetdatasetlayer.cpp | 10 +++-- ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp | 16 -------- ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp | 38 ++++++++++++++----- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h index 75ff9cdb6031..050939b0a916 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h @@ -59,6 +59,8 @@ class OGRParquetLayerBase CPL_NON_FINAL : public OGRArrowLayer CPLStringList m_aosGeomPossibleNames{}; std::string m_osCRS{}; + static int GetNumCPUs(); + void LoadGeoMetadata( const std::shared_ptr &kv_metadata); bool DealWithGeometryColumn( diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index 171afd25fb32..3899920d1d80 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -227,12 +227,16 @@ void OGRParquetDatasetLayer::BuildScanner() scannerBuilder->BatchSize(CPLAtoGIntBig(pszBatchSize))); } + const int nNumCPUs = GetNumCPUs(); const char *pszUseThreads = CPLGetConfigOption("OGR_PARQUET_USE_THREADS", nullptr); - if (pszUseThreads) + if (!pszUseThreads && nNumCPUs > 1) { - PARQUET_THROW_NOT_OK( - scannerBuilder->UseThreads(CPLTestBool(pszUseThreads))); + pszUseThreads = "YES"; + } + if (CPLTestBool(pszUseThreads)) + { + PARQUET_THROW_NOT_OK(scannerBuilder->UseThreads(true)); } #if PARQUET_VERSION_MAJOR >= 10 diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp index 305942529ae0..f3c0236128b5 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp 
+++ b/ogr/ogrsf_frmts/parquet/ogrparquetdriver.cpp @@ -250,22 +250,6 @@ static GDALDataset *OpenFromDatasetFactory( arrow::MemoryPool::CreateDefault().release()); const bool bIsVSI = STARTS_WITH(osBasePath.c_str(), "/vsi"); - const char *pszNumThreads = CPLGetConfigOption("GDAL_NUM_THREADS", nullptr); - if (bIsVSI || pszNumThreads) - { - int nNumThreads = 0; - if (pszNumThreads == nullptr) - nNumThreads = std::min(4, CPLGetNumCPUs()); - else - nNumThreads = EQUAL(pszNumThreads, "ALL_CPUS") - ? CPLGetNumCPUs() - : atoi(pszNumThreads); - if (nNumThreads >= 1) - { - CPL_IGNORE_RET_VAL(arrow::SetCpuThreadPoolCapacity(nNumThreads)); - } - } - auto poDS = std::make_unique(poMemoryPool); auto poLayer = std::make_unique( poDS.get(), CPLGetBasename(osBasePath.c_str()), bIsVSI, dataset, diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index ad9ffd6dbb0c..6da4ccc57ce8 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -478,6 +478,27 @@ int OGRParquetLayerBase::TestCapability(const char *pszCap) return OGRArrowLayer::TestCapability(pszCap); } +/************************************************************************/ +/* GetNumCPUs() */ +/************************************************************************/ + +/* static */ +int OGRParquetLayerBase::GetNumCPUs() +{ + const char *pszNumThreads = CPLGetConfigOption("GDAL_NUM_THREADS", nullptr); + int nNumThreads = 0; + if (pszNumThreads == nullptr) + nNumThreads = std::min(4, CPLGetNumCPUs()); + else + nNumThreads = EQUAL(pszNumThreads, "ALL_CPUS") ? 
CPLGetNumCPUs() + : atoi(pszNumThreads); + if (nNumThreads > 1) + { + CPL_IGNORE_RET_VAL(arrow::SetCpuThreadPoolCapacity(nNumThreads)); + } + return nNumThreads; +} + /************************************************************************/ /* OGRParquetLayer() */ /************************************************************************/ @@ -494,16 +515,15 @@ OGRParquetLayer::OGRParquetLayer( if (pszParquetBatchSize) m_poArrowReader->set_batch_size(CPLAtoGIntBig(pszParquetBatchSize)); - const char *pszNumThreads = CPLGetConfigOption("GDAL_NUM_THREADS", nullptr); - int nNumThreads = 0; - if (pszNumThreads == nullptr) - nNumThreads = std::min(4, CPLGetNumCPUs()); - else - nNumThreads = EQUAL(pszNumThreads, "ALL_CPUS") ? CPLGetNumCPUs() - : atoi(pszNumThreads); - if (nNumThreads > 1) + const int nNumCPUs = GetNumCPUs(); + const char *pszUseThreads = + CPLGetConfigOption("OGR_PARQUET_USE_THREADS", nullptr); + if (!pszUseThreads && nNumCPUs > 1) + { + pszUseThreads = "YES"; + } + if (CPLTestBool(pszUseThreads)) { - CPL_IGNORE_RET_VAL(arrow::SetCpuThreadPoolCapacity(nNumThreads)); m_poArrowReader->set_use_threads(true); } From 3fda22ba992943b8a899858cbcaa5160a4bb57b5 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 21:31:53 +0200 Subject: [PATCH 008/191] Add OGRArrowIsGeoArrowStruct() and use it --- ogr/ogrsf_frmts/arrow_common/ogr_arrow.h | 22 ++++++++++ ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp | 47 ++++----------------- 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h index b47cb907d16f..a16103da7dd3 100644 --- a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h +++ b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h @@ -61,6 +61,28 @@ enum class OGRArrowGeomEncoding GEOARROW_STRUCT_MULTIPOLYGON, }; +/************************************************************************/ +/* OGRArrowIsGeoArrowStruct() */ 
+/************************************************************************/ + +inline bool OGRArrowIsGeoArrowStruct(OGRArrowGeomEncoding eEncoding) +{ + switch (eEncoding) + { + case OGRArrowGeomEncoding::GEOARROW_STRUCT_GENERIC: + case OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT: + case OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING: + case OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON: + case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT: + case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING: + case OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON: + return true; + + default: + return false; + } +} + /************************************************************************/ /* OGRArrowLayer */ /************************************************************************/ diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index 6da4ccc57ce8..90c589e7b232 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -1461,18 +1461,7 @@ bool OGRParquetLayer::ReadNextBatch() m_anMapGeomFieldIndexToParquetColumns.size()) && m_anMapGeomFieldIndexToParquetColumns[m_iGeomFieldFilter].size() >= 2 && - (m_aeGeomEncoding[m_iGeomFieldFilter] == - OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT || - m_aeGeomEncoding[m_iGeomFieldFilter] == - OGRArrowGeomEncoding::GEOARROW_STRUCT_LINESTRING || - m_aeGeomEncoding[m_iGeomFieldFilter] == - OGRArrowGeomEncoding::GEOARROW_STRUCT_POLYGON || - m_aeGeomEncoding[m_iGeomFieldFilter] == - OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOINT || - m_aeGeomEncoding[m_iGeomFieldFilter] == - OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTILINESTRING || - m_aeGeomEncoding[m_iGeomFieldFilter] == - OGRArrowGeomEncoding::GEOARROW_STRUCT_MULTIPOLYGON)); + OGRArrowIsGeoArrowStruct(m_aeGeomEncoding[m_iGeomFieldFilter])); if (m_asAttributeFilterConstraints.empty() && !bUSEBBOXFields && !(bIsGeoArrowStruct && m_poFilterGeom)) @@ -2029,34 +2018,14 @@ OGRErr 
OGRParquetLayer::SetIgnoredFields(CSLConstList papszFields) m_oMapGeomFieldIndexToGeomColBBOXParquet.find(i); if (oIter != m_oMapGeomFieldIndexToGeomColBBOX.end() && oIterParquet != - m_oMapGeomFieldIndexToGeomColBBOXParquet.end()) + m_oMapGeomFieldIndexToGeomColBBOXParquet.end() && + !OGRArrowIsGeoArrowStruct(m_aeGeomEncoding[i])) { - const bool bIsGeoArrowStruct = - (m_aeGeomEncoding[i] == - OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT || - m_aeGeomEncoding[i] == - OGRArrowGeomEncoding:: - GEOARROW_STRUCT_LINESTRING || - m_aeGeomEncoding[i] == - OGRArrowGeomEncoding:: - GEOARROW_STRUCT_POLYGON || - m_aeGeomEncoding[i] == - OGRArrowGeomEncoding:: - GEOARROW_STRUCT_MULTIPOINT || - m_aeGeomEncoding[i] == - OGRArrowGeomEncoding:: - GEOARROW_STRUCT_MULTILINESTRING || - m_aeGeomEncoding[i] == - OGRArrowGeomEncoding:: - GEOARROW_STRUCT_MULTIPOLYGON); - if (!bIsGeoArrowStruct) - { - oIter->second.iArrayIdx = nBatchColumns++; - m_anRequestedParquetColumns.insert( - m_anRequestedParquetColumns.end(), - oIterParquet->second.anParquetCols.begin(), - oIterParquet->second.anParquetCols.end()); - } + oIter->second.iArrayIdx = nBatchColumns++; + m_anRequestedParquetColumns.insert( + m_anRequestedParquetColumns.end(), + oIterParquet->second.anParquetCols.begin(), + oIterParquet->second.anParquetCols.end()); } } else From 0d6d8a57c68efd64df6127838ac7f850505bcf1e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 21:37:24 +0200 Subject: [PATCH 009/191] Arrow/Parquet: add a OGRArrowLayer::SanityCheckOfSetBatch() method --- ogr/ogrsf_frmts/arrow_common/ogr_arrow.h | 6 ++ .../arrow_common/ograrrowlayer.hpp | 64 +++++++++++++++++++ ogr/ogrsf_frmts/parquet/ogr_parquet.h | 5 -- ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp | 58 +---------------- 4 files changed, 73 insertions(+), 60 deletions(-) diff --git a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h index a16103da7dd3..b1b35836132a 100644 --- 
a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h +++ b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h @@ -181,6 +181,10 @@ class OGRArrowLayer CPL_NON_FINAL // m_bIgnoredFields is set int m_nRequestedFIDColumn = -1; // only valid when m_bIgnoredFields is set + int m_nExpectedBatchColumns = + -1; // Should be equal to m_poBatch->num_columns() (when + // m_bIgnoredFields is set) + bool m_bEOF = false; int64_t m_nFeatureIdx = 0; int64_t m_nIdxInBatch = 0; @@ -274,6 +278,8 @@ class OGRArrowLayer CPL_NON_FINAL ++m_nFeatureIdx; } + void SanityCheckOfSetBatch() const; + public: virtual ~OGRArrowLayer() override; diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index 94f7bd405663..13dd911a1956 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -3858,7 +3858,10 @@ OGRArrowLayer::SetBatch(const std::shared_ptr &poBatch) m_poArrayYMaxFloat = nullptr; if (m_poBatch) + { m_poBatchColumns = m_poBatch->columns(); + SanityCheckOfSetBatch(); + } if (m_poBatch && m_poFilterGeom) { @@ -3959,6 +3962,67 @@ OGRArrowLayer::SetBatch(const std::shared_ptr &poBatch) } } +/************************************************************************/ +/* SanityCheckOfSetBatch() */ +/************************************************************************/ + +inline void OGRArrowLayer::SanityCheckOfSetBatch() const +{ +#ifdef DEBUG + CPLAssert(m_poBatch); + + const auto &poColumns = m_poBatch->columns(); + + // Sanity checks + CPLAssert(m_poBatch->num_columns() == (m_bIgnoredFields + ? 
m_nExpectedBatchColumns + : m_poSchema->num_fields())); + const auto &fields = m_poSchema->fields(); + + for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i) + { + int iCol; + if (m_bIgnoredFields) + { + iCol = m_anMapFieldIndexToArrayIndex[i]; + if (iCol < 0) + continue; + } + else + { + iCol = m_anMapFieldIndexToArrowColumn[i][0]; + } + CPL_IGNORE_RET_VAL(iCol); // to make cppcheck happy + + CPLAssert(iCol < static_cast(poColumns.size())); + CPLAssert(fields[m_anMapFieldIndexToArrowColumn[i][0]]->type()->id() == + poColumns[iCol]->type_id()); + } + + for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i) + { + int iCol; + if (m_bIgnoredFields) + { + iCol = m_anMapGeomFieldIndexToArrayIndex[i]; + if (iCol < 0) + continue; + } + else + { + iCol = m_anMapGeomFieldIndexToArrowColumn[i]; + } + CPL_IGNORE_RET_VAL(iCol); // to make cppcheck happy + + CPLAssert(iCol < static_cast(poColumns.size())); + CPLAssert(fields[m_anMapGeomFieldIndexToArrowColumn[i]]->type()->id() == + poColumns[iCol]->type_id()); + } +#else + CPL_IGNORE_RET_VAL(m_nExpectedBatchColumns); +#endif +} + /************************************************************************/ /* GetNextRawFeature() */ /************************************************************************/ diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h index 050939b0a916..0849850675c9 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h @@ -108,11 +108,6 @@ class OGRParquetLayer final : public OGRParquetLayerBase int64_t m_nFeatureIdxSelected = 0; std::vector m_anRequestedParquetColumns{}; // only valid when // m_bIgnoredFields is set -#ifdef DEBUG - int m_nExpectedBatchColumns = - 0; // Should be equal to m_poBatch->num_columns() (when - // m_bIgnoredFields is set) -#endif CPLStringList m_aosFeatherMetadata{}; //! 
Describe the bbox column of a geometry column diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index 90c589e7b232..e57e8060f769 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -1825,57 +1825,6 @@ bool OGRParquetLayer::ReadNextBatch() SetBatch(poNextBatch); -#ifdef DEBUG - const auto &poColumns = m_poBatch->columns(); - - // Sanity checks - CPLAssert(m_poBatch->num_columns() == (m_bIgnoredFields - ? m_nExpectedBatchColumns - : m_poSchema->num_fields())); - - for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i) - { - int iCol; - if (m_bIgnoredFields) - { - iCol = m_anMapFieldIndexToArrayIndex[i]; - if (iCol < 0) - continue; - } - else - { - iCol = m_anMapFieldIndexToArrowColumn[i][0]; - } - CPL_IGNORE_RET_VAL(iCol); // to make cppcheck happy - - CPLAssert(iCol < static_cast(poColumns.size())); - CPLAssert(m_poSchema->fields()[m_anMapFieldIndexToArrowColumn[i][0]] - ->type() - ->id() == poColumns[iCol]->type_id()); - } - - for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i) - { - int iCol; - if (m_bIgnoredFields) - { - iCol = m_anMapGeomFieldIndexToArrayIndex[i]; - if (iCol < 0) - continue; - } - else - { - iCol = m_anMapGeomFieldIndexToArrowColumn[i]; - } - CPL_IGNORE_RET_VAL(iCol); // to make cppcheck happy - - CPLAssert(iCol < static_cast(poColumns.size())); - CPLAssert(m_poSchema->fields()[m_anMapGeomFieldIndexToArrowColumn[i]] - ->type() - ->id() == poColumns[iCol]->type_id()); - } -#endif - return true; } @@ -1902,12 +1851,12 @@ OGRErr OGRParquetLayer::SetIgnoredFields(CSLConstList papszFields) m_anMapGeomFieldIndexToArrayIndex.clear(); m_nRequestedFIDColumn = -1; OGRErr eErr = OGRLayer::SetIgnoredFields(papszFields); + int nBatchColumns = 0; if (!m_bHasMissingMappingToParquet && eErr == OGRERR_NONE) { m_bIgnoredFields = papszFields != nullptr && papszFields[0] != nullptr; if (m_bIgnoredFields) { - int nBatchColumns = 0; if 
(m_iFIDParquetColumn >= 0) { m_nRequestedFIDColumn = nBatchColumns; @@ -2037,12 +1986,11 @@ OGRErr OGRParquetLayer::SetIgnoredFields(CSLConstList papszFields) CPLAssert( static_cast(m_anMapGeomFieldIndexToArrayIndex.size()) == m_poFeatureDefn->GetGeomFieldCount()); -#ifdef DEBUG - m_nExpectedBatchColumns = nBatchColumns; -#endif } } + m_nExpectedBatchColumns = m_bIgnoredFields ? nBatchColumns : -1; + ComputeConstraintsArrayIdx(); // Full invalidation From 0783fba155bb39757cb250e3140b69fbf4beb254 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 19:46:36 +0200 Subject: [PATCH 010/191] Parquet dataset (multi-file typically): implement SetIgnoredFields() --- autotest/ogr/ogr_parquet.py | 89 ++++++--- ogr/ogrsf_frmts/parquet/ogr_parquet.h | 9 +- .../parquet/ogrparquetdatasetlayer.cpp | 170 +++++++++++++++++- ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp | 13 +- 4 files changed, 248 insertions(+), 33 deletions(-) diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index 16efed6dae6b..b81d6ac93f75 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -86,6 +86,14 @@ def _validate(filename, check_data=False): assert not ret +############################################################################### + + +def _has_arrow_dataset(): + drv = gdal.GetDriverByName("Parquet") + return drv is not None and drv.GetMetadataItem("ARROW_DATASET") is not None + + ############################################################################### # Read invalid file @@ -102,9 +110,11 @@ def test_ogr_parquet_invalid(): def _check_test_parquet( filename, + expect_layer_geom_type=True, expect_fast_feature_count=True, expect_fast_get_extent=True, expect_ignore_fields=True, + expect_domain=True, ): with gdaltest.config_option("OGR_PARQUET_BATCH_SIZE", "2"): ds = gdal.OpenEx(filename) @@ -121,7 +131,8 @@ def _check_test_parquet( srs = lyr_defn.GetGeomFieldDefn(0).GetSpatialRef() assert srs is not None assert srs.GetAuthorityCode(None) 
== "4326" - assert lyr_defn.GetGeomFieldDefn(0).GetType() == ogr.wkbPoint + if expect_layer_geom_type: + assert lyr_defn.GetGeomFieldDefn(0).GetType() == ogr.wkbPoint # import pprint # pprint.pprint(got_field_defns) expected_field_defns = [ @@ -245,17 +256,18 @@ def _check_test_parquet( with pytest.raises(Exception): lyr.GetExtent(geom_field=1) - assert ds.GetFieldDomainNames() == ["dictDomain"] - assert ds.GetFieldDomain("not_existing") is None - for _ in range(2): - domain = ds.GetFieldDomain("dictDomain") - assert domain is not None - assert domain.GetName() == "dictDomain" - assert domain.GetDescription() == "" - assert domain.GetDomainType() == ogr.OFDT_CODED - assert domain.GetFieldType() == ogr.OFTInteger - assert domain.GetFieldSubType() == ogr.OFSTNone - assert domain.GetEnumeration() == {"0": "foo", "1": "bar", "2": "baz"} + if expect_domain: + assert ds.GetFieldDomainNames() == ["dictDomain"] + assert ds.GetFieldDomain("not_existing") is None + for _ in range(2): + domain = ds.GetFieldDomain("dictDomain") + assert domain is not None + assert domain.GetName() == "dictDomain" + assert domain.GetDescription() == "" + assert domain.GetDomainType() == ogr.OFDT_CODED + assert domain.GetFieldType() == ogr.OFTInteger + assert domain.GetFieldSubType() == ogr.OFSTNone + assert domain.GetEnumeration() == {"0": "foo", "1": "bar", "2": "baz"} f = lyr.GetNextFeature() assert f.GetFID() == 0 @@ -491,6 +503,32 @@ def test_ogr_parquet_1(use_vsi): gdal.Unlink(vsifilename) +############################################################################### + + +@pytest.mark.skipif(not _has_arrow_dataset(), reason="GDAL not built with ArrowDataset") +@pytest.mark.parametrize("use_vsi", [False, True]) +def test_ogr_parquet_check_dataset(use_vsi): + + filename = "data/parquet/test.parquet" + if use_vsi: + vsifilename = "/vsimem/test.parquet" + gdal.FileFromMemBuffer(vsifilename, open(filename, "rb").read()) + filename = vsifilename + + try: + _check_test_parquet( + "PARQUET:" + 
filename, + expect_layer_geom_type=False, + expect_fast_feature_count=False, + expect_fast_get_extent=False, + expect_domain=False, + ) + finally: + if use_vsi: + gdal.Unlink(vsifilename) + + ############################################################################### # Run test_ogrsf @@ -542,6 +580,25 @@ def test_ogr_parquet_test_ogrsf_all_geoms(): assert "ERROR" not in ret +############################################################################### +# Run test_ogrsf + + +@pytest.mark.skipif(not _has_arrow_dataset(), reason="GDAL not built with ArrowDataset") +def test_ogr_parquet_test_ogrsf_all_geoms_with_arrow_dataset(): + + if test_cli_utilities.get_test_ogrsf_path() is None: + pytest.skip() + + ret = gdaltest.runexternal( + test_cli_utilities.get_test_ogrsf_path() + + " -ro PARQUET:data/parquet/all_geoms.parquet" + ) + + assert "INFO" in ret + assert "ERROR" not in ret + + ############################################################################### # Test write support @@ -1509,14 +1566,6 @@ def test_ogr_parquet_is_null(): gdal.Unlink(outfilename) -############################################################################### - - -def _has_arrow_dataset(): - drv = gdal.GetDriverByName("Parquet") - return drv is not None and drv.GetMetadataItem("ARROW_DATASET") is not None - - ############################################################################### # Test reading a flat partitioned dataset diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h index 0849850675c9..99c585f2c199 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h @@ -67,6 +67,8 @@ class OGRParquetLayerBase CPL_NON_FINAL : public OGRArrowLayer int iFieldIdx, const std::shared_ptr &field, std::function computeGeometryTypeFun); + void InvalidateCachedBatches() override; + static bool ParseGeometryColumnCovering(const CPLJSONObject &oJSONDef, std::string &osBBOXColumn, std::string &osXMin, @@ -234,6 +236,7 
@@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase bool m_bRebuildScanner = true; std::shared_ptr m_poDataset{}; std::shared_ptr m_poScanner{}; + std::vector m_aosProjectedFields{}; void EstablishFeatureDefn(); void @@ -250,8 +253,6 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase bool ReadNextBatch() override; - void InvalidateCachedBatches() override; - bool FastGetExtent(int iGeomField, OGREnvelope *psExtent) const override; public: @@ -272,6 +273,10 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase void SetSpatialFilter(int iGeomField, OGRGeometry *poGeom) override; + OGRErr SetIgnoredFields(CSLConstList papszFields) override; + + int TestCapability(const char *) override; + // TODO std::unique_ptr BuildDomain(const std::string & /*osDomainName*/, diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index 3899920d1d80..33f93c95c4a0 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -286,6 +286,21 @@ void OGRParquetDatasetLayer::BuildScanner() PARQUET_THROW_NOT_OK(scannerBuilder->Filter(expression)); } + if (m_bIgnoredFields) + { +#ifdef DEBUG + std::string osFields; + for (const std::string &osField : m_aosProjectedFields) + { + if (!osFields.empty()) + osFields += ','; + osFields += osField; + } + CPLDebug("PARQUET", "Projected fields: %s", osFields.c_str()); +#endif + PARQUET_THROW_NOT_OK(scannerBuilder->Project(m_aosProjectedFields)); + } + PARQUET_ASSIGN_OR_THROW(m_poScanner, scannerBuilder->Finish()); } catch (const std::exception &e) @@ -350,15 +365,6 @@ bool OGRParquetDatasetLayer::ReadNextBatch() return true; } -/************************************************************************/ -/* InvalidateCachedBatches() */ -/************************************************************************/ - -void OGRParquetDatasetLayer::InvalidateCachedBatches() -{ - 
ResetReading(); -} - /************************************************************************/ /* GetFeatureCount() */ /************************************************************************/ @@ -498,3 +504,149 @@ void OGRParquetDatasetLayer::SetSpatialFilter(int iGeomField, // Full invalidation InvalidateCachedBatches(); } + +/************************************************************************/ +/* SetIgnoredFields() */ +/************************************************************************/ + +OGRErr OGRParquetDatasetLayer::SetIgnoredFields(CSLConstList papszFields) +{ + m_bRebuildScanner = true; + m_aosProjectedFields.clear(); + m_bIgnoredFields = false; + m_anMapFieldIndexToArrayIndex.clear(); + m_anMapGeomFieldIndexToArrayIndex.clear(); + m_nRequestedFIDColumn = -1; + OGRErr eErr = OGRParquetLayerBase::SetIgnoredFields(papszFields); + if (eErr == OGRERR_NONE) + { + m_bIgnoredFields = papszFields != nullptr && papszFields[0] != nullptr; + if (m_bIgnoredFields) + { + if (m_iFIDArrowColumn >= 0) + { + m_nRequestedFIDColumn = + static_cast(m_aosProjectedFields.size()); + m_aosProjectedFields.emplace_back(GetFIDColumn()); + } + + const auto &fields = m_poSchema->fields(); + for (int i = 0; i < m_poFeatureDefn->GetFieldCount(); ++i) + { + const auto &field = + fields[m_anMapFieldIndexToArrowColumn[i][0]]; + const auto eArrowType = field->type()->id(); + if (eArrowType == arrow::Type::STRUCT) + { + // For a struct, for the sake of simplicity in + // GetNextRawFeature(), as soon as one of the member if + // requested, request the struct field, so that the Arrow + // type doesn't change + bool bFoundNotIgnored = false; + for (int j = i; j < m_poFeatureDefn->GetFieldCount() && + m_anMapFieldIndexToArrowColumn[i][0] == + m_anMapFieldIndexToArrowColumn[j][0]; + ++j) + { + if (!m_poFeatureDefn->GetFieldDefn(j)->IsIgnored()) + { + bFoundNotIgnored = true; + break; + } + } + if (bFoundNotIgnored) + { + int j; + for (j = i; j < m_poFeatureDefn->GetFieldCount() && + 
m_anMapFieldIndexToArrowColumn[i][0] == + m_anMapFieldIndexToArrowColumn[j][0]; + ++j) + { + if (!m_poFeatureDefn->GetFieldDefn(j)->IsIgnored()) + { + m_anMapFieldIndexToArrayIndex.push_back( + static_cast( + m_aosProjectedFields.size())); + } + else + { + m_anMapFieldIndexToArrayIndex.push_back(-1); + } + } + i = j - 1; + + m_aosProjectedFields.emplace_back(field->name()); + } + else + { + int j; + for (j = i; j < m_poFeatureDefn->GetFieldCount() && + m_anMapFieldIndexToArrowColumn[i][0] == + m_anMapFieldIndexToArrowColumn[j][0]; + ++j) + { + m_anMapFieldIndexToArrayIndex.push_back(-1); + } + i = j - 1; + } + } + else if (!m_poFeatureDefn->GetFieldDefn(i)->IsIgnored()) + { + m_anMapFieldIndexToArrayIndex.push_back( + static_cast(m_aosProjectedFields.size())); + m_aosProjectedFields.emplace_back(field->name()); + } + else + { + m_anMapFieldIndexToArrayIndex.push_back(-1); + } + } + + for (int i = 0; i < m_poFeatureDefn->GetGeomFieldCount(); ++i) + { + const auto &field = + fields[m_anMapGeomFieldIndexToArrowColumn[i]]; + if (!m_poFeatureDefn->GetGeomFieldDefn(i)->IsIgnored()) + { + m_anMapGeomFieldIndexToArrayIndex.push_back( + static_cast(m_aosProjectedFields.size())); + m_aosProjectedFields.emplace_back(field->name()); + + auto oIter = m_oMapGeomFieldIndexToGeomColBBOX.find(i); + if (oIter != m_oMapGeomFieldIndexToGeomColBBOX.end() && + !OGRArrowIsGeoArrowStruct(m_aeGeomEncoding[i])) + { + oIter->second.iArrayIdx = + static_cast(m_aosProjectedFields.size()); + m_aosProjectedFields.emplace_back( + fields[oIter->second.iArrowCol]->name()); + } + } + else + { + m_anMapGeomFieldIndexToArrayIndex.push_back(-1); + } + } + } + } + + m_nExpectedBatchColumns = + m_bIgnoredFields ? 
static_cast(m_aosProjectedFields.size()) : -1; + + // Full invalidation + InvalidateCachedBatches(); + + return eErr; +} + +/************************************************************************/ +/* TestCapability() */ +/************************************************************************/ + +int OGRParquetDatasetLayer::TestCapability(const char *pszCap) +{ + if (EQUAL(pszCap, OLCIgnoreFields)) + return true; + + return OGRParquetLayerBase::TestCapability(pszCap); +} diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index e57e8060f769..bbd04b74e55a 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -83,6 +83,16 @@ void OGRParquetLayerBase::ResetReading() OGRArrowLayer::ResetReading(); } +/************************************************************************/ +/* InvalidateCachedBatches() */ +/************************************************************************/ + +void OGRParquetLayerBase::InvalidateCachedBatches() +{ + m_iRecordBatch = -1; + ResetReading(); +} + /************************************************************************/ /* LoadGeoMetadata() */ /************************************************************************/ @@ -1834,9 +1844,8 @@ bool OGRParquetLayer::ReadNextBatch() void OGRParquetLayer::InvalidateCachedBatches() { - m_iRecordBatch = -1; m_bSingleBatch = false; - ResetReading(); + OGRParquetLayerBase::InvalidateCachedBatches(); } /************************************************************************/ From 82e1ddb60c600e1d2da8ad85df17e6d616364a5b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 19:46:36 +0200 Subject: [PATCH 011/191] Parquet dataset (multi-file typically): make spatial filter work ignoring geometry field --- autotest/ogr/ogr_parquet.py | 31 ++++++++++++++++ .../arrow_common/ograrrowlayer.hpp | 2 +- ogr/ogrsf_frmts/parquet/ogr_parquet.h | 3 ++ 
.../parquet/ogrparquetdatasetlayer.cpp | 35 +++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index b81d6ac93f75..53b3e59b78ec 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -3945,3 +3945,34 @@ def test_ogr_parquet_read_arrow_json_extension(): assert lyr.GetLayerDefn().GetFieldDefn(0).GetSubType() == ogr.OFSTJSON f = lyr.GetNextFeature() assert f["extension_json"] == '{"foo":"bar"}' + + +############################################################################### +# Test ignored fields with arrow::dataset and bounding box column + + +@pytest.mark.skipif(not _has_arrow_dataset(), reason="GDAL not built with ArrowDataset") +def test_ogr_parquet_ignored_fields_bounding_box_column_arrow_dataset(tmp_path): + + filename = str(tmp_path / "test.parquet") + ds = ogr.GetDriverByName("Parquet").CreateDataSource(filename) + lyr = ds.CreateLayer("test", geom_type=ogr.wkbPoint, options=["FID=fid"]) + f = ogr.Feature(lyr.GetLayerDefn()) + f.SetFID(1) + f.SetGeometryDirectly(ogr.CreateGeometryFromWkt("POINT (1 2)")) + lyr.CreateFeature(f) + f = None + ds.Close() + + ds = ogr.Open("PARQUET:" + filename) + lyr = ds.GetLayer(0) + lyr.SetIgnoredFields([lyr.GetGeometryColumn()]) + lyr.SetSpatialFilterRect(0, 0, 10, 10) + lyr.ResetReading() + f = lyr.GetNextFeature() + assert f.GetFID() == 1 + assert f.GetGeometryRef() is None + + lyr.SetSpatialFilterRect(0, 0, 0, 0) + lyr.ResetReading() + assert lyr.GetNextFeature() is None diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index 13dd911a1956..06afc19fb93f 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -4053,7 +4053,7 @@ inline OGRFeature *OGRArrowLayer::GetNextRawFeature() iCol = m_anMapGeomFieldIndexToArrowColumn[m_iGeomFieldFilter]; } - if (m_poArrayXMinFloat || m_poArrayXMinDouble) + 
if (iCol >= 0 && (m_poArrayXMinFloat || m_poArrayXMinDouble)) { OGREnvelope sEnvelopeSkipToNextFeatureDueToBBOX; const auto IntersectsBBOX = diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h index 99c585f2c199..76ae8be4681c 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h @@ -234,6 +234,7 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase { bool m_bIsVSI = false; bool m_bRebuildScanner = true; + bool m_bSkipFilterGeometry = false; std::shared_ptr m_poDataset{}; std::shared_ptr m_poScanner{}; std::vector m_aosProjectedFields{}; @@ -261,6 +262,8 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase const std::shared_ptr &dataset, CSLConstList papszOpenOptions); + OGRFeature *GetNextFeature() override; + GIntBig GetFeatureCount(int bForce) override; OGRErr GetExtent(OGREnvelope *psExtent, int bForce = TRUE) override; OGRErr GetExtent(int iGeomField, OGREnvelope *psExtent, diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index 33f93c95c4a0..94785aef69db 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -198,6 +198,7 @@ void OGRParquetDatasetLayer::EstablishFeatureDefn() void OGRParquetDatasetLayer::BuildScanner() { m_bRebuildScanner = false; + m_bSkipFilterGeometry = false; try { @@ -279,6 +280,17 @@ void OGRParquetDatasetLayer::BuildScanner() cp::field_ref(arrow::FieldRef( oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldYMax)), cp::literal(m_sFilterEnvelope.MinY))}); + + const bool bIsPoint = + wkbFlatten( + m_poFeatureDefn->GetGeomFieldDefn(m_iGeomFieldFilter) + ->GetType()) == wkbPoint; + + m_bSkipFilterGeometry = + m_bFilterIsEnvelope && + (bIsPoint || + m_poFeatureDefn->GetGeomFieldDefn(m_iGeomFieldFilter) + ->IsIgnored()); } } if (expression.is_valid()) @@ -365,6 +377,29 @@ bool 
OGRParquetDatasetLayer::ReadNextBatch() return true; } +/************************************************************************/ +/* GetNextFeature() */ +/************************************************************************/ + +OGRFeature *OGRParquetDatasetLayer::GetNextFeature() +{ + while (true) + { + OGRFeature *poFeature = GetNextRawFeature(); + if (poFeature == nullptr) + return nullptr; + + if ((m_poFilterGeom == nullptr || m_bSkipFilterGeometry || + FilterGeometry(poFeature->GetGeometryRef())) && + (m_poAttrQuery == nullptr || m_poAttrQuery->Evaluate(poFeature))) + { + return poFeature; + } + else + delete poFeature; + } +} + /************************************************************************/ /* GetFeatureCount() */ /************************************************************************/ From fddfaf266e12a4a1d9adffa4489380509dc63371 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 19:46:37 +0200 Subject: [PATCH 012/191] Parquet dataset (multi-file typically): optimize spatial filtering --- ogr/ogrsf_frmts/arrow_common/ogr_arrow.h | 10 ++++++++++ ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp | 10 +++++++--- .../parquet/ogrparquetdatasetlayer.cpp | 16 ++++++---------- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h index b1b35836132a..d6adc37dd4a8 100644 --- a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h +++ b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h @@ -143,6 +143,16 @@ class OGRArrowLayer CPL_NON_FINAL std::vector m_anMapGeomFieldIndexToArrowColumn{}; std::vector m_aeGeomEncoding{}; + //! Whether bounding box based spatial filter should be skipped. + // This is set to true by OGRParquetDatasetLayer when there is a bounding + // box field, as an optimization. + bool m_bBaseArrowIgnoreSpatialFilterRect = false; + + //! 
Whether spatial filter should be skipped (by GetNextArrowArray()) + // This is set to true by OGRParquetDatasetLayer when filtering points in + // a rectangle. + bool m_bBaseArrowIgnoreSpatialFilter = false; + //! Describe the bbox column of a geometry column struct GeomColBBOX { diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index 06afc19fb93f..931f16515be1 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -3863,7 +3863,7 @@ OGRArrowLayer::SetBatch(const std::shared_ptr &poBatch) SanityCheckOfSetBatch(); } - if (m_poBatch && m_poFilterGeom) + if (m_poBatch && m_poFilterGeom && !m_bBaseArrowIgnoreSpatialFilterRect) { int iCol; if (m_bIgnoredFields) @@ -4041,7 +4041,7 @@ inline OGRFeature *OGRArrowLayer::GetNextRawFeature() // Evaluate spatial filter by computing the bounding box of each geometry // but without creating a OGRGeometry - if (m_poFilterGeom) + if (m_poFilterGeom && !m_bBaseArrowIgnoreSpatialFilterRect) { int iCol; if (m_bIgnoredFields) @@ -5577,6 +5577,10 @@ inline int OGRArrowLayer::GetNextArrowArray(struct ArrowArrayStream *stream, } } + const bool bNeedsPostFilter = + (m_poAttrQuery) || + (m_poFilterGeom && !m_bBaseArrowIgnoreSpatialFilter); + struct ArrowSchema schema; memset(&schema, 0, sizeof(schema)); auto status = arrow::ExportRecordBatch(*m_poBatch, out_array, &schema); @@ -5694,7 +5698,7 @@ inline int OGRArrowLayer::GetNextArrowArray(struct ArrowArrayStream *stream, for (int64_t i = 0; i < m_nIdxInBatch; ++i) IncrFeatureIdx(); - if (m_poAttrQuery || m_poFilterGeom) + if (bNeedsPostFilter) { CPLStringList aosOptions; if (m_iFIDArrowColumn < 0) diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index 94785aef69db..245c64d79869 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ 
-199,6 +199,8 @@ void OGRParquetDatasetLayer::BuildScanner() { m_bRebuildScanner = false; m_bSkipFilterGeometry = false; + m_bBaseArrowIgnoreSpatialFilterRect = false; + m_bBaseArrowIgnoreSpatialFilter = false; try { @@ -281,10 +283,14 @@ void OGRParquetDatasetLayer::BuildScanner() oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldYMax)), cp::literal(m_sFilterEnvelope.MinY))}); + m_bBaseArrowIgnoreSpatialFilterRect = true; + const bool bIsPoint = wkbFlatten( m_poFeatureDefn->GetGeomFieldDefn(m_iGeomFieldFilter) ->GetType()) == wkbPoint; + m_bBaseArrowIgnoreSpatialFilter = + m_bFilterIsEnvelope && bIsPoint; m_bSkipFilterGeometry = m_bFilterIsEnvelope && @@ -646,16 +652,6 @@ OGRErr OGRParquetDatasetLayer::SetIgnoredFields(CSLConstList papszFields) m_anMapGeomFieldIndexToArrayIndex.push_back( static_cast(m_aosProjectedFields.size())); m_aosProjectedFields.emplace_back(field->name()); - - auto oIter = m_oMapGeomFieldIndexToGeomColBBOX.find(i); - if (oIter != m_oMapGeomFieldIndexToGeomColBBOX.end() && - !OGRArrowIsGeoArrowStruct(m_aeGeomEncoding[i])) - { - oIter->second.iArrayIdx = - static_cast(m_aosProjectedFields.size()); - m_aosProjectedFields.emplace_back( - fields[oIter->second.iArrowCol]->name()); - } } else { From f9aba44e97e44ecd4992e699a266df75822f7317 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 19:46:38 +0200 Subject: [PATCH 013/191] Parquet dataset (multi-file typically): implement SetAttributeFilter() --- .../test_with_fid_and_geometry_bbox.parquet | Bin 0 -> 75109 bytes autotest/ogr/ogr_parquet.py | 87 ++++++- doc/source/drivers/vector/parquet.rst | 6 +- ogr/ogrsf_frmts/arrow_common/ogr_arrow.h | 9 + .../arrow_common/ograrrowlayer.hpp | 15 +- ogr/ogrsf_frmts/parquet/ogr_include_parquet.h | 3 + ogr/ogrsf_frmts/parquet/ogr_parquet.h | 7 + .../parquet/ogrparquetdatasetlayer.cpp | 223 +++++++++++++++++- 8 files changed, 337 insertions(+), 13 deletions(-) create mode 100644 autotest/ogr/data/parquet/test_with_fid_and_geometry_bbox.parquet diff 
--git a/autotest/ogr/data/parquet/test_with_fid_and_geometry_bbox.parquet b/autotest/ogr/data/parquet/test_with_fid_and_geometry_bbox.parquet new file mode 100644 index 0000000000000000000000000000000000000000..79f577585eae7c55b52d34ecd28ec78518f17008 GIT binary patch literal 75109 zcmeHwe_&KsmH)h%Aq_O8fMhzjtRj}XXlY48k|tgDYi2Sb1QL?a03m8=ev^lK zb+Lvi695n$jds4j~_!IlT=VfYLqXm|qN@$@ewxF?9od_IG`;TxUm z88r@3+*8xu-cn!PMv;^SD#&Wzx=%wV#8Fx8YoAU2Y7&%gcl<(b59ctNOy?sSf~eJJ4)YD}N@HZ3B60q)n~anN+QPMqryk z){P{m-i21GBNQ8I(LrZ&gr^Oo1 zjv_lop*5KKZPc_7E!F@G;d32e;>YiV%DXv#p6KwdBV^58$gWnrLw`Ryb%+JJMccba zlPiCR+NPg4sB1+Drj8bENXi6IgQvQtwhk%UHJ%MM4xd3UZ-u#f%;?k<4l}8vb5T=U zqte_Sd@ot?UfSI9%majC<5-CzrKNgNWBoh_mSgWDYu^_W3$;Hl@2RgI**kbXG?#X5 z1e3)R`3t=m=Ur1E6}jcrq=q7&g#~M^IALRHMVwAz3nps2Mu!e{ba}c zp>&-DUD#;#YL(@WQ{80YPk>tv95xNbd`|91VwVt zb>S9Uq_x!7uUPYKA50mu|W23^U%U>>h zVheM1?qC00|H(nRD#V|FN$_9DMEsU_ZNgkhm%@yY!a&K`r8%EQcBi4m#mgwRG)ASH zG~x;L_&g0tBYw-fHfdI6X{Bj%N^@%xIX(%bagualH82!R*-0|V%;LcIitn2ue+4`wmoX84lG z{wX$u+;wEN@@N6sPyh&Jb{jD*!j#lu#i`6ON*YPc!m#7Z_Kf#>3<#JYWfKp$J!hs$ z_bjQVsjYg^5~{Jzz{knTspR#-(Mr~Yb3Zjra_7#W4a4$j>RvQ*;@r*e2 znYaJ+fhG3R0Ly?U5P*Sa*zC$HvveS=1+DnHQVDHHU43m+YjsO{28%B-i`hDZTq^;K zktA#c?*Pj4gb%uqkn$9@HchVw^Cz{1g4Tzk52{?*4Q5e0gIt*uwBJQXLRZq*@IDjLo+S> zJY?T2v-;)?p&V(z&&IDNFrQqiASWtDBTHvwlSVrG$$7^d$$2p4j8Q!e0)DuRQ_%?uk`yluS z5XYvH38~;wQ>@@L`tiQ$$`8+@SvXdoZ^g?n{?{cN>{L5or+bhR*kuIv8RU8=+182v zq{=~CsP71RP$3(zGJ&BxjTo1Dq!=UZVmN$_lmL6dz0^%Mbwjn0TCy=qoJ2NN>7}^w zl$0W1mx7HsTC7-<#+s+dq2B*NrLpp#r5p~Ua(Y3cI97g=?Egf+i{r`ENHN$v){Qir zE8sEiOZIr-pYOip$@HC;*AMvPs3XB-EPMPwOp<@V1~hl!q0aEmJuPPu#x=KZ7u z)_Vg>z2xAh403L%0fC6P5hTtUqnNy2%$>_YOg>K_M^~f{DOnBM`Duf^z6_-7XC1q( zLWR1fQL0M;p8ssD0^sJy}Vdy?-lX}idJUE&1qbNFFINm&xhJPi7H#f(sOMO14D!?%gC^Ia8(D{6_4w7G z@kxw^pivFabgk*lFM?!z*w@*bYP?I@1MV?M-xm#XWF4)HjJ_6jTIwUkA`}f_%E1PG zZ2&A$;?QVe_>ogCuzzsgg(N&M=cPjfYrZTi+xQfK-c{QT-k^KNds6ZcJJ=DLp(Ug!3VwuX0{%Q?DAJ4P{yey2 zk?iG7()?_a)dV8W>*N^>|AxT7 
zd%;CY0W|ozkNrUx>5<`(oQMhR2ExgO`9x2+PtL%-ZTOiK+10$u+TZPG;dgT~++{R*8%bK`lCj(4nX(qR)PURciTGRXE_vWZh!xG>h1qeeQ~ zU~8q5?dXU+$wa*i5?Q4CU5*zg3GIC$wsf_MbnBpBee=6gI+cYBW36=WNgXR1@cVPH z0q?=DCHU0~_DO|I7BLH$A>A7^dZY+mG-=-lJ+uiwun$BK{=vPi)eFU#$>0U`OXe-E zZt1G`!7mUZIk3hEK;L@qMT2a95n4<^SObv{o+JYpGsa^CAoK-uwh<*8NP$iSk^C=@ zj~v|(FxZ^!96gFb6br@iM4*OT_@2Q6Jd&(6z>`27E&G9@qrs>n8pd(e(6$56Abmf& zhHP<8R0=9Xv@T}p`EBFdENI9z_m4C#d+3J%PrOO$;3=wOo!;4(46@@T0BEJIDM){6 z0`<7|kfO(A)FZ}`T-CVmM+UiZFd=HR4DwrrHXCJTj7+5$4l63vAXPNVLfsdCY>@pw zhMKL;Ca^LZXziYOMbYjMVWsKWuKK2c@|SnKg~> zj$eTce63TCRt81$r;Nbs23d8|Ajf}6d;fS@4|JQc@3c}9!hE6>#MxgPWYe!>6@+#4 zacx$&u4jY55IkY_4vJT<^a@|~?;1;2p(^@jmcANZna}+OE!~yVKJ2nEeQ>uya$eR4OzoJS{dQBE7t; zSNfqh6{YJ$P;|~(k}bc7kjfj_k~n#|a31J#IQ9pn9He7cVCEA64tha zt}@EYvzs^une%918&(-%TVh`6HNN#N04u`Uda#PlS!=TGk1$4ik+vo#bsMjN*74f^ z(CRpfwePzG4%DAw>DbzwfXd`Dsbr_Fc|0@OjB>&W-_Xx7>h8%U|z`rqBJii>57j9hyFW6Pm-DcU99TmwII^h-nV0E znGQFwl`JD77@qCHxDx*8#UJ*@Jlj8h5Pc9FL=)=mdB?xR!s^iQp{Kp%)~CGasaP2d zbX&1(xq@{VwiW6uDp&1p`?Qx_Tn2cf1ls&K!Ykf-5~tk@E8;`!(DAL$c**|H+yxyU z`<#OKeyU@f03O4DFR%2H-79Zj#dMf`h={l*+3X7kozYSAEu%Bbe5v=+xZUnm5^|+G zWTs;|8UP{CA+QiCu-hPqpY@WZ&jNfq$APx!ZT~8v=cG!Drjznw==|7fFIlzvF6n&3 zS_!-|7Vs#V-|+=6xv=IgXnx}tr2skaM$Ox2wZ%~X!F68p^tuG8f2c6|A#NtWKA8>& zjr*w=rx35-AkkOGgD|B{!yxgI33GuK-G2jxtK)H1A%4ejmtr>1%l`-3AV zmO;yG825%mK_4W6gqB4@+rJ_O3Hs}rYT24_w!G!!Mp$sU5rC-#w4mxzvqj^b5ljUp zFgg;3v4Fh)H3~L5fNT7q3vSsW1n#M;4i9}*!IIoq-`-l^$(L>IGXkp&viz%FvS*X` z1G<$bR%es)b27yDE01O3khrozIsG+&!e6Y$-zT8VtEp-4@%O;0Y-qcZ0!&F4`n+Uc zpO;P$64cnFBypwpEMb!(F~!=C1MLp&C}8s}1oKp|ei%RB;tqC+0K3`%yyVbxUb5ji z3s^gCZTTivyKBGU4!Ebax<}FOmaSfL_3IYEe9G6I((a{ix`SOJz%uRL+U6yvwpqZ& zrCrN=D^_CspsC^qDos-A5Hxu)=|Vh_Q4^dQ;Jgc&eaYBbUEKj9*!HvX2M*BrZI{30 zCA+=_m^B1k$Z9Nv?`ee#z|3({Trf)K!~+htraP>A5*|bRoBHA6j*X|E1|D7FL=rMJxpH;joA7c2AgS(nc)Nz^yA&2AD^Ry^2!Em z<30tLxxbZA*p3&y1&0tl)eYI>4Jd@hY);F0iUCW*boCvZMB43Kd>pB zu0atd3S}kRHC#mgmHuRzIQv2~l$X1b*?fXcf-8HsA+;tN=In zx)MgQ>yVdR{t+65laLE}tt?if@T|`Bcy{I46^*XPF6t+(#+jeWl@0T@mjSb8yDMQ9 
z`+njjD}F-F!i}I7rNu0OCwxZa(LbJm6D^O~2Fyq z=WCC6$;nr!&K;y&7-*#dZ(MlCLmtE#GHfoiUF+G1VF3)Stst7-}y z;ln2Yr)XDon_c-~ff{ahwdd#1&Ho$=)=ATalUA)(arib<_F@Gl@oqq+m?wyH{+Eh4 zQTDqMQoZ`Bmu!BON)<1|*iwmIrkqlVCvqn$RGss#t&mpzT9GQomRCZuTfg#>DL7PcsJh@Qnzg_n` z2qm2d*a{6BMR&c6O}gCPtHeq@e2zr%-7DzFEBL7W>d7}1n3m121kN{p50RYTBTh-g zMpBE@;umnv^c@f(IPC<9<43;S0uyCsy`TU|EcpMR*okHTE8!U&z5pG_1vHPiS=>c= zuD+#_hwgkOa@sKK=H8?91a!7tNLpvB*M#SW0aYp{6G-ao?#!JiSJq+p|`wTdVkUx`LN+qv-- zYvYbx1xTj1Xzumo58%#MH~-U1PW&fz(!{n|2?ucengYPS`4xk4>n~pN>R)2RpasQR zC4UO|&F1!Bsum27W&1A{O*wrXKt$dA${$!5=piev!#)T9#gK49T7_8bTG`8khO-44 zC}wju0WD_pN;Mm3#@GH$L1*3nO4#<68{kg=m70!8#YXcE0DbenH9*-GSOVy^n_lwr z&He)A>nesq)1br)*}4j>D|}tWVho1*Ui#1wa^r14!q-)}Z;3TDfJ-XYzimbl1x4$U zit%`c0AFofELy31fEB(n4u*dKi;jev!$L4I1@D`^9bbGa@Ucb4hn|N0`o;PMw_UWv zpwH<~`N--|`RLLGg|xO%`;9lKpv74(3IiO&#UP^3ihcj573?nia1F&J(MMsy_c9;3 z{4YLe)+AjUm&Awx6C4y409p%u*9rx?+|WGrL+C_WU-l5^gLB--M+ zvN{F=Pl1YU{?p$V~z)>-tdorJp}xo04x-L4{T}iK9C4M@6U5c z#2c9RtFc*gmaTdhvg_NAuY!bPo~#5Ph}jIZgVO(JH<5Y3K+Bb!hX7z!8ep{iFRu<$RQ@KM%{MKAJ}9BV2S(E%|3Ga zYgky4HckYPSNgUu;n1&s-3_{Z*Es?G+7=%7unmx#tAdqTU6yHR$$^bVU<>Riu+2vfe*;=I?|4C0 zYa5A1x#wF7${5?z39Ki#ga5ca7FNDTon`$pzS4?udA>*85Y%!qY~y_7+kk|-*^X^; zbeq&A(@p9w?C?b&x2JI%KrsTV2JU!k^R{Wt5>vwA-Ilw3m=ZpG_B}`G^}XRC8+HLu z;cGiKEOPEnm(1(bp6@{OuoKi~<)JO+et{OwFha%`HRc+-3$XFKiU6_qrAvk-Ny zylJ5dg5)F_P>^#yj~$FT~`=K3V#(S-0`e&&#mEI&kRmX4XWaPE>(u75?5QCIgI zA~Ku2@e?1}`jZ6PB%JA$=kc6_e;)A4hnZX5iv9?lK{$nIlUig6>*icMegnb;$XAwKMB#4kLb z$7N{IPfjs)h^KP^_ zn^5A3DEWY5aql)Qj#A*`EL5m#zm}$E6ACw75< zjE|i74K*SQIjt=@5H@mzjcy3(gP@D0Y`{_K%w-lPOLi$CW|M*D-1BTA~_tCiF;or_Ayj@Pur97NM) z+0o4mEU9;a0=Lc~*ogQ>-{Zk)LaYA>((!)NO*(AVlKFJt%YX2ZgMW~%n#w{(wA8!$ zR!t+F+)B>$b$FL-+0o6EPQ7cF?sv&lx?`7OONU#@3F&tH2|A*;q;x6^DV=&(FP-ho z8+yi$7?{mQ^|Mfi--8VW?C?r9{TYCY-d?{|8{cxnA9ym*Ku%ruk$sn;_4Dfhcv9im zFANT{0Zqy=Wdh~JIg+LS2@K=g8qOaG2d0qySA3I^8{~rjkQO^jycn;sp@P4$<-BxN zk(2Jqq2Kh4NxAi^kF0duC+%Mix#Mf}QNx%7qk1T^MiQ|7$k>1?6N3)q4)Bfh0z#puhdw8%HAgbwv~Re67nPrz?Ekd`OOkgH>$B%!1iZA0gxqe{8#BLK;>`K!1h&u 
z05T;E!L`+j280Eu04xgFx!O-wLym;8zq&?IfR4RQ{vB)lWEJE|7=UYQ75U{}MXC{6 zHLw;m0C^Gy;QBfMAVOk#05%=$TIVNgAXma5+*+^IfvXklUGFFBAy2{pEd4S75b-jp z0~LTx2m3bo$%YLHgs}W8iVj>iPqLW8OB?;9Z({;6tokZo5aZyk7;Kt&d6S=P+mt{Q z8@}eE3Cre27EQd`=O=sm5{O~*b1s^&V6bW8%w|71usMM!c79#a1nnJMHr`+!;+tFi zO(25Z-&7F9-qwie;o>$wIkqi$jd?;+%xVGI-PH#^j zj^p1}aJcX0W2qX;zwIX%b|etQ>F-Do{o7KAmCow#_{o)>2?TWGc?l@qje;1I-Ou~U zTF9NSeq{9?3FVz^{lj1#`L3UA|84?dy|h=t>eq%o49~R}z>Z%?AfDIvNj&d(iynq; z-9A6L{-Qg!xVGf6jD+9}qnheo+>)mk0y!+Y>+`~g9RI$=ZSKh9hr?%J1`D~d-%ob! z2PA5KZ7j16=luFb5*|tsHDBv*!m;bO-$LY|E3E=kY9cFvQ``rFrFrFVqQC+s>5y3=+>7igf9IUuf!9tw(+#z?cd|MuWfGrfd z{3Gbne#F2EY7DHn(u1`wO9AcOmUa*Q#2ql-mPcs!)x&4xHLNr3ym9tH)&Eo(BylAa0ZZKQU-$H z53)1IjfUiTlm^~qn?`t2li~M0e4c?Pb%=SDo#{zU!Q9Nl8|Nt=jiB(BWmUg;fHWP; z=*NvHerEV+%!u)5Na40J-H|ZbVsA%c=lW?(GD2mnS&}?%B&L}l!6|9PDql+)RK2;7 zcHGe7kDH?56S;QP@(87t%RF`%i&sku_r4+*pkmchP@sQ;AIDr=6< z&}!O$T59(vh8o@>DYY7#uxJ&G6Bd@gP2HlOjFW;?Hx`%)g4T>UNk|ZuN-lKs&PbUi zD64p!aIoG>ov6bA@tY2|0Fs6+h8{l|)=P71c68G!S=LL(DPeT7N`yInq}bkYoMwma z2-MylIeVltZfanc3>xHcI?NhRM88}2!10T+J@o)FrEipUGilNAq8iN55mp}Sp314o z_|gj7E8yPB{3nJ;%)8Y+=3V$17&2OyKt1XtEpl#7K!|aMd z)>JcT_sb%b zJE11=sgDPTlBGkT91YyOl$Ij}2ERaH&0QUbJtM4;u>&(V?>;=7tR0Rq!tDCP@ZEBB z1!_`W=?LB$X@iVi2vB7Ei>@0WzELRlgaYQppAn)7hc0gLRmWi z&4%-?j>*#@Clk8ST~gqT0ehE_%O4_r9|9&vzVpDz8dxJBeFRjw^RAA`wS1`_xhq`~ ztOP^dEXyay!k`;mfFSpMmsdK9`={6_^7fP!;n4!Jp#UgiH$q3BsJ7*mj^dfAvh_!naWPbZ5Zj8$EY2-{%3@Yb}&8a?`pfxS~#eq+fRmJ4| zbV>@GDYPa{Ke?_{l2W4FiluLzL9Ug6nv_l0uqQLOPt#F7U+$zzUv^9n;Yt}fRMt?-l2wmS&Q7v;kNakswveMl*ll@h0`tkG3UZt?wquT@ zA|vh^BIjq5!?Rt8Y?>>H$jG&AU4K#lv7D?Vdn;YYto^VgV{W~prE;{2?5J{~vf`s= z)y|P%Y|FbDn3q06`aa@9=GGG$Fj^{mo*?U zX0%i`)sUq%E>zCenI0auH{BXzhGtD2xmxQ&=VXKF)~R%~cvd!$x9VN!tZ6bURgNXt zpPrFpvp{w@|&>01yGc+=A_UR$-+R%)yDNb^SB259a4? 
zSFP7q19vek-cf&bt#}IV!Fkvq*FF#Pu0BssS@rbg(aOfp#S&M#Vmz`I_=zd#dVUUh zVN1Nr2HE-rgPdN2@-ovIX)f0HAYaX09siqO1pZ>prNfFfZEH#ZYUCb+^nKAFN7m7T zH5!62K&5G3*L+zPrEOVsHFXo=4dARnmToY}-Y=mf=#%3|257KCIPdD1y}U_k(#8ze z?$XzdGf+fdG00mRrP|~=RzFT2<7!jy>X_~QnxZz#ke$Ky5J-a4@nD7^d7}?h)koFj ztIwDeH0pKaEBR9uC^Z$IOPp&hh6MpZ@U#= zN2T1y3IIw#G()Hq=UpA!o7*K@TM6JQcv=Eb+!wzID)}Zd6D(xGBQxh+9kZ8qN@i|r zw$aE^_=@ztZyV&)j@vb|8_xsRI97_M@NnswT?W~{OIn)@y@9nQ32RgD>SW#fLTs+M zf?IH12leyK?@F%HnqkL9ldF1H$MwKI;3}F1b#zyb=<+G&$$>RS04njh7Y(xcMX*ly zV};4l{XjLoHP^{>@8}EPGni{+z~XZZ?HiVtvt>U}P|ERRSB%>ZfCYX(I>tjk1e&66 z)zK8gOR#31)yuQ65!{ZKfGTVe8a=SE_KL zU)ug{APmsp|4V`3g2A=6>o zy~UjYU#6GDm+2jP&Pz5tXJL>B!7s@!Psd=OPrD}reL{U;+h>hH5=jo6e%2TXgR?O6 zZop5Jznyd96LrQrN&K1YBvI$7lL0vYoZN|X$;kkmBTkMd_{?u|2hQRq19EmYxg%#< zliNO%nk48<={tPp^W8a1OGWPFc~Rs} zp5sIY;N>0B0qUyK`C*xs#^|-JKJC?#@X&QA?;P{1ac>{-@Hb#0qqb3LMZLF(qd5rB4AAI*yV_|Rjv5(R331e&9 zTe@1?I>tWs)F(ax+mJTaw+9b3w*PhXLrG)B7~P{yFZ;KlY=a_{4;Zk{KgbL<$E?HO)I{fh1vUXTekGVmaDWRD&OXmo^ z@2H)KpWeonP$*Om|L`+C6v_;R8$(kwgzaR?scHGD9AVWW8+ zE}c4YVmBpHLZ4&S!n&54?r;HetY0!MqpEURZ}ptquA=7lrjo}qyNjDcO~v`sx@%io zI_FkS%c{(nvAAaH?9Q5|qU@qp$eWSgQa5#4R#8)T)7*^Qj=DMNUA0S!vVm4x)tubU zs>+#5>K~`qEj6t(7FQLvbXCFgqL#Yi@}{DWqSk3mHIo{fN?SURS4T+`@QjpqmE?CX zDQ*te0q^FTNmVVtpXLL)O>lX3p%i+P0R4xvkR{ z*XILm{=D+>?xy-V=?f9_92M8>Tt*A9v***0UuMmmDM-IYm9H{`;WVGOqyc@@3B~Th zpFOXvOwWaUi$R8#nzotq>k3;IGyTn2TsJ2x4RqdIQ<&RaJ*jSCZN@Crx76M2+_d`2 za0}`bu$R|lv~<nH-VDZ}ef zBV0#j6@k69=R?}5q4{t#2d~3;J);~_=C@8ry)D%4$xc&EUlvM@X<>7EVr z=1zh1*Wlq%RGFlqHQb;bv_CBwZ2$`AW-6A$eK^o;QXN zZzI;VSrPcE#Il)Q4s|-xkq_xhLx8Uc{sVbYltL73N_Qd9Z$#EPSScfopi}?`&ucMh zQ90J9C@7Hgw$|Be?r1gJ{m;`6(1^~k5KSX1Rr7j=XIFd zINS`UA-&oek3xHaDN8UV;KL97Nql&9>)BlqmcBf{mg5P9dgvoitiVIBK_;-r z!u0u2`0&Zk&n#p3LnY1iV9$6nLK9(`iS`VCQU2kqnmH|9^m6*6jVygZC^s&=w9TMd zGfceU+DXv#!p$%?uJEDyVL$0LnsZ3DF;H&snKc74fqbABcs>_Q1FRD=K|K^o3WsOX zpJ3QD5qyhBTpxuq7tWo;?&b3&K!Wn4{0l1@xIWO@zOjY}Q*v9YE9bXVP08)8u3XqsnbEVbcGAoS__w5bPF5SJx*h3dp*)mQ8Z9r%kKqMq 
zp->Hep(E+Dmoi%$6F;KHP!ns|CN%EP9vGr#~qXtTUm)&X)S}{BlgcM5LctK=BpR zRE!}0oQ7XviVxcLa6?VulqFE+@VBM64s-|o+`^hxG>u4!groO^yn0}E+&-F$1qm(BrU#zQPt}m;eK8E?TD*!L8 z&ogNIK`lx3PpHAs^1wDQR5-f}^;}h163W2*JrV5R;6XtNEnk|5=f|RB_DCPqwN3&5 zgOV!9g!Zefw3xR?%swc8NBwI_tC|DtG<=FN{gP(RkN1R(KCM(V5ma$tx~QshL1-4H z&lKqkW>OH$Nn_~|ACs3F8Tb|5Q1|B7g5Oj*X$IDvt|}^iei7oGFYvn96SGwMZL5Fz znV3IESnh*XP`k0|iOrP5ux4hv}s6@TJ&AdFgyhuqc4{UVMZ-w>(t{H2-81QlnG56V&e6M(Z?I?gWYqeI%9TL zs2K50Z0G62ynUq&K5fL9sMrWV*9>2*eO*vBXU0O1x1*$aRt7v-fcY~KFD+kv`#`&D zeS1193$j*}R&e{RrG@UWUt4)TTG|Hb88pOKF7n5&@9oI1U8Ij%-?1xW^}VWac60Td z^!YWBf+eMJw+-`WH7|%--_db`t`ooP^}Uz!uV~@+HIqW0;@XSFHXoQl16vd46dPOV7(!>ED^FfrY}}A831C)!MUoZe@9I z2^3itv@a7)etLVRUANwzyD@*Y;9u_IPulFcpbM%Z%2ytqi}?{<%=%jtX@&Zp$%zk&5FjPG^eOU>>Lm16#! zxmm2e3DdVDr$1xsub_Ome5_`KN_hU*@H(q%kLVmPn6!&s_j@{p8m4XrX^tSW%AzFQ(CuG5;i!Kfdik(bM!w`(mrFkqW_&8O>Caf6Vr&75U|g z^uoVDrqTM_*=WnO&Mv|HIVOLX{?=XIhcJI{WFgmgeDanyoAT1~&9C73W0!9Q_FtJI zeN=gEkv|hmvf@tuWVe*ezzM;}Zpx_@{ z|AY3({0nTO(62yG3-eDB`Ca_uDE?G=_QZ)3A1?(Wv_HXU6*PtTrM?f~UBy2RiS(fY z+EtdB*RlHB@B-x5CelZh7hOvxZym%m>ZnthKMV6O5cyr?b?`k$s6h!t1OnbHOC|Cm3-^M_peAB4i= zo3{RTD&~KvL+}sJr%CPc16zNaUyJEmOHp1-AFDhg6yHN4J%3-1;y-m%SI$VI?>=p_ zo9iIxi2SCPLX?HtUmA7NmG^1-{!xQaSWm+0D_$4M_tR;ZKfSk;w-?-Bq^7L&Kln4; zj9EinP>9H{F~8tZ=sMw7n8)KJeN2b<;WHPDK7Y|-M!zVu$i&O+hxPxIA0}s2r;ze% zKFE1thMca?? 
z!a9hv)64v3o}Nc0rknX$f277QkQA-&yb(fC3Q-mLvmS*~=r!@W*e6York2u;r zkC`I1n0`trw-4@*>&q9dJU=^!)$jaJq?g-AK{Wl-T4>_o`mYxJW->3=9)F{q9LC^7 z;TZAC{4OY&iFABxdb7ae5)VLmmGYT}=_i)c{P3WFy|2*tII~Y`1+@Hej3884wXnW$ z_5!HO3+r3WSYRuR*UYbr%uNf0Yq~-6`H(VHP$uG^F7G#(NU6_Viy^*6>t`s3>-RSC zfS+4bY~pF?C!T4GAs$5Ar~LM~@KF2H>PNm$l|E|X$u{x0^hY-Rw2SoIehSODzTTnz zu>KXwEEKNK!S)dA-<0MaYkLx-eito*^tp&MG(Rppw7t{XGx|3ry;#2@(@i`j75&3A zwU?*oFQsK>{&HHM^RYmc_XQT8oP#%^ZOyEj1LFW0E!kDGb9tX7-yce;JsBa6)guTXRPn9A)3_>Ebba8!K z_C()3s&hnp6s`cLLhYYo*i%6brl)wy%b=0P;);p~lj_??=`ZIOns|yL{lgP!=ILQL z7nM|O=8tAiFlHLnp6K{j?qZaW+Iu&}7q>lynsOm4#S`vL$d~g>^!9|JBAnzXQ`}+4xg_3jab>0*mKKf0T|nM5w$|$|`w#&-@FPo|+=fNuenH zRT$Iv_NH_Fv+)=jL{r)q=HHYd0X}YKjg3c|A%AXnF84>nJ(N`pdBcxC1oexNrmiA& zyB3=Gnf?`jnnGv(B-TZt|CudwGhn=WPIpg9b7QC-^XDQST3=n_&9uHr|88nCxDHCZ zIn3?H`L@uyIt=H|Q>dc=SXwzf1hVrvKsy=g0l){APG5pykKyQOlpj z?-_qH_E}U0Mwo8qkB@)RBH$3RRXP+a#< ze()pmYmtAtnLj@M;l&UiWc4%j5O2RC?EQfnIcvJ9>C^U%jW4jd7G~dE)|?6U+AZ?C zj1Sn_`$#*lZ_w@`t8k;pAC*5QEoA)7*wU2dEU540B7dCx^D#a12YMmDTHk1Yq11OY zC^|?k+J71A!6(JYf1)XWeEn^N%3D~rh}$b04^{eGq|D|=7MgARk{K1W81O!TEXSdE zXos;rviU1?eh4TN{;z$Ax4*nUT7ZdIdEzfNo^2oRWb`Oq*43M$=g$@NBFtXw^l0eL z=6^SVjm)oY!=#ZpG(VQ#j7T=ucLh~4Ed(|GfK0<3FgLRee0YTh%txd8ieP^QX$lHx ze#dwP>L@Jpl~I1}*x%xVe5M4dNJSro@GJy#4AZJA@O>)s_(awp!248%;8zrd--5X? 
zmxsRllsyWrAmW2CRT&flZI2dEqhkFx{YjtG`ZRSWjI+T19| z&d#cek|mWH>Agr~65{Wv;PMo>@Tc{c@-OL~)<9)|P#Eg3ybN4#Dt|i_TJdL)JZtr- z{Ih%O3a50!ysoa=^xUqRbeJOwWOElo*M#{S!p$X) z2>rXn$7ubR`N}9iQ6EIS*k1o>)#UR7VB8$)KP!)XsQ-c=i%&b+t6G2`<~}g)=V74QczPah$j^+CpSE8z9u)Z)rl+M+YT|eC2WdRV z9Agbl>*ezD_)8ex37NgSX ze`q)DJ*q#%$EZS~IU+qDA4?P0e7wotep6RzD)m7j9zg9LAXC$#_74{zTqD{6WTowP zxoorw=tu*bG^E-;wI{{LpjoIY1U)bi4ZRRAAIgWON7G}j3&dCE2!8?aber{)+gIqH zJi)@kNT_(sq({(|MoTDxwOXPDGSX^|9@A7P= 'l'", "decimal128 = -1234.567", "decimal256 = -1234.567", - # not optimized + "uint8 = 5 AND int8 = 2", + "uint8 = -5 AND int8 = 2", + "int8 = 2 AND uint8 = -5", + "uint8 = -5 AND int8 = 200", + "NOT uint8 = 5 AND uint8 IS NOT NULL", + "NOT uint8 = 50 AND uint8 IS NOT NULL", + # not optimized for non-dataset layer + "FID = 0", "boolean = 0 OR boolean = 1", + "uint8 = 1 OR uint8 = -1", + "uint8 = -1 OR uint8 = 1", "1 = 1", "boolean = boolean", "FID = 1", '"struct_field.a" = 1', '"struct_field.a" = 0', + "string LIKE 'd'", + "string LIKE 'D'", + "string ILIKE 'D'", + "string LIKE 'f'", + "timestamp_ms_gmt = '2019/01/01 14:00:00.500Z'", + "timestamp_ms_gmt < '2019/01/01 14:00:00.500Z'", + "timestamp_s_no_tz = '2019/01/01 14:00:00'", + "timestamp_s_no_tz < '2019/01/01 14:00:00'", + # partially optimized + "boolean = 0 AND OGR_GEOMETRY IS NOT NULL", + "OGR_GEOMETRY IS NOT NULL AND boolean = 0", + # not optimized + "OGR_GEOMETRY IS NOT NULL AND OGR_GEOMETRY IS NOT NULL", + "boolean = 0 OR OGR_GEOMETRY IS NOT NULL", + "OGR_GEOMETRY IS NOT NULL OR boolean = 0", ], ) -def test_ogr_parquet_attribute_filter(filter): +def test_ogr_parquet_attribute_filter(filter, with_arrow_dataset_or_not): with gdaltest.config_option("OGR_PARQUET_OPTIMIZED_ATTRIBUTE_FILTER", "NO"): ds = ogr.Open("data/parquet/test.parquet") @@ -1463,12 +1499,26 @@ def test_ogr_parquet_attribute_filter(filter): ref_fc = lyr.GetFeatureCount() ds = None - ds = ogr.Open("data/parquet/test.parquet") + prefix = "PARQUET:" if with_arrow_dataset_or_not else "" + ds = 
ogr.Open(prefix + "data/parquet/test.parquet") lyr = ds.GetLayer(0) assert lyr.SetAttributeFilter(filter) == ogr.OGRERR_NONE assert lyr.GetFeatureCount() == ref_fc +def test_ogr_parquet_attribute_filter_on_fid_column(with_arrow_dataset_or_not): + + filter = "fid = 1" + + prefix = "PARQUET:" if with_arrow_dataset_or_not else "" + ds = ogr.Open(prefix + "data/parquet/test_with_fid_and_geometry_bbox.parquet") + lyr = ds.GetLayer(0) + assert lyr.SetAttributeFilter(filter) == ogr.OGRERR_NONE + f = lyr.GetNextFeature() + assert f.GetFID() == 1 + assert lyr.GetNextFeature() is None + + def test_ogr_parquet_attribute_filter_and_then_ignored_fields(): ds = ogr.Open("data/parquet/test.parquet") @@ -1512,7 +1562,7 @@ def test_ogr_parquet_ignored_fields_and_then_attribute_filter(): assert lyr.GetFeatureCount() == 1 -def test_ogr_parquet_attribute_filter_and_spatial_filter(): +def test_ogr_parquet_attribute_filter_and_spatial_filter(with_arrow_dataset_or_not): filter = "int8 != 0" @@ -1525,13 +1575,40 @@ def test_ogr_parquet_attribute_filter_and_spatial_filter(): assert ref_fc > 0 ds = None - ds = ogr.Open("data/parquet/test.parquet") + prefix = "PARQUET:" if with_arrow_dataset_or_not else "" + ds = ogr.Open(prefix + "data/parquet/test.parquet") lyr = ds.GetLayer(0) lyr.SetSpatialFilterRect(4, 2, 4, 2) assert lyr.SetAttributeFilter(filter) == ogr.OGRERR_NONE assert lyr.GetFeatureCount() == ref_fc +def test_ogr_parquet_attribute_filter_and_spatial_filter_with_spatial_index( + tmp_path, with_arrow_dataset_or_not +): + + filename = str(tmp_path / "test.parquet") + gdal.VectorTranslate(filename, "data/parquet/test.parquet") + + filter = "uint8 != 1" + + with gdaltest.config_option("OGR_PARQUET_OPTIMIZED_ATTRIBUTE_FILTER", "NO"): + ds = ogr.Open("data/parquet/test.parquet") + lyr = ds.GetLayer(0) + lyr.SetSpatialFilterRect(1, 2, 3, 2) + assert lyr.SetAttributeFilter(filter) == ogr.OGRERR_NONE + ref_fc = lyr.GetFeatureCount() + assert ref_fc > 0 + ds = None + + prefix = "PARQUET:" if 
with_arrow_dataset_or_not else "" + ds = ogr.Open(prefix + filename) + lyr = ds.GetLayer(0) + lyr.SetSpatialFilterRect(1, 2, 3, 2) + assert lyr.SetAttributeFilter(filter) == ogr.OGRERR_NONE + assert lyr.GetFeatureCount() == ref_fc + + ############################################################################### # Test IS NULL / IS NOT NULL diff --git a/doc/source/drivers/vector/parquet.rst b/doc/source/drivers/vector/parquet.rst index 634949bad436..fd8c9d84e1d2 100644 --- a/doc/source/drivers/vector/parquet.rst +++ b/doc/source/drivers/vector/parquet.rst @@ -179,7 +179,11 @@ Starting with GDAL 3.6.0, the driver can read directories that contain several Parquet files, and expose them as a single layer. This support is only enabled if the driver is built against the ``arrowdataset`` C++ library. -Note that no optimization is currently done regarding filtering. +It is also possible to force opening single Parquet file in that mode by prefixing +their filename with ``PARQUET:``. + +Optimized spatial and attribute filtering for Arrow datasets is available since +GDAL 3.10. Metadata -------- diff --git a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h index d6adc37dd4a8..6179fd009b07 100644 --- a/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h +++ b/ogr/ogrsf_frmts/arrow_common/ogr_arrow.h @@ -210,6 +210,11 @@ class OGRArrowLayer CPL_NON_FINAL std::vector m_asAttributeFilterConstraints{}; + //! Whether attribute filter should be skipped. + // This is set to true by OGRParquetDatasetLayer when it can fully translate + // a filter, as an optimization. + bool m_bBaseArrowIgnoreAttributeFilter = false; + std::map> LoadGDALSchema(const arrow::KeyValueMetadata *kv_metadata); @@ -273,6 +278,10 @@ class OGRArrowLayer CPL_NON_FINAL // Refreshes Constraint.iArrayIdx from iField. 
To be called by SetIgnoredFields() void ComputeConstraintsArrayIdx(); + static const swq_expr_node *GetColumnSubNode(const swq_expr_node *poNode); + static const swq_expr_node *GetConstantSubNode(const swq_expr_node *poNode); + static bool IsComparisonOp(int op); + virtual bool FastGetExtent(int iGeomField, OGREnvelope *psExtent) const; bool FastGetExtent3D(int iGeomField, OGREnvelope3D *psExtent) const; static OGRErr GetExtentFromMetadata(const CPLJSONObject &oJSONDef, diff --git a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp index 931f16515be1..31ab3688429f 100644 --- a/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp +++ b/ogr/ogrsf_frmts/arrow_common/ograrrowlayer.hpp @@ -3130,7 +3130,9 @@ inline void OGRArrowLayer::ResetReading() /* GetColumnSubNode() */ /***********************************************************************/ -static const swq_expr_node *GetColumnSubNode(const swq_expr_node *poNode) +/* static*/ +inline const swq_expr_node * +OGRArrowLayer::GetColumnSubNode(const swq_expr_node *poNode) { if (poNode->eNodeType == SNT_OPERATION && poNode->nSubExprCount == 2) { @@ -3146,7 +3148,9 @@ static const swq_expr_node *GetColumnSubNode(const swq_expr_node *poNode) /* GetConstantSubNode() */ /***********************************************************************/ -static const swq_expr_node *GetConstantSubNode(const swq_expr_node *poNode) +/* static */ +inline const swq_expr_node * +OGRArrowLayer::GetConstantSubNode(const swq_expr_node *poNode) { if (poNode->eNodeType == SNT_OPERATION && poNode->nSubExprCount == 2) { @@ -3162,7 +3166,8 @@ static const swq_expr_node *GetConstantSubNode(const swq_expr_node *poNode) /* IsComparisonOp() */ /***********************************************************************/ -static bool IsComparisonOp(int op) +/* static*/ +inline bool OGRArrowLayer::IsComparisonOp(int op) { return (op == SWQ_EQ || op == SWQ_NE || op == SWQ_LT || op == SWQ_LE || op == SWQ_GT || op == 
SWQ_GE); @@ -5578,8 +5583,8 @@ inline int OGRArrowLayer::GetNextArrowArray(struct ArrowArrayStream *stream, } const bool bNeedsPostFilter = - (m_poAttrQuery) || - (m_poFilterGeom && !m_bBaseArrowIgnoreSpatialFilter); + (m_poAttrQuery && !m_bBaseArrowIgnoreAttributeFilter) || + (m_poFilterGeom && !m_bBaseArrowIgnoreSpatialFilter); struct ArrowSchema schema; memset(&schema, 0, sizeof(schema)); diff --git a/ogr/ogrsf_frmts/parquet/ogr_include_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_include_parquet.h index d2d08e4f30f7..0a56ea3f1447 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_include_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_include_parquet.h @@ -61,6 +61,9 @@ #ifdef GDAL_USE_ARROWDATASET #include "arrow/filesystem/filesystem.h" +#include "arrow/compute/api_scalar.h" +#include "arrow/compute/cast.h" +#include "arrow/compute/registry.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_parquet.h" diff --git a/ogr/ogrsf_frmts/parquet/ogr_parquet.h b/ogr/ogrsf_frmts/parquet/ogr_parquet.h index 76ae8be4681c..9323f54bac2b 100644 --- a/ogr/ogrsf_frmts/parquet/ogr_parquet.h +++ b/ogr/ogrsf_frmts/parquet/ogr_parquet.h @@ -246,6 +246,11 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase void BuildScanner(); + //! Translate a OGR SQL expression into an Arrow one + // bFullyTranslated should be set to true before calling this method. 
+ arrow::compute::Expression BuildArrowFilter(const swq_expr_node *poNode, + bool &bFullyTranslated); + protected: std::string GetDriverUCName() const override { @@ -276,6 +281,8 @@ class OGRParquetDatasetLayer final : public OGRParquetLayerBase void SetSpatialFilter(int iGeomField, OGRGeometry *poGeom) override; + OGRErr SetAttributeFilter(const char *pszFilter) override; + OGRErr SetIgnoredFields(CSLConstList papszFields) override; int TestCapability(const char *) override; diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index 245c64d79869..bb8d7f7debc0 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -34,11 +34,15 @@ #include #include +#include "cpl_time.h" + #include "ogr_parquet.h" #include "../arrow_common/ograrrowlayer.hpp" #include "../arrow_common/ograrrowdataset.hpp" +namespace cp = ::arrow::compute; + /************************************************************************/ /* OGRParquetLayer() */ /************************************************************************/ @@ -201,6 +205,7 @@ void OGRParquetDatasetLayer::BuildScanner() m_bSkipFilterGeometry = false; m_bBaseArrowIgnoreSpatialFilterRect = false; m_bBaseArrowIgnoreSpatialFilter = false; + m_bBaseArrowIgnoreAttributeFilter = false; try { @@ -252,7 +257,6 @@ void OGRParquetDatasetLayer::BuildScanner() } #endif - namespace cp = ::arrow::compute; cp::Expression expression; if (m_poFilterGeom && CPLTestBool(CPLGetConfigOption( @@ -299,6 +303,32 @@ void OGRParquetDatasetLayer::BuildScanner() ->IsIgnored()); } } + + if (m_poAttrQuery && + CPLTestBool(CPLGetConfigOption( + "OGR_PARQUET_OPTIMIZED_ATTRIBUTE_FILTER", "YES"))) + { + const swq_expr_node *poNode = + static_cast(m_poAttrQuery->GetSWQExpr()); + bool bFullyTranslated = true; + auto expressionFilter = BuildArrowFilter(poNode, bFullyTranslated); + if (expressionFilter.is_valid()) + { + if 
(bFullyTranslated) + { + CPLDebugOnly("PARQUET", + "Attribute filter fully translated to Arrow"); + m_asAttributeFilterConstraints.clear(); + m_bBaseArrowIgnoreAttributeFilter = true; + } + + if (expression.is_valid()) + expression = cp::and_(expression, expressionFilter); + else + expression = std::move(expressionFilter); + } + } + if (expression.is_valid()) { PARQUET_THROW_NOT_OK(scannerBuilder->Filter(expression)); @@ -328,6 +358,184 @@ void OGRParquetDatasetLayer::BuildScanner() } } +/************************************************************************/ +/* BuildArrowFilter() */ +/************************************************************************/ + +cp::Expression +OGRParquetDatasetLayer::BuildArrowFilter(const swq_expr_node *poNode, + bool &bFullyTranslated) +{ + if (poNode->eNodeType == SNT_OPERATION && poNode->nOperation == SWQ_AND && + poNode->nSubExprCount == 2) + { + auto sLeft = BuildArrowFilter(poNode->papoSubExpr[0], bFullyTranslated); + auto sRight = + BuildArrowFilter(poNode->papoSubExpr[1], bFullyTranslated); + if (sLeft.is_valid() && sRight.is_valid()) + return cp::and_(sLeft, sRight); + if (sLeft.is_valid()) + return sLeft; + if (sRight.is_valid()) + return sRight; + } + + else if (poNode->eNodeType == SNT_OPERATION && + poNode->nOperation == SWQ_OR && poNode->nSubExprCount == 2) + { + auto sLeft = BuildArrowFilter(poNode->papoSubExpr[0], bFullyTranslated); + auto sRight = + BuildArrowFilter(poNode->papoSubExpr[1], bFullyTranslated); + if (sLeft.is_valid() && sRight.is_valid()) + return cp::or_(sLeft, sRight); + } + + else if (poNode->eNodeType == SNT_OPERATION && + poNode->nOperation == SWQ_NOT && poNode->nSubExprCount == 1) + { + auto expr = BuildArrowFilter(poNode->papoSubExpr[0], bFullyTranslated); + if (expr.is_valid()) + return cp::not_(expr); + } + + else if (poNode->eNodeType == SNT_COLUMN) + { + if (poNode->field_index >= 0 && + poNode->field_index < m_poFeatureDefn->GetFieldCount()) + { + std::vector fieldRefs; + for (int idx 
: m_anMapFieldIndexToArrowColumn[poNode->field_index]) + fieldRefs.emplace_back(idx); + auto expr = cp::field_ref(arrow::FieldRef(std::move(fieldRefs))); + + // Comparing a boolean column to 0 or 1 fails without explicit cast + if (m_poFeatureDefn->GetFieldDefn(poNode->field_index) + ->GetSubType() == OFSTBoolean) + { + expr = cp::call("cast", {expr}, + cp::CastOptions::Safe(arrow::uint8())); + } + return expr; + } + else if (poNode->field_index == + m_poFeatureDefn->GetFieldCount() + SPF_FID && + m_iFIDArrowColumn >= 0) + { + return cp::field_ref(arrow::FieldRef(m_iFIDArrowColumn)); + } + } + + else if (poNode->eNodeType == SNT_CONSTANT) + { + switch (poNode->field_type) + { + case SWQ_INTEGER: + case SWQ_INTEGER64: + return cp::literal(static_cast(poNode->int_value)); + + case SWQ_FLOAT: + return cp::literal(poNode->float_value); + + case SWQ_STRING: + return cp::literal(poNode->string_value); + + case SWQ_TIMESTAMP: + { + OGRField sField; + if (OGRParseDate(poNode->string_value, &sField, 0)) + { + struct tm brokenDown; + brokenDown.tm_year = sField.Date.Year - 1900; + brokenDown.tm_mon = sField.Date.Month - 1; + brokenDown.tm_mday = sField.Date.Day; + brokenDown.tm_hour = sField.Date.Hour; + brokenDown.tm_min = sField.Date.Minute; + brokenDown.tm_sec = static_cast(sField.Date.Second); + int64_t nVal = + CPLYMDHMSToUnixTime(&brokenDown) * 1000 + + (static_cast(sField.Date.Second * 1000 + 0.5) % + 1000); + if (sField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ) + { + // Convert for sField.Date.TZFlag to UTC + const int TZOffset = + (sField.Date.TZFlag - OGR_TZFLAG_UTC) * 15; + const int TZOffsetMS = TZOffset * 60 * 1000; + nVal -= TZOffsetMS; + return cp::literal(arrow::TimestampScalar( + nVal, arrow::TimeUnit::MILLI, "UTC")); + } + else + { + return cp::literal(arrow::TimestampScalar( + nVal, arrow::TimeUnit::MILLI)); + } + } + } + + default: + break; + } + } + + else if (poNode->eNodeType == SNT_OPERATION && poNode->nSubExprCount == 2 && + 
IsComparisonOp(poNode->nOperation)) + { + auto sLeft = BuildArrowFilter(poNode->papoSubExpr[0], bFullyTranslated); + auto sRight = + BuildArrowFilter(poNode->papoSubExpr[1], bFullyTranslated); + if (sLeft.is_valid() && sRight.is_valid()) + { + if (poNode->nOperation == SWQ_EQ) + return cp::equal(sLeft, sRight); + if (poNode->nOperation == SWQ_LT) + return cp::less(sLeft, sRight); + if (poNode->nOperation == SWQ_LE) + return cp::less_equal(sLeft, sRight); + if (poNode->nOperation == SWQ_GT) + return cp::greater(sLeft, sRight); + if (poNode->nOperation == SWQ_GE) + return cp::greater_equal(sLeft, sRight); + if (poNode->nOperation == SWQ_NE) + return cp::not_equal(sLeft, sRight); + } + } + + else if (poNode->eNodeType == SNT_OPERATION && poNode->nSubExprCount == 2 && + (poNode->nOperation == SWQ_LIKE || + poNode->nOperation == SWQ_ILIKE) && + poNode->papoSubExpr[1]->eNodeType == SNT_CONSTANT && + poNode->papoSubExpr[1]->field_type == SWQ_STRING) + { + auto sLeft = BuildArrowFilter(poNode->papoSubExpr[0], bFullyTranslated); + if (sLeft.is_valid()) + { + if (cp::GetFunctionRegistry() + ->GetFunction("match_like") + .ValueOr(nullptr)) + { + // match_like is only available is Arrow built against RE2. 
+ return cp::call( + "match_like", {sLeft}, + cp::MatchSubstringOptions( + poNode->papoSubExpr[1]->string_value, + /* ignore_case=*/poNode->nOperation == SWQ_ILIKE)); + } + } + } + + else if (poNode->eNodeType == SNT_OPERATION && + poNode->nOperation == SWQ_ISNULL && poNode->nSubExprCount == 1) + { + auto expr = BuildArrowFilter(poNode->papoSubExpr[0], bFullyTranslated); + if (expr.is_valid()) + return cp::is_null(expr); + } + + bFullyTranslated = false; + return {}; +} + /************************************************************************/ /* ReadNextBatch() */ /************************************************************************/ @@ -397,7 +605,8 @@ OGRFeature *OGRParquetDatasetLayer::GetNextFeature() if ((m_poFilterGeom == nullptr || m_bSkipFilterGeometry || FilterGeometry(poFeature->GetGeometryRef())) && - (m_poAttrQuery == nullptr || m_poAttrQuery->Evaluate(poFeature))) + (m_poAttrQuery == nullptr || m_bBaseArrowIgnoreAttributeFilter || + m_poAttrQuery->Evaluate(poFeature))) { return poFeature; } @@ -681,3 +890,13 @@ int OGRParquetDatasetLayer::TestCapability(const char *pszCap) return OGRParquetLayerBase::TestCapability(pszCap); } + +/***********************************************************************/ +/* SetAttributeFilter() */ +/***********************************************************************/ + +OGRErr OGRParquetDatasetLayer::SetAttributeFilter(const char *pszFilter) +{ + m_bRebuildScanner = true; + return OGRParquetLayerBase::SetAttributeFilter(pszFilter); +} From 28e264cbe508eb44045cfecd71eda274024285d1 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 13 May 2024 23:15:38 +0200 Subject: [PATCH 014/191] Parquet dataset (multi-file typically): optimize spatial filtering on GeoArrow struct Point even when there is no bounding box column --- autotest/ogr/ogr_parquet.py | 24 ++++++-- .../parquet/ogrparquetdatasetlayer.cpp | 55 +++++++++++++++++++ ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp | 19 ++++++- 3 files changed, 92 
insertions(+), 6 deletions(-) diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index 3bae42f988b8..298f27cdf0ea 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -3747,7 +3747,12 @@ def check_file(filename): @pytest.mark.parametrize("covering_bbox", [True, False]) @gdaltest.enable_exceptions() def test_ogr_parquet_geoarrow( - tmp_vsimem, tmp_path, wkt, check_with_pyarrow, covering_bbox + tmp_vsimem, + tmp_path, + wkt, + check_with_pyarrow, + covering_bbox, + with_arrow_dataset_or_not, ): geom = ogr.CreateGeometryFromWkt(wkt) @@ -3830,17 +3835,28 @@ def check(lyr): f = lyr.GetNextFeature() ogrtest.check_feature_geometry(f, geom2) - ds = ogr.Open(filename) + filename_to_open = ("PARQUET:" if with_arrow_dataset_or_not else "") + filename + + ds = ogr.Open(filename_to_open) lyr = ds.GetLayer(0) check(lyr) + if ( + covering_bbox + or not with_arrow_dataset_or_not + or lyr.GetGeomType() in (ogr.wkbPoint, ogr.wkbPoint25D) + ): + assert lyr.TestCapability(ogr.OLCFastSpatialFilter) + else: + assert not lyr.TestCapability(ogr.OLCFastSpatialFilter) + # Check that ignoring attribute fields doesn't impact geometry reading - ds = ogr.Open(filename) + ds = ogr.Open(filename_to_open) lyr = ds.GetLayer(0) lyr.SetIgnoredFields(["foo"]) check(lyr) - ds = ogr.Open(filename) + ds = ogr.Open(filename_to_open) lyr = ds.GetLayer(0) minx, maxx, miny, maxy = geom.GetEnvelope() diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index bb8d7f7debc0..3d645e385cb0 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -286,7 +286,50 @@ void OGRParquetDatasetLayer::BuildScanner() cp::field_ref(arrow::FieldRef( oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldYMax)), cp::literal(m_sFilterEnvelope.MinY))}); + } + else if (m_iGeomFieldFilter >= 0 && + m_iGeomFieldFilter < + static_cast(m_aeGeomEncoding.size()) && + 
m_aeGeomEncoding[m_iGeomFieldFilter] == + OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT) + { + const int iCol = + m_anMapGeomFieldIndexToArrowColumn[m_iGeomFieldFilter]; + const auto &field = m_poSchema->fields()[iCol]; + auto type = field->type(); + std::vector fieldRefs; + fieldRefs.emplace_back(iCol); + if (type->id() == arrow::Type::STRUCT) + { + const auto fieldStruct = + std::static_pointer_cast(type); + const auto fieldX = fieldStruct->GetFieldByName("x"); + const auto fieldY = fieldStruct->GetFieldByName("y"); + if (fieldX && fieldY) + { + auto fieldRefX(fieldRefs); + fieldRefX.emplace_back("x"); + auto fieldRefY(fieldRefs); + fieldRefY.emplace_back("y"); + expression = cp::and_( + {cp::less_equal( + cp::field_ref(arrow::FieldRef(fieldRefX)), + cp::literal(m_sFilterEnvelope.MaxX)), + cp::less_equal( + cp::field_ref(arrow::FieldRef(fieldRefY)), + cp::literal(m_sFilterEnvelope.MaxY)), + cp::greater_equal( + cp::field_ref(arrow::FieldRef(fieldRefX)), + cp::literal(m_sFilterEnvelope.MinX)), + cp::greater_equal( + cp::field_ref(arrow::FieldRef(fieldRefY)), + cp::literal(m_sFilterEnvelope.MinY))}); + } + } + } + if (expression.is_valid()) + { m_bBaseArrowIgnoreSpatialFilterRect = true; const bool bIsPoint = @@ -888,6 +931,18 @@ int OGRParquetDatasetLayer::TestCapability(const char *pszCap) if (EQUAL(pszCap, OLCIgnoreFields)) return true; + if (EQUAL(pszCap, OLCFastSpatialFilter)) + { + if (m_iGeomFieldFilter >= 0 && + m_iGeomFieldFilter < static_cast(m_aeGeomEncoding.size()) && + m_aeGeomEncoding[m_iGeomFieldFilter] == + OGRArrowGeomEncoding::GEOARROW_STRUCT_POINT) + { + return true; + } + // fallback to base method + } + return OGRParquetLayerBase::TestCapability(pszCap); } diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp index bbd04b74e55a..f09eb7989b61 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetlayer.cpp @@ -481,8 +481,12 @@ int 
OGRParquetLayerBase::TestCapability(const char *pszCap) if (EQUAL(pszCap, OLCFastSpatialFilter)) { - return m_oMapGeomFieldIndexToGeomColBBOX.find(m_iGeomFieldFilter) != - m_oMapGeomFieldIndexToGeomColBBOX.end(); + if (m_oMapGeomFieldIndexToGeomColBBOX.find(m_iGeomFieldFilter) != + m_oMapGeomFieldIndexToGeomColBBOX.end()) + { + return true; + } + return false; } return OGRArrowLayer::TestCapability(pszCap); @@ -2104,6 +2108,17 @@ int OGRParquetLayer::TestCapability(const char *pszCap) if (EQUAL(pszCap, OLCIgnoreFields)) return !m_bHasMissingMappingToParquet; + if (EQUAL(pszCap, OLCFastSpatialFilter)) + { + if (m_iGeomFieldFilter >= 0 && + m_iGeomFieldFilter < static_cast(m_aeGeomEncoding.size()) && + OGRArrowIsGeoArrowStruct(m_aeGeomEncoding[m_iGeomFieldFilter])) + { + return true; + } + // fallback to base method + } + return OGRParquetLayerBase::TestCapability(pszCap); } From 6093d055d8c57d01a95773d7fbc1bc6f070c65ce Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 14 May 2024 21:03:53 +0200 Subject: [PATCH 015/191] Add a static version of OGRLayer::FilterWKBGeometry() --- ogr/ogrsf_frmts/generic/ogrlayer.cpp | 43 +++++++++++++++++++++------- ogr/ogrsf_frmts/ogrsf_frmts.h | 8 ++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/ogr/ogrsf_frmts/generic/ogrlayer.cpp b/ogr/ogrsf_frmts/generic/ogrlayer.cpp index eb814d220e02..3bdd02b5d057 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayer.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayer.cpp @@ -1879,22 +1879,38 @@ bool OGRLayer::FilterWKBGeometry(const GByte *pabyWKB, size_t nWKBSize, bool bEnvelopeAlreadySet, OGREnvelope &sEnvelope) const { - if (!m_poFilterGeom) + OGRPreparedGeometry *pPreparedFilterGeom = m_pPreparedFilterGeom; + bool bRet = FilterWKBGeometry( + pabyWKB, nWKBSize, bEnvelopeAlreadySet, sEnvelope, m_poFilterGeom, + m_bFilterIsEnvelope, m_sFilterEnvelope, pPreparedFilterGeom); + const_cast(this)->m_pPreparedFilterGeom = pPreparedFilterGeom; + return bRet; +} + +/* static */ +bool 
OGRLayer::FilterWKBGeometry(const GByte *pabyWKB, size_t nWKBSize, + bool bEnvelopeAlreadySet, + OGREnvelope &sEnvelope, + const OGRGeometry *poFilterGeom, + bool bFilterIsEnvelope, + const OGREnvelope &sFilterEnvelope, + OGRPreparedGeometry *&pPreparedFilterGeom) +{ + if (!poFilterGeom) return true; if ((bEnvelopeAlreadySet || OGRWKBGetBoundingBox(pabyWKB, nWKBSize, sEnvelope)) && - m_sFilterEnvelope.Intersects(sEnvelope)) + sFilterEnvelope.Intersects(sEnvelope)) { - if (m_bFilterIsEnvelope && m_sFilterEnvelope.Contains(sEnvelope)) + if (bFilterIsEnvelope && sFilterEnvelope.Contains(sEnvelope)) { return true; } else { - if (m_bFilterIsEnvelope && - OGRWKBIntersectsPessimistic(pabyWKB, nWKBSize, - m_sFilterEnvelope)) + if (bFilterIsEnvelope && + OGRWKBIntersectsPessimistic(pabyWKB, nWKBSize, sFilterEnvelope)) { return true; } @@ -1905,12 +1921,19 @@ bool OGRLayer::FilterWKBGeometry(const GByte *pabyWKB, size_t nWKBSize, if (OGRGeometryFactory::createFromWkb(pabyWKB, nullptr, &poGeom, nWKBSize) == OGRERR_NONE) { - if (m_pPreparedFilterGeom) + if (!pPreparedFilterGeom) + { + pPreparedFilterGeom = + OGRCreatePreparedGeometry(OGRGeometry::ToHandle( + const_cast(poFilterGeom))); + } + if (pPreparedFilterGeom) ret = OGRPreparedGeometryIntersects( - m_pPreparedFilterGeom, - OGRGeometry::ToHandle(poGeom)); + pPreparedFilterGeom, + OGRGeometry::ToHandle( + const_cast(poGeom))); else - ret = m_poFilterGeom->Intersects(poGeom); + ret = poFilterGeom->Intersects(poGeom); } delete poGeom; return CPL_TO_BOOL(ret); diff --git a/ogr/ogrsf_frmts/ogrsf_frmts.h b/ogr/ogrsf_frmts/ogrsf_frmts.h index 47ec16626c09..e3089568cf52 100644 --- a/ogr/ogrsf_frmts/ogrsf_frmts.h +++ b/ogr/ogrsf_frmts/ogrsf_frmts.h @@ -387,6 +387,14 @@ class CPL_DLL OGRLayer : public GDALMajorObject bool FilterWKBGeometry(const GByte *pabyWKB, size_t nWKBSize, bool bEnvelopeAlreadySet, OGREnvelope &sEnvelope) const; + + static bool FilterWKBGeometry(const GByte *pabyWKB, size_t nWKBSize, + bool 
bEnvelopeAlreadySet, + OGREnvelope &sEnvelope, + const OGRGeometry *poFilterGeom, + bool bFilterIsEnvelope, + const OGREnvelope &sFilterEnvelope, + OGRPreparedGeometry *&poPreparedFilterGeom); //! @endcond /** Field name used by GetArrowSchema() for a FID column when From c62ce4aa989652acf0501a502648038f705ea317 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 14 May 2024 21:11:39 +0200 Subject: [PATCH 016/191] Add OGRGeometry::IsRectangle() and use it in OGRLayer::InstallFilter() --- ogr/ogr_geometry.h | 2 ++ ogr/ogrgeometry.cpp | 49 ++++++++++++++++++++++++++++ ogr/ogrsf_frmts/generic/ogrlayer.cpp | 37 +-------------------- 3 files changed, 52 insertions(+), 36 deletions(-) diff --git a/ogr/ogr_geometry.h b/ogr/ogr_geometry.h index f3255859774c..d7778b219d85 100644 --- a/ogr/ogr_geometry.h +++ b/ogr/ogr_geometry.h @@ -628,6 +628,8 @@ class CPL_DLL OGRGeometry virtual void swapXY(); + bool IsRectangle() const; + //! @cond Doxygen_Suppress static OGRGeometry *CastToIdentity(OGRGeometry *poGeom) { diff --git a/ogr/ogrgeometry.cpp b/ogr/ogrgeometry.cpp index 8ac87d6cffae..752fa706b787 100644 --- a/ogr/ogrgeometry.cpp +++ b/ogr/ogrgeometry.cpp @@ -8233,3 +8233,52 @@ void OGRwkbExportOptionsSetPrecision( if (hPrecisionOptions) psOptions->sPrecision.SetFrom(*hPrecisionOptions); } + +/************************************************************************/ +/* IsRectangle() */ +/************************************************************************/ + +/** + * \brief Returns whether the geometry is a polygon with 4 corners forming + * a rectangle. 
+ * + * @since GDAL 3.10 + */ +bool OGRGeometry::IsRectangle() const +{ + if (wkbFlatten(getGeometryType()) != wkbPolygon) + return false; + + const OGRPolygon *poPoly = toPolygon(); + + if (poPoly->getNumInteriorRings() != 0) + return false; + + const OGRLinearRing *poRing = poPoly->getExteriorRing(); + if (!poRing) + return false; + + if (poRing->getNumPoints() > 5 || poRing->getNumPoints() < 4) + return false; + + // If the ring has 5 points, the last should be the first. + if (poRing->getNumPoints() == 5 && (poRing->getX(0) != poRing->getX(4) || + poRing->getY(0) != poRing->getY(4))) + return false; + + // Polygon with first segment in "y" direction. + if (poRing->getX(0) == poRing->getX(1) && + poRing->getY(1) == poRing->getY(2) && + poRing->getX(2) == poRing->getX(3) && + poRing->getY(3) == poRing->getY(0)) + return true; + + // Polygon with first segment in "x" direction. + if (poRing->getY(0) == poRing->getY(1) && + poRing->getX(1) == poRing->getX(2) && + poRing->getY(2) == poRing->getY(3) && + poRing->getX(3) == poRing->getX(0)) + return true; + + return false; +} diff --git a/ogr/ogrsf_frmts/generic/ogrlayer.cpp b/ogr/ogrsf_frmts/generic/ogrlayer.cpp index 3bdd02b5d057..fdf592d0a931 100644 --- a/ogr/ogrsf_frmts/generic/ogrlayer.cpp +++ b/ogr/ogrsf_frmts/generic/ogrlayer.cpp @@ -1680,42 +1680,7 @@ int OGRLayer::InstallFilter(OGRGeometry *poFilter) m_pPreparedFilterGeom = OGRCreatePreparedGeometry(OGRGeometry::ToHandle(m_poFilterGeom)); - /* -------------------------------------------------------------------- */ - /* Now try to determine if the filter is really a rectangle. 
*/ - /* -------------------------------------------------------------------- */ - if (wkbFlatten(m_poFilterGeom->getGeometryType()) != wkbPolygon) - return TRUE; - - OGRPolygon *poPoly = m_poFilterGeom->toPolygon(); - - if (poPoly->getNumInteriorRings() != 0) - return TRUE; - - OGRLinearRing *poRing = poPoly->getExteriorRing(); - if (poRing == nullptr) - return TRUE; - - if (poRing->getNumPoints() > 5 || poRing->getNumPoints() < 4) - return TRUE; - - // If the ring has 5 points, the last should be the first. - if (poRing->getNumPoints() == 5 && (poRing->getX(0) != poRing->getX(4) || - poRing->getY(0) != poRing->getY(4))) - return TRUE; - - // Polygon with first segment in "y" direction. - if (poRing->getX(0) == poRing->getX(1) && - poRing->getY(1) == poRing->getY(2) && - poRing->getX(2) == poRing->getX(3) && - poRing->getY(3) == poRing->getY(0)) - m_bFilterIsEnvelope = TRUE; - - // Polygon with first segment in "x" direction. - if (poRing->getY(0) == poRing->getY(1) && - poRing->getX(1) == poRing->getX(2) && - poRing->getY(2) == poRing->getY(3) && - poRing->getX(3) == poRing->getX(0)) - m_bFilterIsEnvelope = TRUE; + m_bFilterIsEnvelope = m_poFilterGeom->IsRectangle(); return TRUE; } From 48554524db29abb3541cf23026ee549fda4d8c3e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 14 May 2024 21:27:26 +0200 Subject: [PATCH 017/191] Add testing for OGRGeometry::IsRectangle() --- autotest/cpp/test_ogr.cpp | 86 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/autotest/cpp/test_ogr.cpp b/autotest/cpp/test_ogr.cpp index 518b11e43547..a1d6e77b4f44 100644 --- a/autotest/cpp/test_ogr.cpp +++ b/autotest/cpp/test_ogr.cpp @@ -3961,4 +3961,90 @@ TEST_F(test_ogr, OGRFeature_SerializeToBinary) } } +// Test OGRGeometry::IsRectangle() +TEST_F(test_ogr, OGRGeometry_IsRectangle) +{ + // Not a polygon + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt("POINT EMPTY", nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + 
EXPECT_FALSE(poGeom->IsRectangle()); + delete poGeom; + } + // Polygon empty + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt("POLYGON EMPTY", nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_FALSE(poGeom->IsRectangle()); + delete poGeom; + } + // Polygon with inner ring + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt( + "POLYGON ((0 0,0 1,1 1,1 0,0 0),(0.2 0.2,0.2 0.8,0.8 0.8,0.8 " + "0.2,0.2 0.2))", + nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_FALSE(poGeom->IsRectangle()); + delete poGeom; + } + // Polygon with 3 points + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt("POLYGON ((0 0,0 1,1 1))", nullptr, + &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_FALSE(poGeom->IsRectangle()); + delete poGeom; + } + // Polygon with 6 points + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt( + "POLYGON ((0 0,0.1 0,0.2 0,0.3 0,1 1,0 0))", nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_FALSE(poGeom->IsRectangle()); + delete poGeom; + } + // Polygon with 5 points, but last one not matching first (invalid) + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt( + "POLYGON ((0 0,0 1,1 1,1 0,-999 -999))", nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_FALSE(poGeom->IsRectangle()); + delete poGeom; + } + // Polygon with 5 points, but not rectangle + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt("POLYGON ((0 0,0 1.1,1 1,1 0,0 0))", + nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_FALSE(poGeom->IsRectangle()); + delete poGeom; + } + // Rectangle (type 1) + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt("POLYGON ((0 0,0 1,1 1,1 0,0 0))", + nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_TRUE(poGeom->IsRectangle()); + delete poGeom; + } + // Rectangle2(type 1) + { + OGRGeometry *poGeom = nullptr; + OGRGeometryFactory::createFromWkt("POLYGON ((0 0,1 0,1 1,0 1,0 
0))", + nullptr, &poGeom); + ASSERT_NE(poGeom, nullptr); + EXPECT_TRUE(poGeom->IsRectangle()); + delete poGeom; + } +} + } // namespace From 4133d5cae5606e1b7318ae2cab23f00282b93409 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 14 May 2024 13:39:48 +0200 Subject: [PATCH 018/191] Parquet dataset (multi-file typically): implement geometry intersection from WKB column as a Arrow compute function --- autotest/ogr/ogr_parquet.py | 6 +- .../parquet/ogrparquetdatasetlayer.cpp | 254 +++++++++++++++++- 2 files changed, 257 insertions(+), 3 deletions(-) diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index 298f27cdf0ea..82ed7c7afb71 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -127,6 +127,7 @@ def _check_test_parquet( expect_fast_get_extent=True, expect_ignore_fields=True, expect_domain=True, + fid_reliable_after_spatial_filtering=True, ): with gdaltest.config_option("OGR_PARQUET_BATCH_SIZE", "2"): ds = gdal.OpenEx(filename) @@ -422,7 +423,9 @@ def _check_test_parquet( lyr.SetSpatialFilterRect(4, 2, 4, 2) lyr.ResetReading() f = lyr.GetNextFeature() - assert f.GetFID() == 4 + if fid_reliable_after_spatial_filtering: + assert f.GetFID() == 4 + assert f.GetGeometryRef().ExportToWkt() == "POINT (4 2)" lyr.SetSpatialFilterRect(-100, -100, -100, -100) lyr.ResetReading() @@ -535,6 +538,7 @@ def test_ogr_parquet_check_dataset(use_vsi): expect_fast_feature_count=False, expect_fast_get_extent=False, expect_domain=False, + fid_reliable_after_spatial_filtering=False, ) finally: if use_vsi: diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index 3d645e385cb0..a1ea8a2e15ca 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -35,6 +35,7 @@ #include #include "cpl_time.h" +#include "ogr_api.h" #include "ogr_parquet.h" @@ -195,6 +196,226 @@ void OGRParquetDatasetLayer::EstablishFeatureDefn() 
m_poFeatureDefn->GetGeomFieldCount()); } +namespace +{ + +/************************************************************************/ +/* WKBGeometryOptionsType */ +/************************************************************************/ + +class WKBGeometryOptions; + +class WKBGeometryOptionsType : public cp::FunctionOptionsType +{ + WKBGeometryOptionsType() = default; + + static const WKBGeometryOptions &Cast(const cp::FunctionOptions &opts); + + public: + const char *type_name() const override + { + return "WKBGeometryOptionsType"; + } + + std::string Stringify(const cp::FunctionOptions &) const override; + bool Compare(const cp::FunctionOptions &, + const cp::FunctionOptions &) const override; + std::unique_ptr + Copy(const cp::FunctionOptions &) const override; + + static WKBGeometryOptionsType *GetSingleton() + { + static WKBGeometryOptionsType instance; + return &instance; + } +}; + +/************************************************************************/ +/* WKBGeometryOptions */ +/************************************************************************/ + +class WKBGeometryOptions : public cp::FunctionOptions +{ + + public: + explicit WKBGeometryOptions( + const std::vector &abyFilterGeomWkbIn = std::vector()) + : cp::FunctionOptions(WKBGeometryOptionsType::GetSingleton()), + abyFilterGeomWkb(abyFilterGeomWkbIn) + { + } + + bool operator==(const WKBGeometryOptions &other) const + { + return abyFilterGeomWkb == other.abyFilterGeomWkb; + } + + std::vector abyFilterGeomWkb; +}; + +const WKBGeometryOptions & +WKBGeometryOptionsType::Cast(const cp::FunctionOptions &opts) +{ + return *cpl::down_cast(&opts); +} + +bool WKBGeometryOptionsType::Compare(const cp::FunctionOptions &optsA, + const cp::FunctionOptions &optsB) const +{ + return Cast(optsA) == Cast(optsB); +} + +std::string +WKBGeometryOptionsType::Stringify(const cp::FunctionOptions &opts) const +{ + const auto &bboxOptions = Cast(opts); + std::string osRet(type_name()); + osRet += '-'; + for (GByte 
byVal : bboxOptions.abyFilterGeomWkb) + osRet += CPLSPrintf("%02X", byVal); + return osRet; +} + +std::unique_ptr +WKBGeometryOptionsType::Copy(const cp::FunctionOptions &opts) const +{ + return std::make_unique(Cast(opts)); +} + +/************************************************************************/ +/* OptionsWrapper */ +/************************************************************************/ + +/// KernelState adapter for the common case of kernels whose only +/// state is an instance of a subclass of FunctionOptions. +template struct OptionsWrapper : public cp::KernelState +{ + explicit OptionsWrapper(OptionsType optionsIn) + : options(std::move(optionsIn)) + { + } + + static arrow::Result> + Init(cp::KernelContext *, const cp::KernelInitArgs &args) + { + auto options = cpl::down_cast(args.options); + CPLAssert(options); + return std::make_unique(*options); + } + + static const OptionsType &Get(cp::KernelContext *ctx) + { + return cpl::down_cast(ctx->state())->options; + } + + OptionsType options; +}; +} // namespace + +/************************************************************************/ +/* ExecOGRWKBIntersects() */ +/************************************************************************/ + +static arrow::Status ExecOGRWKBIntersects(cp::KernelContext *ctx, + const cp::ExecSpan &batch, + cp::ExecResult *out) +{ + // Get filter geometry + const auto &opts = OptionsWrapper::Get(ctx); + OGRGeometry *poGeomTmp = nullptr; + OGRErr eErr = OGRGeometryFactory::createFromWkb( + opts.abyFilterGeomWkb.data(), nullptr, &poGeomTmp, + opts.abyFilterGeomWkb.size()); + CPL_IGNORE_RET_VAL(eErr); + CPLAssert(eErr == OGRERR_NONE); + CPLAssert(poGeomTmp != nullptr); + std::unique_ptr poFilterGeom(poGeomTmp); + OGREnvelope sFilterEnvelope; + poFilterGeom->getEnvelope(&sFilterEnvelope); + const bool bFilterIsEnvelope = poFilterGeom->IsRectangle(); + + // Deal with input array + CPLAssert(batch.num_values() == 1); + const arrow::ArraySpan &input = batch[0].array; + 
CPLAssert(input.type->id() == arrow::Type::BINARY); + // Packed array of bits + const auto pabyInputValidity = input.buffers[0].data; + const auto nInputOffsets = input.offset; + const auto panWkbOffsets = input.GetValues(1); + const auto pabyWkbArray = input.buffers[2].data; + + // Deal with output array + CPLAssert(out->type()->id() == arrow::Type::BOOL); + auto out_span = out->array_span(); + // Below array holds 8 bits per uint8_t + uint8_t *pabitsOutValues = out_span->buffers[1].data; + const auto nOutOffset = out_span->offset; + + // Iterate over WKB geometries + OGRPreparedGeometry *pPreparedFilterGeom = nullptr; + OGREnvelope sEnvelope; + for (int64_t i = 0; i < batch.length; ++i) + { + const bool bInputIsNull = + (pabyInputValidity && + arrow::bit_util::GetBit(pabyInputValidity, i + nInputOffsets) == + 0); + bool bOutputVal = false; + if (!bInputIsNull) + { + const GByte *pabyWkb = pabyWkbArray + panWkbOffsets[i]; + const size_t nWkbSize = panWkbOffsets[i + 1] - panWkbOffsets[i]; + bOutputVal = OGRLayer::FilterWKBGeometry( + pabyWkb, nWkbSize, + /* bEnvelopeAlreadySet = */ false, sEnvelope, + poFilterGeom.get(), bFilterIsEnvelope, sFilterEnvelope, + pPreparedFilterGeom); + } + if (bOutputVal) + arrow::bit_util::SetBit(pabitsOutValues, i + nOutOffset); + else + arrow::bit_util::ClearBit(pabitsOutValues, i + nOutOffset); + } + + // Cleanup + if (pPreparedFilterGeom) + OGRDestroyPreparedGeometry(pPreparedFilterGeom); + + return arrow::Status::OK(); +} + +/************************************************************************/ +/* RegisterOGRWKBIntersectsIfNeeded() */ +/************************************************************************/ + +static bool RegisterOGRWKBIntersectsIfNeeded() +{ + auto registry = cp::GetFunctionRegistry(); + bool bRet = + registry->GetFunction("OGRWKBIntersects").ValueOr(nullptr) != nullptr; + if (!bRet) + { + static const WKBGeometryOptions defaultOpts; + + // Below assert is completely useless but helps improve test 
coverage + CPLAssert(WKBGeometryOptionsType::GetSingleton()->Compare( + defaultOpts, *(WKBGeometryOptionsType::GetSingleton() + ->Copy(defaultOpts) + .get()))); + + auto func = std::make_shared( + "OGRWKBIntersects", cp::Arity::Unary(), cp::FunctionDoc(), + &defaultOpts); + cp::ScalarKernel kernel({arrow::binary()}, arrow::boolean(), + ExecOGRWKBIntersects, + OptionsWrapper::Init); + kernel.null_handling = cp::NullHandling::OUTPUT_NOT_NULL; + bRet = func->AddKernel(std::move(kernel)).ok() && + registry->AddFunction(std::move(func)).ok(); + } + return bRet; +} + /************************************************************************/ /* BuildScanner() */ /************************************************************************/ @@ -258,7 +479,7 @@ void OGRParquetDatasetLayer::BuildScanner() #endif cp::Expression expression; - if (m_poFilterGeom && + if (m_poFilterGeom && !m_poFilterGeom->IsEmpty() && CPLTestBool(CPLGetConfigOption( "OGR_PARQUET_OPTIMIZED_SPATIAL_FILTER", "YES"))) { @@ -327,8 +548,37 @@ void OGRParquetDatasetLayer::BuildScanner() } } } + else if (m_iGeomFieldFilter >= 0 && + m_iGeomFieldFilter < + static_cast(m_aeGeomEncoding.size()) && + m_aeGeomEncoding[m_iGeomFieldFilter] == + OGRArrowGeomEncoding::WKB) + { + const int iCol = + m_anMapGeomFieldIndexToArrowColumn[m_iGeomFieldFilter]; + const auto &field = m_poSchema->fields()[iCol]; + if (field->type()->id() == arrow::Type::BINARY && + RegisterOGRWKBIntersectsIfNeeded()) + { + auto oFieldRef = arrow::FieldRef(iCol); + std::vector abyFilterGeomWkb; + abyFilterGeomWkb.resize(m_poFilterGeom->WkbSize()); + m_poFilterGeom->exportToWkb(wkbNDR, abyFilterGeomWkb.data(), + wkbVariantIso); + expression = + cp::call("OGRWKBIntersects", {cp::field_ref(oFieldRef)}, + WKBGeometryOptions(abyFilterGeomWkb)); + + if (expression.is_valid()) + { + m_bBaseArrowIgnoreSpatialFilterRect = true; + m_bBaseArrowIgnoreSpatialFilter = true; + m_bSkipFilterGeometry = true; + } + } + } - if (expression.is_valid()) + if 
(expression.is_valid() && !m_bSkipFilterGeometry) { m_bBaseArrowIgnoreSpatialFilterRect = true; From 1dc79e374287e5224acf89c3a02825f692e92487 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 14 May 2024 18:03:08 +0200 Subject: [PATCH 019/191] Parquet: bring back run-time compatibility with libarrow <= 13 --- .../parquet/ogrparquetdatasetlayer.cpp | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp index a1ea8a2e15ca..d6c55f63cced 100644 --- a/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp +++ b/ogr/ogrsf_frmts/parquet/ogrparquetdatasetlayer.cpp @@ -42,6 +42,12 @@ #include "../arrow_common/ograrrowlayer.hpp" #include "../arrow_common/ograrrowdataset.hpp" +#if PARQUET_VERSION_MAJOR >= 13 +// Using field indices for FieldRef is only supported since +// https://github.com/apache/arrow/commit/10eedbe63c71f4cf8f0621f3a2304ab3168a2ae5 +#define SUPPORTS_INDICES_IN_FIELD_REF +#endif + namespace cp = ::arrow::compute; /************************************************************************/ @@ -489,6 +495,7 @@ void OGRParquetDatasetLayer::BuildScanner() { // This actually requires Arrow >= 15 (https://github.com/apache/arrow/issues/39064) // to be more efficient. 
+#ifdef SUPPORTS_INDICES_IN_FIELD_REF const auto &oBBOXDef = oIter->second; expression = cp::and_( {cp::less_equal( @@ -507,6 +514,31 @@ void OGRParquetDatasetLayer::BuildScanner() cp::field_ref(arrow::FieldRef( oBBOXDef.iArrowCol, oBBOXDef.iArrowSubfieldYMax)), cp::literal(m_sFilterEnvelope.MinY))}); +#else + const auto oIter2 = m_oMapGeometryColumns.find( + m_poFeatureDefn->GetGeomFieldDefn(m_iGeomFieldFilter) + ->GetNameRef()); + std::string osBBOXColumn; + std::string osXMin, osYMin, osXMax, osYMax; + if (ParseGeometryColumnCovering(oIter2->second, osBBOXColumn, + osXMin, osYMin, osXMax, osYMax)) + { + expression = cp::and_( + {cp::less_equal(cp::field_ref(arrow::FieldRef( + osBBOXColumn, osXMin)), + cp::literal(m_sFilterEnvelope.MaxX)), + cp::less_equal(cp::field_ref(arrow::FieldRef( + osBBOXColumn, osYMin)), + cp::literal(m_sFilterEnvelope.MaxY)), + cp::greater_equal(cp::field_ref(arrow::FieldRef( + osBBOXColumn, osXMax)), + cp::literal(m_sFilterEnvelope.MinX)), + cp::greater_equal( + cp::field_ref( + arrow::FieldRef(osBBOXColumn, osYMax)), + cp::literal(m_sFilterEnvelope.MinY))}); + } +#endif } else if (m_iGeomFieldFilter >= 0 && m_iGeomFieldFilter < @@ -519,7 +551,11 @@ void OGRParquetDatasetLayer::BuildScanner() const auto &field = m_poSchema->fields()[iCol]; auto type = field->type(); std::vector fieldRefs; +#ifdef SUPPORTS_INDICES_IN_FIELD_REF fieldRefs.emplace_back(iCol); +#else + fieldRefs.emplace_back(field->name()); +#endif if (type->id() == arrow::Type::STRUCT) { const auto fieldStruct = @@ -560,7 +596,11 @@ void OGRParquetDatasetLayer::BuildScanner() if (field->type()->id() == arrow::Type::BINARY && RegisterOGRWKBIntersectsIfNeeded()) { +#ifdef SUPPORTS_INDICES_IN_FIELD_REF auto oFieldRef = arrow::FieldRef(iCol); +#else + auto oFieldRef = arrow::FieldRef(field->name()); +#endif std::vector abyFilterGeomWkb; abyFilterGeomWkb.resize(m_poFilterGeom->WkbSize()); m_poFilterGeom->exportToWkb(wkbNDR, abyFilterGeomWkb.data(), @@ -697,8 +737,28 @@ 
OGRParquetDatasetLayer::BuildArrowFilter(const swq_expr_node *poNode, poNode->field_index < m_poFeatureDefn->GetFieldCount()) { std::vector fieldRefs; +#ifdef SUPPORTS_INDICES_IN_FIELD_REF for (int idx : m_anMapFieldIndexToArrowColumn[poNode->field_index]) fieldRefs.emplace_back(idx); +#else + std::shared_ptr field; + for (int idx : m_anMapFieldIndexToArrowColumn[poNode->field_index]) + { + if (!field) + { + field = m_poSchema->fields()[idx]; + } + else + { + CPLAssert(field->type()->id() == arrow::Type::STRUCT); + const auto fieldStruct = + std::static_pointer_cast( + field->type()); + field = fieldStruct->fields()[idx]; + } + fieldRefs.emplace_back(field->name()); + } +#endif auto expr = cp::field_ref(arrow::FieldRef(std::move(fieldRefs))); // Comparing a boolean column to 0 or 1 fails without explicit cast @@ -714,7 +774,12 @@ OGRParquetDatasetLayer::BuildArrowFilter(const swq_expr_node *poNode, m_poFeatureDefn->GetFieldCount() + SPF_FID && m_iFIDArrowColumn >= 0) { +#ifdef SUPPORTS_INDICES_IN_FIELD_REF return cp::field_ref(arrow::FieldRef(m_iFIDArrowColumn)); +#else + return cp::field_ref(arrow::FieldRef( + m_poSchema->fields()[m_iFIDArrowColumn]->name())); +#endif } } From 206daf0797150caded06a513c8bd25c922ce87ba Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 19:44:57 +0200 Subject: [PATCH 020/191] vsiadls.py: fix test on machines with ~/.azure credentials available --- autotest/gcore/vsiadls.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/autotest/gcore/vsiadls.py b/autotest/gcore/vsiadls.py index bceeed06175d..ee6995d08a7b 100755 --- a/autotest/gcore/vsiadls.py +++ b/autotest/gcore/vsiadls.py @@ -56,7 +56,19 @@ def open_for_read(uri): @pytest.fixture(autouse=True, scope="module") def startup_and_cleanup(): - with gdaltest.config_option("CPL_AZURE_VM_API_ROOT_URL", "disabled"): + with gdaltest.config_options( + { + "AZURE_STORAGE_CONNECTION_STRING": None, + "AZURE_STORAGE_ACCOUNT": None, + 
"AZURE_STORAGE_ACCESS_KEY": None, + "AZURE_STORAGE_SAS_TOKEN": None, + "AZURE_NO_SIGN_REQUEST": None, + "AZURE_CONFIG_DIR": "", + "AZURE_STORAGE_ACCESS_TOKEN": "", + "AZURE_FEDERATED_TOKEN_FILE": "", + "CPL_AZURE_VM_API_ROOT_URL": "disabled", + } + ): assert gdal.GetSignedURL("/vsiadls/foo/bar") is None gdaltest.webserver_process = None From 995e08918807b35c92d918c0f4ca5de8514f49e6 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 02:17:19 +0200 Subject: [PATCH 021/191] ISIS3: Create(): open file(s) in wb+ mode if possible --- frmts/pds/isis3dataset.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/frmts/pds/isis3dataset.cpp b/frmts/pds/isis3dataset.cpp index fec7fc7ceae8..714f1b5fecbf 100644 --- a/frmts/pds/isis3dataset.cpp +++ b/frmts/pds/isis3dataset.cpp @@ -3998,7 +3998,10 @@ GDALDataset *ISIS3Dataset::Create(const char *pszFilename, int nXSize, return nullptr; } - VSILFILE *fp = VSIFOpenExL(pszFilename, "wb", true); + const char *pszPermission = + VSISupportsRandomWrite(pszFilename, true) ? 
"wb+" : "wb"; + + VSILFILE *fp = VSIFOpenExL(pszFilename, pszPermission, true); if (fp == nullptr) { CPLError(CE_Failure, CPLE_FileIO, "Cannot create %s: %s", pszFilename, @@ -4014,7 +4017,7 @@ GDALDataset *ISIS3Dataset::Create(const char *pszFilename, int nXSize, osExternalFilename = CSLFetchNameValueDef(papszOptions, "EXTERNAL_FILENAME", CPLResetExtension(pszFilename, "cub")); - fpImage = VSIFOpenExL(osExternalFilename.c_str(), "wb", true); + fpImage = VSIFOpenExL(osExternalFilename.c_str(), pszPermission, true); if (fpImage == nullptr) { CPLError(CE_Failure, CPLE_FileIO, "Cannot create %s: %s", From 377150f5d803faed6e0b10314dcc37a5edcf175b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 02:47:54 +0200 Subject: [PATCH 022/191] PDS4: Create(): open file in wb+ mode if possible --- frmts/pds/pds4dataset.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/frmts/pds/pds4dataset.cpp b/frmts/pds/pds4dataset.cpp index 9862a3ea01bb..559868b77975 100644 --- a/frmts/pds/pds4dataset.cpp +++ b/frmts/pds/pds4dataset.cpp @@ -4544,7 +4544,11 @@ PDS4Dataset *PDS4Dataset::CreateInternal(const char *pszFilename, } else { - fpImage = VSIFOpenL(osImageFilename, bAppend ? "rb+" : "wb"); + fpImage = VSIFOpenL( + osImageFilename, + bAppend ? "rb+" + : VSISupportsRandomWrite(osImageFilename.c_str(), true) ? 
"wb+" + : "wb"); if (fpImage == nullptr) { CPLError(CE_Failure, CPLE_FileIO, "Cannot create %s", From 46db4439843fd400ea380ec1f8b6d30501f12f1d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 02:22:06 +0200 Subject: [PATCH 023/191] /vsimem/: make Read() error for a file not opened with read permissions --- port/cpl_vsi_mem.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/port/cpl_vsi_mem.cpp b/port/cpl_vsi_mem.cpp index 904ef83dc6e0..3ee6b4b635de 100644 --- a/port/cpl_vsi_mem.cpp +++ b/port/cpl_vsi_mem.cpp @@ -135,8 +135,10 @@ class VSIMemHandle final : public VSIVirtualHandle public: std::shared_ptr poFile = nullptr; vsi_l_offset m_nOffset = 0; + bool m_bReadAllowed = false; bool bUpdate = false; bool bEOF = false; + bool m_bError = false; bool bExtendFileAtNextWrite = false; VSIMemHandle() = default; @@ -397,6 +399,12 @@ size_t VSIMemHandle::Read(void *pBuffer, size_t nSize, size_t nCount) { CPL_SHARED_LOCK oLock(poFile->m_oMutex); + if (!m_bReadAllowed) + { + m_bError = true; + return 0; + } + size_t nBytesToRead = nSize * nCount; if (nBytesToRead == 0) return 0; @@ -641,14 +649,15 @@ VSIVirtualHandle *VSIMemFilesystemHandler::Open(const char *pszFilename, poHandle->poFile = poFile; poHandle->m_nOffset = 0; poHandle->bEOF = false; - poHandle->bUpdate = strstr(pszAccess, "w") || strstr(pszAccess, "+") || - strstr(pszAccess, "a"); + poHandle->bUpdate = strchr(pszAccess, 'w') || strchr(pszAccess, '+') || + strchr(pszAccess, 'a'); + poHandle->m_bReadAllowed = strchr(pszAccess, 'r') || strchr(pszAccess, '+'); #ifdef DEBUG_VERBOSE CPLDebug("VSIMEM", "Opening handle %p on %s: ref_count=%d", poHandle, pszFilename, static_cast(poFile.use_count())); #endif - if (strstr(pszAccess, "a")) + if (strchr(pszAccess, 'a')) { CPL_SHARED_LOCK oLock(poFile->m_oMutex); poHandle->m_nOffset = poFile->nLength; @@ -1062,6 +1071,7 @@ VSILFILE *VSIFileFromMemBuffer(const char *pszFilename, GByte *pabyData, poHandle->poFile = 
std::move(poFile); poHandle->bUpdate = true; + poHandle->m_bReadAllowed = true; return poHandle; } From 981e0e9e5777ed40f5cfb3472d0af4024105e4e2 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 03:05:55 +0200 Subject: [PATCH 024/191] /vsicrypt/: if opening in write-only mode, do so on the underlying file as well --- port/cpl_vsil_crypt.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/port/cpl_vsil_crypt.cpp b/port/cpl_vsil_crypt.cpp index fc0cbdd1b0fa..9497918b6247 100644 --- a/port/cpl_vsil_crypt.cpp +++ b/port/cpl_vsil_crypt.cpp @@ -1090,7 +1090,9 @@ size_t VSICryptFileHandle::Read(void *pBuffer, size_t nSize, size_t nMemb) #endif if ((nPerms & VSICRYPT_READ) == 0) + { return 0; + } if (nCurPos >= poHeader->nPayloadFileSize) { @@ -1584,11 +1586,11 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, return nullptr; } + CPLString osAccess(pszAccess); + if (strchr(pszAccess, 'b') == nullptr) + osAccess += "b"; if (strchr(pszAccess, 'r')) { - CPLString osAccess(pszAccess); - if (strchr(pszAccess, 'b') == nullptr) - osAccess += "b"; VSIVirtualHandle *fpBase = reinterpret_cast( VSIFOpenL(osFilename, osAccess)); if (fpBase == nullptr) @@ -1721,8 +1723,8 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, return nullptr; } - VSIVirtualHandle *fpBase = - reinterpret_cast(VSIFOpenL(osFilename, "wb+")); + VSIVirtualHandle *fpBase = reinterpret_cast( + VSIFOpenL(osFilename, osAccess.c_str())); if (fpBase == nullptr) { memset(const_cast(osKey.c_str()), 0, osKey.size()); From d5fd8c4ed943b0895a1f70f462d3459afe9dcc64 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 02:27:34 +0200 Subject: [PATCH 025/191] /vsigzip/: sanitize Eof() detection --- autotest/gcore/vsifile.py | 13 +++++++- port/cpl_vsil_gzip.cpp | 70 +++++++++++++++++++++------------------ 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/autotest/gcore/vsifile.py 
b/autotest/gcore/vsifile.py index f10d39d5cbb3..7344ddf26e82 100755 --- a/autotest/gcore/vsifile.py +++ b/autotest/gcore/vsifile.py @@ -1141,6 +1141,9 @@ def test_vsifile_vsitar_gz_with_tar_multiple_of_65536_bytes(): f = gdal.VSIFOpenL("/vsitar/data/tar_of_65536_bytes.tar.gz/zero.bin", "rb") assert f is not None read_bytes = gdal.VSIFReadL(1, 65024, f) + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFReadL(1, 1, f) == b"" + assert gdal.VSIFEofL(f) == 1 gdal.VSIFCloseL(f) assert read_bytes == b"\x00" * 65024 gdal.Unlink("data/tar_of_65536_bytes.tar.gz.properties") @@ -1155,7 +1158,15 @@ def test_vsifile_vsizip_stored(): f = gdal.VSIFOpenL("/vsizip/data/stored.zip/foo.txt", "rb") assert f assert gdal.VSIFReadL(1, 5, f) == b"foo\n" - assert gdal.VSIFEofL(f) + assert gdal.VSIFEofL(f) == 1 + gdal.VSIFCloseL(f) + + f = gdal.VSIFOpenL("/vsizip/data/stored.zip/foo.txt", "rb") + assert f + assert gdal.VSIFReadL(1, 4, f) == b"foo\n" + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFReadL(1, 1, f) == b"" + assert gdal.VSIFEofL(f) == 1 gdal.VSIFCloseL(f) diff --git a/port/cpl_vsil_gzip.cpp b/port/cpl_vsil_gzip.cpp index 460208255964..f9a6df9501e3 100644 --- a/port/cpl_vsil_gzip.cpp +++ b/port/cpl_vsil_gzip.cpp @@ -173,9 +173,10 @@ class VSIGZipHandle final : public VSIVirtualHandle /* Fields from gz_stream structure */ z_stream stream; - int z_err = Z_OK; /* error code for last stream operation */ - int z_eof = 0; /* set if end of input file (but not necessarily of the - uncompressed stream ! "in" must be null too ) */ + int z_err = Z_OK; /* error code for last stream operation */ + int z_eof = 0; /* set if end of input file (but not necessarily of the + uncompressed stream !) 
*/ + bool m_bEOF = false; /* EOF flag for uncompressed stream */ Byte *inbuf = nullptr; /* input buffer */ Byte *outbuf = nullptr; /* output buffer */ uLong crc = 0; /* crc32 of uncompressed data */ @@ -282,9 +283,10 @@ class VSIDeflate64Handle final : public VSIVirtualHandle /* Fields from gz_stream structure */ z_stream stream; - int z_err = Z_OK; /* error code for last stream operation */ - int z_eof = 0; /* set if end of input file (but not necessarily of the - uncompressed stream ! "in" must be null too ) */ + int z_err = Z_OK; /* error code for last stream operation */ + int z_eof = 0; /* set if end of input file (but not necessarily of the + uncompressed stream ! ) */ + bool m_bEOF = false; /* EOF flag for uncompressed stream */ Byte *inbuf = nullptr; /* input buffer */ Byte *outbuf = nullptr; /* output buffer */ std::vector extraOutput{}; @@ -723,6 +725,7 @@ int VSIGZipHandle::gzrewind() { z_err = Z_OK; z_eof = 0; + m_bEOF = false; stream.avail_in = 0; stream.next_in = inbuf; crc = 0; @@ -739,6 +742,8 @@ int VSIGZipHandle::gzrewind() int VSIGZipHandle::Seek(vsi_l_offset nOffset, int nWhence) { + m_bEOF = false; + return gzseek(nOffset, nWhence) ? 
0 : -1; } @@ -1005,14 +1010,11 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, static_cast(nMemb)); #endif - if ((z_eof && in == 0) || z_err == Z_STREAM_END) + if (m_bEOF || z_err != Z_OK) { - z_eof = 1; - in = 0; -#ifdef ENABLE_DEBUG - CPLDebug("GZIP", "Read: Eof"); -#endif - return 0; /* EOF */ + if (z_err == Z_STREAM_END && nSize > 0 && nMemb > 0) + m_bEOF = true; + return 0; } const unsigned len = @@ -1048,8 +1050,10 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, const uInt nToRead = static_cast( std::min(m_compressed_size - (in + nRead), static_cast(stream.avail_out))); - uInt nReadFromFile = static_cast( + const uInt nReadFromFile = static_cast( m_poBaseHandle->Read(next_out, 1, nToRead)); + if (nReadFromFile < nToRead && !m_poBaseHandle->Eof()) + z_err = Z_ERRNO; stream.avail_out -= nReadFromFile; nRead += nReadFromFile; } @@ -1057,8 +1061,8 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, out += nRead; if (nRead < len) { + m_bEOF = true; z_eof = 1; - in = 0; } #ifdef ENABLE_DEBUG CPLDebug("GZIP", "Read return %d", static_cast(nRead / nSize)); @@ -1078,8 +1082,7 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, // discarding it. CPLError(CE_Failure, CPLE_AppDefined, "File size of underlying /vsigzip/ file has changed"); - z_eof = 1; - in = 0; + z_err = Z_ERRNO; CPL_VSIL_GZ_RETURN(0); return 0; } @@ -1204,13 +1207,16 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, size_t ret = (len - stream.avail_out) / nSize; if (z_err != Z_OK && z_err != Z_STREAM_END) { - z_eof = 1; - in = 0; + m_bEOF = true; // wrong... 
CPLError(CE_Failure, CPLE_AppDefined, "In file %s, at line %d, decompression failed with " "z_err = %d, return = %d", __FILE__, __LINE__, z_err, static_cast(ret)); } + else if (ret < nMemb) + { + m_bEOF = true; + } #ifdef ENABLE_DEBUG CPLDebug("GZIP", "Read return %d (z_err=%d, z_eof=%d)", @@ -1261,7 +1267,7 @@ int VSIGZipHandle::Eof() #ifdef ENABLE_DEBUG CPLDebug("GZIP", "Eof()"); #endif - return z_eof && in == 0; + return m_bEOF; } /************************************************************************/ @@ -1467,6 +1473,7 @@ int VSIDeflate64Handle::gzrewind() int VSIDeflate64Handle::Seek(vsi_l_offset nOffset, int nWhence) { + m_bEOF = false; return gzseek(nOffset, nWhence) ? 0 : -1; } @@ -1652,14 +1659,11 @@ size_t VSIDeflate64Handle::Read(void *const buf, size_t const nSize, static_cast(nMemb)); #endif - if ((z_eof && in == 0) || z_err == Z_STREAM_END) + if (m_bEOF || z_err != Z_OK) { - z_eof = 1; - in = 0; -#ifdef ENABLE_DEBUG - CPLDebug("GZIP", "Read: Eof"); -#endif - return 0; /* EOF */ + if (z_err == Z_STREAM_END && nSize > 0 && nMemb > 0) + m_bEOF = true; + return 0; } const unsigned len = @@ -1708,8 +1712,7 @@ size_t VSIDeflate64Handle::Read(void *const buf, size_t const nSize, // discarding it. CPLError(CE_Failure, CPLE_AppDefined, "File size of underlying /vsigzip/ file has changed"); - z_eof = 1; - in = 0; + z_err = Z_ERRNO; CPL_VSIL_GZ_RETURN(0); return 0; } @@ -1873,13 +1876,16 @@ size_t VSIDeflate64Handle::Read(void *const buf, size_t const nSize, size_t ret = (len - stream.avail_out) / nSize; if (z_err != Z_OK && z_err != Z_STREAM_END) { - z_eof = 1; - in = 0; + m_bEOF = true; // Wrong... 
CPLError(CE_Failure, CPLE_AppDefined, "In file %s, at line %d, decompression failed with " "z_err = %d, return = %d", __FILE__, __LINE__, z_err, static_cast(ret)); } + else if (ret < nMemb) + { + m_bEOF = true; + } #ifdef ENABLE_DEBUG CPLDebug("GZIP", "Read return %d (z_err=%d, z_eof=%d)", @@ -1909,7 +1915,7 @@ int VSIDeflate64Handle::Eof() #ifdef ENABLE_DEBUG CPLDebug("GZIP", "Eof()"); #endif - return z_eof && in == 0; + return m_bEOF; } /************************************************************************/ From 36267863aaffdaee747304cbc8e46e7125bcd782 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 19:27:35 +0200 Subject: [PATCH 026/191] Add VSIFErrorL() and VSIFClearErrL(), and implement them ``` * \fn VSIVirtualHandle::Error() * \brief Test the error indicator. * * Returns TRUE (non-zero) if an error condition occurred during the * previous read operation. The error indicator is cleared by a call to * VSIFClearErrL(). Note that a end-of-file situation, reported by VSIFEofL(), * is *not* an error reported by VSIFErrorL(). ``` ``` * \fn VSIVirtualHandle::ClearErr() * \brief Reset the error and end-of-file indicators. 
``` --- MIGRATION_GUIDE.TXT | 17 +++ autotest/gcore/vsicurl.py | 31 +++-- autotest/gcore/vsifile.py | 95 ++++++++++++---- autotest/gcore/vsizip.py | 11 ++ frmts/georaster/cpl_vsil_ocilob.cpp | 10 ++ port/cpl_vsi.h | 17 +++ port/cpl_vsi_mem.cpp | 27 ++++- port/cpl_vsi_virtual.h | 4 + port/cpl_vsil.cpp | 98 +++++++++++++++- port/cpl_vsil_buffered_reader.cpp | 51 ++++++++- port/cpl_vsil_cache.cpp | 37 +++++- port/cpl_vsil_crypt.cpp | 34 +++++- port/cpl_vsil_curl.cpp | 37 ++++-- port/cpl_vsil_curl_class.h | 35 +++++- port/cpl_vsil_curl_streaming.cpp | 42 +++++-- port/cpl_vsil_gzip.cpp | 169 +++++++++++++++++++++------- port/cpl_vsil_libarchive.cpp | 18 ++- port/cpl_vsil_plugin.cpp | 32 ++++++ port/cpl_vsil_plugin.h | 4 + port/cpl_vsil_s3.cpp | 9 -- port/cpl_vsil_sparsefile.cpp | 43 +++++++ port/cpl_vsil_stdin.cpp | 39 ++++++- port/cpl_vsil_stdout.cpp | 78 ++++++++----- port/cpl_vsil_subfile.cpp | 32 +++++- port/cpl_vsil_unix_stdio_64.cpp | 34 +++++- port/cpl_vsil_uploadonclose.cpp | 9 ++ port/cpl_vsil_win32.cpp | 65 +++++++---- swig/include/cpl.i | 2 + 28 files changed, 904 insertions(+), 176 deletions(-) diff --git a/MIGRATION_GUIDE.TXT b/MIGRATION_GUIDE.TXT index 0031236db8b6..c7d011677e4c 100644 --- a/MIGRATION_GUIDE.TXT +++ b/MIGRATION_GUIDE.TXT @@ -1,3 +1,20 @@ +MIGRATION GUIDE FROM GDAL 3.9 to GDAL 3.10 +------------------------------------------ + +- User code using VSIFEofL() to potentially end read loops should also test + the return code of the new VSIFErrorL() function. Some virtual file systems + that used to report errors through VSIFEofL() now do so through VSIFErrorL(). + +- Out-of-tree implementations of VSIVirtualHandle(): + 2 new required virtual methods must be implemented: int Error(), and + void ClearErr() following POSIX semantics of ferror() and clearerr(). 
+ This is to distinguish Read() that returns less bytes than requested because + of an error (Error() != 0) or because of end-of-file (Eof() != 0) + + The VSIFilesystemPluginCallbacksStruct structure is extended with 2 + corresponding optional (but recommended to be implemented to reliably detect + reading errors) callbacks "error" and "clear_err". + MIGRATION GUIDE FROM GDAL 3.8 to GDAL 3.9 ----------------------------------------- diff --git a/autotest/gcore/vsicurl.py b/autotest/gcore/vsicurl.py index 1b425be6935d..6f5cf197f1f3 100755 --- a/autotest/gcore/vsicurl.py +++ b/autotest/gcore/vsicurl.py @@ -582,9 +582,16 @@ def test_vsicurl_test_retry(server): ) data_len = 0 if f: - data_len = len(gdal.VSIFReadL(1, 1, f)) - gdal.VSIFCloseL(f) - assert data_len == 0 + try: + data_len = len(gdal.VSIFReadL(1, 1, f)) + assert data_len == 0 + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 1 + gdal.VSIFClearErrL(f) + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 0 + finally: + gdal.VSIFCloseL(f) gdal.VSICurlClearCache() @@ -601,13 +608,17 @@ def test_vsicurl_test_retry(server): "rb", ) assert f is not None - gdal.ErrorReset() - with gdal.quiet_errors(): - data = gdal.VSIFReadL(1, 3, f).decode("ascii") - error_msg = gdal.GetLastErrorMsg() - gdal.VSIFCloseL(f) - assert data == "foo" - assert "429" in error_msg + try: + gdal.ErrorReset() + with gdal.quiet_errors(): + data = gdal.VSIFReadL(1, 3, f).decode("ascii") + assert data == "foo" + error_msg = gdal.GetLastErrorMsg() + assert "429" in error_msg + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 0 + finally: + gdal.VSIFCloseL(f) ############################################################################### diff --git a/autotest/gcore/vsifile.py b/autotest/gcore/vsifile.py index 7344ddf26e82..ac8ff4943160 100755 --- a/autotest/gcore/vsifile.py +++ b/autotest/gcore/vsifile.py @@ -85,18 +85,32 @@ def vsifile_generic(filename, options=[]): assert start_time == pytest.approx(statBuf.mtime, 
abs=2) fp = gdal.VSIFOpenExL(filename, "rb", False, options) - assert gdal.VSIFReadL(1, 0, fp) is None - assert gdal.VSIFReadL(0, 1, fp) is None - buf = gdal.VSIFReadL(1, 7, fp) - assert gdal.VSIFWriteL("a", 1, 1, fp) == 0 - assert gdal.VSIFTruncateL(fp, 0) != 0 - gdal.VSIFCloseL(fp) - - assert buf.decode("ascii") == "01234XX" + try: + assert fp + assert gdal.VSIFReadL(1, 0, fp) is None + assert gdal.VSIFReadL(0, 1, fp) is None + buf = gdal.VSIFReadL(1, 7, fp) + assert gdal.VSIFEofL(fp) == 0 + assert gdal.VSIFErrorL(fp) == 0 + assert buf == b"01234XX" + + buf = gdal.VSIFReadL(1, 1, fp) + assert gdal.VSIFEofL(fp) == 1 + assert gdal.VSIFErrorL(fp) == 0 + assert buf == b"" + gdal.VSIFClearErrL(fp) + assert gdal.VSIFEofL(fp) == 0 + assert gdal.VSIFErrorL(fp) == 0 + + assert gdal.VSIFWriteL("a", 1, 1, fp) == 0 + assert gdal.VSIFTruncateL(fp, 0) != 0 + finally: + if fp: + gdal.VSIFCloseL(fp) # Test append mode on existing file fp = gdal.VSIFOpenExL(filename, "ab", False, options) - gdal.VSIFWriteL("XX", 1, 2, fp) + assert gdal.VSIFWriteL("XX", 1, 2, fp) == 2 gdal.VSIFCloseL(fp) statBuf = gdal.VSIStatL( @@ -112,7 +126,7 @@ def vsifile_generic(filename, options=[]): # Test append mode on non existing file fp = gdal.VSIFOpenExL(filename, "ab", False, options) - gdal.VSIFWriteL("XX", 1, 2, fp) + assert gdal.VSIFWriteL("XX", 1, 2, fp) == 2 gdal.VSIFCloseL(fp) statBuf = gdal.VSIStatL( @@ -123,6 +137,17 @@ def vsifile_generic(filename, options=[]): assert gdal.Unlink(filename) == 0 + # Test read on a file opened in write-only mode + fp = gdal.VSIFOpenExL(filename, "wb", False, options) + try: + assert fp + assert len(gdal.VSIFReadL(1, 1, fp)) == 0 + assert gdal.VSIFErrorL(fp) == 1 + assert gdal.VSIFEofL(fp) == 0 + finally: + if fp: + gdal.VSIFCloseL(fp) + ############################################################################### # Test /vsimem @@ -271,6 +296,8 @@ def test_vsifile_vsicache_read_error(): gdal.VSIFTruncateL(f, 0) assert len(gdal.VSIFReadL(1, 5000 * 1000, f2)) 
== 0 + assert gdal.VSIFEofL(f2) + assert gdal.VSIFErrorL(f2) == 0 # Extend the file again gdal.VSIFTruncateL(f, 1000 * 1000) @@ -291,6 +318,8 @@ def test_vsifile_vsicache_read_error(): gdal.VSIFSeekL(f2, 0, 0) assert len(gdal.VSIFReadL(1, CHUNK_SIZE, f2)) == 10 + assert gdal.VSIFEofL(f2) + assert gdal.VSIFErrorL(f2) == 0 gdal.VSIFSeekL(f2, 100, 0) assert len(gdal.VSIFReadL(1, CHUNK_SIZE, f2)) == 0 @@ -365,6 +394,7 @@ def test_vsifile_7(): assert gdal.VSIFTellL(fp) == 0x7FFFFFFFFFFFFFFF assert not gdal.VSIFReadL(1, 1, fp) assert gdal.VSIFEofL(fp) == 1 + assert gdal.VSIFErrorL(fp) == 0 gdal.VSIFCloseL(fp) gdal.Unlink("/vsimem/vsifile_7.bin") @@ -650,32 +680,44 @@ def test_vsifile_14(): ############################################################################### -# Test issue with Eof() not detecting end of corrupted gzip stream (#6944) +# Test issue with Error() not detecting end of corrupted gzip stream (#6944) def test_vsifile_15(): fp = gdal.VSIFOpenL("/vsigzip/data/corrupted_z_buf_error.gz", "rb") assert fp is not None - file_len = 0 - while not gdal.VSIFEofL(fp): + try: + file_len = 0 + while not gdal.VSIFErrorL(fp): + with gdal.quiet_errors(): + file_len += len(gdal.VSIFReadL(1, 4, fp)) + assert file_len == 6469 + assert gdal.VSIFEofL(fp) == 0 + with gdal.quiet_errors(): file_len += len(gdal.VSIFReadL(1, 4, fp)) - assert file_len == 6469 + assert file_len == 6469 + assert gdal.VSIFErrorL(fp) == 1 + assert gdal.VSIFEofL(fp) == 0 - with gdal.quiet_errors(): - file_len += len(gdal.VSIFReadL(1, 4, fp)) - assert file_len == 6469 - - with gdal.quiet_errors(): - assert gdal.VSIFSeekL(fp, 0, 2) != 0 + with gdal.quiet_errors(): + assert gdal.VSIFSeekL(fp, 0, 2) != 0 - assert gdal.VSIFSeekL(fp, 0, 0) == 0 + assert gdal.VSIFSeekL(fp, 0, 0) == 0 + assert gdal.VSIFErrorL(fp) == 1 + assert gdal.VSIFEofL(fp) == 0 - len_read = len(gdal.VSIFReadL(1, file_len, fp)) - assert len_read == file_len + gdal.VSIFClearErrL(fp) + assert gdal.VSIFErrorL(fp) == 0 + assert 
gdal.VSIFEofL(fp) == 0 - gdal.VSIFCloseL(fp) + len_read = len(gdal.VSIFReadL(1, file_len, fp)) + assert len_read == file_len + assert gdal.VSIFErrorL(fp) == 0 + assert gdal.VSIFEofL(fp) == 0 + finally: + gdal.VSIFCloseL(fp) ############################################################################### @@ -1142,8 +1184,10 @@ def test_vsifile_vsitar_gz_with_tar_multiple_of_65536_bytes(): assert f is not None read_bytes = gdal.VSIFReadL(1, 65024, f) assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 0 assert gdal.VSIFReadL(1, 1, f) == b"" assert gdal.VSIFEofL(f) == 1 + assert gdal.VSIFErrorL(f) == 0 gdal.VSIFCloseL(f) assert read_bytes == b"\x00" * 65024 gdal.Unlink("data/tar_of_65536_bytes.tar.gz.properties") @@ -1159,14 +1203,17 @@ def test_vsifile_vsizip_stored(): assert f assert gdal.VSIFReadL(1, 5, f) == b"foo\n" assert gdal.VSIFEofL(f) == 1 + assert gdal.VSIFErrorL(f) == 0 gdal.VSIFCloseL(f) f = gdal.VSIFOpenL("/vsizip/data/stored.zip/foo.txt", "rb") assert f assert gdal.VSIFReadL(1, 4, f) == b"foo\n" assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 0 assert gdal.VSIFReadL(1, 1, f) == b"" assert gdal.VSIFEofL(f) == 1 + assert gdal.VSIFErrorL(f) == 0 gdal.VSIFCloseL(f) diff --git a/autotest/gcore/vsizip.py b/autotest/gcore/vsizip.py index c10be914dfeb..d28765150af6 100755 --- a/autotest/gcore/vsizip.py +++ b/autotest/gcore/vsizip.py @@ -737,10 +737,16 @@ def test_vsizip_deflate64(): assert f try: data = gdal.VSIFReadL(1, size, f) + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 0 assert len(data) == size assert len(gdal.VSIFReadL(1, 1, f)) == 0 + assert gdal.VSIFEofL(f) == 1 + assert gdal.VSIFErrorL(f) == 0 assert gdal.VSIFSeekL(f, 0, 0) == 0 data2 = gdal.VSIFReadL(1, size, f) + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 0 len_data2 = len(data2) assert len_data2 == size assert data2 == data @@ -752,6 +758,11 @@ def test_vsizip_deflate64(): ]: assert gdal.VSIFSeekL(f, pos, 0) == 0 data2 = gdal.VSIFReadL(1, nread, 
f) + if pos + nread > size: + assert gdal.VSIFEofL(f) == 1 + else: + assert gdal.VSIFEofL(f) == 0 + assert gdal.VSIFErrorL(f) == 0, (pos, nread) len_data2 = len(data2) assert len_data2 == min(nread, size - pos), (pos, nread) assert data2 == data[pos : pos + len_data2], (pos, nread) diff --git a/frmts/georaster/cpl_vsil_ocilob.cpp b/frmts/georaster/cpl_vsil_ocilob.cpp index a5237f3f86f5..f930d1173f18 100644 --- a/frmts/georaster/cpl_vsil_ocilob.cpp +++ b/frmts/georaster/cpl_vsil_ocilob.cpp @@ -80,6 +80,16 @@ class VSIOCILobHandle : public VSIVirtualHandle size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; int Eof() override; + + int Error() override + { + return 0; + } // TODO? + + void ClearErr() override + { + } // TODO? + int Close() override; }; diff --git a/port/cpl_vsi.h b/port/cpl_vsi.h index fb996df5e299..0561eef65e8e 100644 --- a/port/cpl_vsi.h +++ b/port/cpl_vsi.h @@ -168,6 +168,8 @@ int CPL_DLL VSIFReadMultiRangeL(int nRanges, void **ppData, VSILFILE *) EXPERIMENTAL_CPL_WARN_UNUSED_RESULT; size_t CPL_DLL VSIFWriteL(const void *, size_t, size_t, VSILFILE *) EXPERIMENTAL_CPL_WARN_UNUSED_RESULT; +void CPL_DLL VSIFClearErrL(VSILFILE *); +int CPL_DLL VSIFErrorL(VSILFILE *) CPL_WARN_UNUSED_RESULT; int CPL_DLL VSIFEofL(VSILFILE *) EXPERIMENTAL_CPL_WARN_UNUSED_RESULT; int CPL_DLL VSIFTruncateL(VSILFILE *, vsi_l_offset) EXPERIMENTAL_CPL_WARN_UNUSED_RESULT; @@ -667,6 +669,18 @@ typedef void (*VSIFilesystemPluginAdviseReadCallback)( void *pFile, int nRanges, const vsi_l_offset *panOffsets, const size_t *panSizes); +/** + * Has a read error (non end-of-file related) has occurred? + * @since GDAL 3.10 + */ +typedef int (*VSIFilesystemPluginErrorCallback)(void *pFile); + +/** + * Clear error and end-of-file flags. + * @since GDAL 3.10 + */ +typedef void (*VSIFilesystemPluginClearErrCallback)(void *pFile); + /** * struct containing callbacks to used by the handler. 
* (rw), (r), (w) or () at the end indicate whether the given callback is @@ -712,6 +726,9 @@ typedef struct /** The following optional member has been added in GDAL 3.7: */ VSIFilesystemPluginAdviseReadCallback advise_read; /**< AdviseRead() */ + + VSIFilesystemPluginErrorCallback error; /**< has read error occurred (r) */ + VSIFilesystemPluginClearErrCallback clear_err; /**< clear error flags(r) */ /* Callbacks are defined as a struct allocated by a call to VSIAllocFilesystemPluginCallbacksStruct in order to try to maintain ABI diff --git a/port/cpl_vsi_mem.cpp b/port/cpl_vsi_mem.cpp index 3ee6b4b635de..caf3a5436c22 100644 --- a/port/cpl_vsi_mem.cpp +++ b/port/cpl_vsi_mem.cpp @@ -148,6 +148,8 @@ class VSIMemHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; + int Error() override; int Eof() override; int Close() override; int Truncate(vsi_l_offset nNewSize) override; @@ -506,6 +508,29 @@ size_t VSIMemHandle::Write(const void *pBuffer, size_t nSize, size_t nCount) return nCount; } +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSIMemHandle::ClearErr() + +{ + CPL_SHARED_LOCK oLock(poFile->m_oMutex); + bEOF = false; + m_bError = false; +} + +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSIMemHandle::Error() + +{ + CPL_SHARED_LOCK oLock(poFile->m_oMutex); + return m_bError ? 
TRUE : FALSE; +} + /************************************************************************/ /* Eof() */ /************************************************************************/ @@ -514,7 +539,7 @@ int VSIMemHandle::Eof() { CPL_SHARED_LOCK oLock(poFile->m_oMutex); - return bEOF; + return bEOF ? TRUE : FALSE; } /************************************************************************/ diff --git a/port/cpl_vsi_virtual.h b/port/cpl_vsi_virtual.h index d3cd09f41eeb..915ff79a9a87 100644 --- a/port/cpl_vsi_virtual.h +++ b/port/cpl_vsi_virtual.h @@ -109,8 +109,12 @@ struct CPL_DLL VSIVirtualHandle int Printf(CPL_FORMAT_STRING(const char *pszFormat), ...) CPL_PRINT_FUNC_FORMAT(2, 3); + virtual void ClearErr() = 0; + virtual int Eof() = 0; + virtual int Error() = 0; + virtual int Flush() { return 0; diff --git a/port/cpl_vsil.cpp b/port/cpl_vsil.cpp index e822a47b0e29..1b832ecc20a3 100644 --- a/port/cpl_vsil.cpp +++ b/port/cpl_vsil.cpp @@ -2371,7 +2371,9 @@ int VSIFFlushL(VSILFILE *fp) * @param nSize size of objects to read in bytes. * @param nCount number of objects to read. * - * @return number of objects successfully read. + * @return number of objects successfully read. If that number is less than + * nCount, VSIFEofL() or VSIFErrorL() can be used to determine the reason for + * the short read. */ /** @@ -2391,7 +2393,9 @@ int VSIFFlushL(VSILFILE *fp) * @param nCount number of objects to read. * @param fp file handle opened with VSIFOpenL(). * - * @return number of objects successfully read. + * @return number of objects successfully read. If that number is less than + * nCount, VSIFEofL() or VSIFErrorL() can be used to determine the reason for + * the short read. */ size_t VSIFReadL(void *pBuffer, size_t nSize, size_t nCount, VSILFILE *fp) @@ -2521,14 +2525,14 @@ size_t VSIFWriteL(const void *pBuffer, size_t nSize, size_t nCount, * * Returns TRUE (non-zero) if an end-of-file condition occurred during the * previous read operation. 
The end-of-file flag is cleared by a successful - * VSIFSeekL() call. + * VSIFSeekL() call, or a call to VSIFClearErrL(). * * This method goes through the VSIFileHandler virtualization and may * work on unusual filesystems such as in memory. * * Analog of the POSIX feof() call. * - * @return TRUE if at EOF else FALSE. + * @return TRUE if at EOF, else FALSE. */ /** @@ -2536,7 +2540,7 @@ size_t VSIFWriteL(const void *pBuffer, size_t nSize, size_t nCount, * * Returns TRUE (non-zero) if an end-of-file condition occurred during the * previous read operation. The end-of-file flag is cleared by a successful - * VSIFSeekL() call. + * VSIFSeekL() call, or a call to VSIFClearErrL(). * * This method goes through the VSIFileHandler virtualization and may * work on unusual filesystems such as in memory. @@ -2545,7 +2549,7 @@ size_t VSIFWriteL(const void *pBuffer, size_t nSize, size_t nCount, * * @param fp file handle opened with VSIFOpenL(). * - * @return TRUE if at EOF else FALSE. + * @return TRUE if at EOF, else FALSE. */ int VSIFEofL(VSILFILE *fp) @@ -2554,6 +2558,88 @@ int VSIFEofL(VSILFILE *fp) return fp->Eof(); } +/************************************************************************/ +/* VSIFErrorL() */ +/************************************************************************/ + +/** + * \fn VSIVirtualHandle::Error() + * \brief Test the error indicator. + * + * Returns TRUE (non-zero) if an error condition occurred during the + * previous read operation. The error indicator is cleared by a call to + * VSIFClearErrL(). Note that a end-of-file situation, reported by VSIFEofL(), + * is *not* an error reported by VSIFErrorL(). + * + * This method goes through the VSIFileHandler virtualization and may + * work on unusual filesystems such as in memory. + * + * Analog of the POSIX ferror() call. + * + * @return TRUE if the error indicator is set, else FALSE. + * @since 3.10 + */ + +/** + * \brief Test the error indicator. 
+ * + * Returns TRUE (non-zero) if an error condition occurred during the + * previous read operation. The error indicator is cleared by a call to + * VSIFClearErrL(). Note that an end-of-file situation, reported by VSIFEofL(), + * is *not* an error reported by VSIFErrorL(). + * + * This method goes through the VSIFileHandler virtualization and may + * work on unusual filesystems such as in memory. + * + * Analog of the POSIX ferror() call. + * + * @param fp file handle opened with VSIFOpenL(). + * + * @return TRUE if the error indicator is set, else FALSE. + * @since 3.10 + */ + +int VSIFErrorL(VSILFILE *fp) + +{ + return fp->Error(); +} + +/************************************************************************/ +/* VSIFClearErrL() */ +/************************************************************************/ + +/** + * \fn VSIVirtualHandle::ClearErr() + * \brief Reset the error and end-of-file indicators. + * + * This method goes through the VSIFileHandler virtualization and may + * work on unusual filesystems such as in memory. + * + * Analog of the POSIX clearerr() call. + * + * @since 3.10 + */ + +/** + * \brief Reset the error and end-of-file indicators. + * + * This method goes through the VSIFileHandler virtualization and may + * work on unusual filesystems such as in memory. + * + * Analog of the POSIX clearerr() call. + * + * @param fp file handle opened with VSIFOpenL(). 
+ * + * @since 3.10 + */ + +void VSIFClearErrL(VSILFILE *fp) + +{ + fp->ClearErr(); +} + /************************************************************************/ /* VSIFTruncateL() */ /************************************************************************/ diff --git a/port/cpl_vsil_buffered_reader.cpp b/port/cpl_vsil_buffered_reader.cpp index 456e4c174e26..2e8c954b031f 100644 --- a/port/cpl_vsil_buffered_reader.cpp +++ b/port/cpl_vsil_buffered_reader.cpp @@ -63,6 +63,7 @@ class VSIBufferedReaderHandle final : public VSIVirtualHandle GUIntBig nCurOffset = 0; bool bNeedBaseHandleSeek = false; bool bEOF = false; + bool bError = false; vsi_l_offset nCheatFileSize = 0; int SeekBaseTo(vsi_l_offset nTargetOffset); @@ -80,6 +81,8 @@ class VSIBufferedReaderHandle final : public VSIVirtualHandle size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; int Eof() override; + int Error() override; + void ClearErr() override; int Flush() override; int Close() override; }; @@ -216,7 +219,8 @@ int VSIBufferedReaderHandle::SeekBaseTo(vsi_l_offset nTargetOffset) if (nRead < nToRead) { - bEOF = true; + bEOF = CPL_TO_BOOL(m_poBaseHandle->Eof()); + bError = CPL_TO_BOOL(m_poBaseHandle->Error()); return FALSE; } if (nToRead < nMaxOffset) @@ -268,6 +272,16 @@ size_t VSIBufferedReaderHandle::Read(void *pBuffer, size_t nSize, size_t nMemb) const size_t nReadInFile = m_poBaseHandle->Read( static_cast(pBuffer) + nReadInBuffer, 1, nToReadInFile); + if (nReadInFile < nToReadInFile) + { + if (m_poBaseHandle->Eof()) + bEOF = true; + else + { + CPLAssert(m_poBaseHandle->Error()); + bError = true; + } + } const size_t nRead = nReadInBuffer + nReadInFile; nBufferSize = static_cast( @@ -283,8 +297,6 @@ size_t VSIBufferedReaderHandle::Read(void *pBuffer, size_t nSize, size_t nMemb) CPLAssert(m_poBaseHandle->Tell() == nCurOffset); #endif - bEOF = CPL_TO_BOOL(m_poBaseHandle->Eof()); - return nRead / nSize; } else @@ 
-303,6 +315,16 @@ size_t VSIBufferedReaderHandle::Read(void *pBuffer, size_t nSize, size_t nMemb) bNeedBaseHandleSeek = false; const size_t nReadInFile = m_poBaseHandle->Read(pBuffer, 1, nTotalToRead); + if (nReadInFile < nTotalToRead) + { + if (m_poBaseHandle->Eof()) + bEOF = true; + else + { + CPLAssert(m_poBaseHandle->Error()); + bError = true; + } + } nBufferSize = static_cast( std::min(nReadInFile, static_cast(MAX_BUFFER_SIZE))); nBufferOffset = nCurOffset + nReadInFile - nBufferSize; @@ -316,8 +338,6 @@ size_t VSIBufferedReaderHandle::Read(void *pBuffer, size_t nSize, size_t nMemb) CPLAssert(m_poBaseHandle->Tell() == nCurOffset); #endif - bEOF = CPL_TO_BOOL(m_poBaseHandle->Eof()); - return nReadInFile / nSize; } } @@ -334,6 +354,18 @@ size_t VSIBufferedReaderHandle::Write(const void * /* pBuffer */, return 0; } +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSIBufferedReaderHandle::ClearErr() + +{ + m_poBaseHandle->ClearErr(); + bEOF = false; + bError = false; +} + /************************************************************************/ /* Eof() */ /************************************************************************/ @@ -343,6 +375,15 @@ int VSIBufferedReaderHandle::Eof() return bEOF; } +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSIBufferedReaderHandle::Error() +{ + return bError; +} + /************************************************************************/ /* Flush() */ /************************************************************************/ diff --git a/port/cpl_vsil_cache.cpp b/port/cpl_vsil_cache.cpp index cc0d82646a43..edbde4ae2511 100644 --- a/port/cpl_vsil_cache.cpp +++ b/port/cpl_vsil_cache.cpp @@ -83,6 +83,7 @@ class VSICachedFile final : public VSIVirtualHandle m_oCache; // can 
only been initialized in constructor bool m_bEOF = false; + bool m_bError = false; int Seek(vsi_l_offset nOffset, int nWhence) override; vsi_l_offset Tell() override; @@ -98,7 +99,9 @@ class VSICachedFile final : public VSIVirtualHandle } size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; int Eof() override; + int Error() override; int Flush() override; int Close() override; @@ -241,6 +244,8 @@ bool VSICachedFile::LoadBlocks(vsi_l_offset nStartBlock, size_t nBlockCount, m_poBase->Read(oData.data(), 1, m_nChunkSize); if (nDataRead == 0) return false; + if (nDataRead < m_nChunkSize && m_poBase->Error()) + m_bError = true; oData.resize(nDataRead); m_oCache.insert(nStartBlock, std::move(oData)); @@ -292,11 +297,13 @@ bool VSICachedFile::LoadBlocks(vsi_l_offset nStartBlock, size_t nBlockCount, /* Read the whole request into the working buffer. */ /* -------------------------------------------------------------------- */ - const size_t nDataRead = - m_poBase->Read(pabyWorkBuffer, 1, nBlockCount * m_nChunkSize); + const size_t nToRead = nBlockCount * m_nChunkSize; + const size_t nDataRead = m_poBase->Read(pabyWorkBuffer, 1, nToRead); + if (nDataRead < nToRead && m_poBase->Error()) + m_bError = true; bool ret = true; - if (nBlockCount * m_nChunkSize > nDataRead + m_nChunkSize - 1) + if (nToRead > nDataRead + m_nChunkSize - 1) { size_t nNewBlockCount = (nDataRead + m_nChunkSize - 1) / m_nChunkSize; if (nNewBlockCount < nBlockCount) @@ -429,7 +436,7 @@ size_t VSICachedFile::Read(void *pBuffer, size_t nSize, size_t nCount) m_nOffset += nAmountCopied; const size_t nRet = nAmountCopied / nSize; - if (nRet != nCount) + if (nRet != nCount && !m_bError) m_bEOF = true; return nRet; } @@ -456,6 +463,28 @@ size_t VSICachedFile::Write(const void * /* pBuffer */, size_t /*nSize */, return 0; } +/************************************************************************/ +/* ClearErr() */ 
+/************************************************************************/ + +void VSICachedFile::ClearErr() + +{ + m_poBase->ClearErr(); + m_bEOF = false; + m_bError = false; +} + +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSICachedFile::Error() + +{ + return m_bError; +} + /************************************************************************/ /* Eof() */ /************************************************************************/ diff --git a/port/cpl_vsil_crypt.cpp b/port/cpl_vsil_crypt.cpp index 9497918b6247..7f4b931165fa 100644 --- a/port/cpl_vsil_crypt.cpp +++ b/port/cpl_vsil_crypt.cpp @@ -764,6 +764,7 @@ class VSICryptFileHandle final : public VSIVirtualHandle bool bUpdateHeader = false; vsi_l_offset nCurPos = 0; bool bEOF = false; + bool bError = false; CryptoPP::BlockCipher *poEncCipher = nullptr; CryptoPP::BlockCipher *poDecCipher = nullptr; @@ -793,6 +794,8 @@ class VSICryptFileHandle final : public VSIVirtualHandle size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; int Eof() override; + int Error() override; + void ClearErr() override; int Flush() override; int Close() override; int Truncate(vsi_l_offset nNewSize) override; @@ -1091,6 +1094,7 @@ size_t VSICryptFileHandle::Read(void *pBuffer, size_t nSize, size_t nMemb) if ((nPerms & VSICRYPT_READ) == 0) { + bError = true; return 0; } @@ -1131,11 +1135,13 @@ size_t VSICryptFileHandle::Read(void *pBuffer, size_t nSize, size_t nMemb) poBaseHandle->Seek(poHeader->nHeaderSize + nSectorOffset, SEEK_SET); if (poBaseHandle->Read(pabyWB, poHeader->nSectorSize, 1) != 1) { - bEOF = true; + bEOF = poBaseHandle->Eof(); + bError = poBaseHandle->Error(); break; } if (!DecryptBlock(pabyWB, nSectorOffset)) { + bError = true; break; } if ((nPerms & VSICRYPT_WRITE) && @@ -1357,6 +1363,32 @@ int 
VSICryptFileHandle::Eof() return bEOF; } +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSICryptFileHandle::Error() +{ +#ifdef VERBOSE_VSICRYPT + CPLDebug("VSICRYPT", "Error() = %d", static_cast(bError)); +#endif + return bError; +} + +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSICryptFileHandle::ClearErr() +{ +#ifdef VERBOSE_VSICRYPT + CPLDebug("VSICRYPT", "ClearErr()"); +#endif + bEOF = false; + bError = false; + poBaseHandle->ClearErr(); +} + /************************************************************************/ /* Flush() */ /************************************************************************/ diff --git a/port/cpl_vsil_curl.cpp b/port/cpl_vsil_curl.cpp index 65fbb68b1e1c..32412dd551a4 100644 --- a/port/cpl_vsil_curl.cpp +++ b/port/cpl_vsil_curl.cpp @@ -2299,7 +2299,7 @@ size_t VSICurlHandle::Read(void *const pBufferIn, size_t const nSize, if (osRegion.empty()) { if (!bInterrupted) - bEOF = true; + bError = true; return 0; } } @@ -3505,12 +3505,34 @@ size_t VSICurlHandle::Write(const void * /* pBuffer */, size_t /* nSize */, } /************************************************************************/ -/* Eof() */ +/* ClearErr() */ +/************************************************************************/ + +void VSICurlHandle::ClearErr() + +{ + bEOF = false; + bError = false; +} + +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSICurlHandle::Error() + +{ + return bError ? 
TRUE : FALSE; +} + +/************************************************************************/ +/* Eof() */ /************************************************************************/ int VSICurlHandle::Eof() + { - return bEOF; + return bEOF ? TRUE : FALSE; } /************************************************************************/ @@ -5503,15 +5525,6 @@ size_t VSIAppendWriteHandle::Write(const void *pBuffer, size_t nSize, return nMemb; } -/************************************************************************/ -/* Eof() */ -/************************************************************************/ - -int VSIAppendWriteHandle::Eof() -{ - return FALSE; -} - /************************************************************************/ /* Close() */ /************************************************************************/ diff --git a/port/cpl_vsil_curl_class.h b/port/cpl_vsil_curl_class.h index 99752bd2e4ee..ad7e6bae69f3 100644 --- a/port/cpl_vsil_curl_class.h +++ b/port/cpl_vsil_curl_class.h @@ -405,6 +405,7 @@ class VSICurlHandle : public VSIVirtualHandle vsi_l_offset curOffset = 0; bool bEOF = false; + bool bError = false; virtual std::string DownloadRegion(vsi_l_offset startOffset, int nBlocks); @@ -491,7 +492,9 @@ class VSICurlHandle : public VSIVirtualHandle const vsi_l_offset *panOffsets, const size_t *panSizes) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; int Eof() override; + int Error() override; int Flush() override; int Close() override; @@ -811,7 +814,21 @@ class VSIS3LikeWriteHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; - int Eof() override; + + void ClearErr() override + { + } + + int Error() override + { + return FALSE; + } + + int Eof() override + { + return FALSE; + } + int Close() override; bool IsOK() @@ -855,7 +872,21 @@ 
class VSIAppendWriteHandle : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; - int Eof() override; + + void ClearErr() override + { + } + + int Error() override + { + return FALSE; + } + + int Eof() override + { + return FALSE; + } + int Close() override; bool IsOK() diff --git a/port/cpl_vsil_curl_streaming.cpp b/port/cpl_vsil_curl_streaming.cpp index bbc213a5b86a..8ca50cf7f21f 100644 --- a/port/cpl_vsil_curl_streaming.cpp +++ b/port/cpl_vsil_curl_streaming.cpp @@ -280,6 +280,7 @@ class VSICurlStreamingHandle : public VSIVirtualHandle vsi_l_offset nCandidateFileSize = 0; bool bEOF = false; + bool m_bError = false; size_t nCachedSize = 0; GByte *pCachedData = nullptr; @@ -302,7 +303,7 @@ class VSICurlStreamingHandle : public VSIVirtualHandle vsi_l_offset nBodySize = 0; int nHTTPCode = 0; char m_szCurlErrBuf[CURL_ERROR_SIZE + 1]; - bool m_bErrorOccurred = false; + bool m_bErrorOccurredInThread = false; void AcquireMutex(); void ReleaseMutex(); @@ -344,6 +345,8 @@ class VSICurlStreamingHandle : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; + int Error() override; int Eof() override; int Flush() override; int Close() override; @@ -1056,8 +1059,8 @@ void VSICurlStreamingHandle::DownloadInThread() unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, nullptr); AcquireMutex(); - m_bErrorOccurred = eRet != CURLE_OK; - if (m_bErrorOccurred) + m_bErrorOccurredInThread = eRet != CURLE_OK; + if (m_bErrorOccurredInThread) { // For autotest purposes only ! 
const char *pszSimulatedCurlError = CPLGetConfigOption( @@ -1109,7 +1112,7 @@ void VSICurlStreamingHandle::StartDownload() oRingBuffer.Reset(); bDownloadInProgress = TRUE; nRingBufferFileOffset = 0; - m_bErrorOccurred = false; + m_bErrorOccurredInThread = false; hThread = CPLCreateJoinableThread(VSICurlDownloadInThread, this); } @@ -1142,7 +1145,7 @@ void VSICurlStreamingHandle::StopDownload() oRingBuffer.Reset(); bDownloadStopped = FALSE; - m_bErrorOccurred = false; + m_bErrorOccurredInThread = false; nRingBufferFileOffset = 0; bEOF = false; } @@ -1253,9 +1256,9 @@ size_t VSICurlStreamingHandle::Read(void *const pBuffer, size_t const nSize, bEOF = true; } - // Has a Seek() being done since the last Read()? bool bErrorOccurred = false; + // Has a Seek() being done since the last Read()? if (!bEOF && nRemaining > 0 && curOffset != nRingBufferFileOffset) { // Backward seek: Need to restart the download from the beginning. @@ -1301,7 +1304,7 @@ size_t VSICurlStreamingHandle::Read(void *const pBuffer, size_t const nSize, while (oRingBuffer.GetSize() == 0 && bDownloadInProgress) CPLCondWait(hCondProducer, hRingBufferMutex); const int bBufferEmpty = (oRingBuffer.GetSize() == 0); - bErrorOccurred = m_bErrorOccurred; + bErrorOccurred = m_bErrorOccurredInThread; ReleaseMutex(); if (bBufferEmpty && !bDownloadInProgress) @@ -1355,7 +1358,7 @@ size_t VSICurlStreamingHandle::Read(void *const pBuffer, size_t const nSize, while (oRingBuffer.GetSize() == 0 && bDownloadInProgress) CPLCondWait(hCondProducer, hRingBufferMutex); const bool bBufferEmpty = oRingBuffer.GetSize() == 0; - bErrorOccurred = m_bErrorOccurred; + bErrorOccurred = m_bErrorOccurredInThread; ReleaseMutex(); if (bBufferEmpty && !bDownloadInProgress) @@ -1450,6 +1453,9 @@ size_t VSICurlStreamingHandle::Read(void *const pBuffer, size_t const nSize, } } + if (bErrorOccurred) + m_bError = true; + return nRet; } @@ -1499,6 +1505,26 @@ int VSICurlStreamingHandle::Eof() return bEOF; } 
+/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSICurlStreamingHandle::Error() + +{ + return m_bError; +} + +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSICurlStreamingHandle::ClearErr() +{ + bEOF = false; + m_bError = false; +} + /************************************************************************/ /* Flush() */ /************************************************************************/ diff --git a/port/cpl_vsil_gzip.cpp b/port/cpl_vsil_gzip.cpp index f9a6df9501e3..ec5f111fd654 100644 --- a/port/cpl_vsil_gzip.cpp +++ b/port/cpl_vsil_gzip.cpp @@ -215,7 +215,9 @@ class VSIGZipHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; int Eof() override; + int Error() override; int Flush() override; int Close() override; @@ -323,7 +325,9 @@ class VSIDeflate64Handle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; int Eof() override; + int Error() override; int Flush() override; int Close() override; @@ -1052,7 +1056,7 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, static_cast(stream.avail_out))); const uInt nReadFromFile = static_cast( m_poBaseHandle->Read(next_out, 1, nToRead)); - if (nReadFromFile < nToRead && !m_poBaseHandle->Eof()) + if (nReadFromFile < nToRead && m_poBaseHandle->Error()) z_err = Z_ERRNO; stream.avail_out -= nReadFromFile; nRead += nReadFromFile; @@ -1137,7 +1141,8 @@ size_t VSIGZipHandle::Read(void *const buf, size_t 
const nSize, if (stream.avail_in == 0) { z_eof = 1; - if (m_poBaseHandle->Tell() != offsetEndCompressedData) + if (m_poBaseHandle->Error() || + m_poBaseHandle->Tell() != offsetEndCompressedData) { z_err = Z_ERRNO; break; @@ -1207,7 +1212,6 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, size_t ret = (len - stream.avail_out) / nSize; if (z_err != Z_OK && z_err != Z_STREAM_END) { - m_bEOF = true; // wrong... CPLError(CE_Failure, CPLE_AppDefined, "In file %s, at line %d, decompression failed with " "z_err = %d, return = %d", @@ -1270,6 +1274,30 @@ int VSIGZipHandle::Eof() return m_bEOF; } +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSIGZipHandle::Error() +{ +#ifdef ENABLE_DEBUG + CPLDebug("GZIP", "Error()"); +#endif + return z_err != Z_OK && z_err != Z_STREAM_END; +} + +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSIGZipHandle::ClearErr() +{ + m_poBaseHandle->ClearErr(); + z_eof = 0; + m_bEOF = false; + z_err = Z_OK; +} + /************************************************************************/ /* Flush() */ /************************************************************************/ @@ -1767,7 +1795,8 @@ size_t VSIDeflate64Handle::Read(void *const buf, size_t const nSize, if (stream.avail_in == 0) { z_eof = 1; - if (m_poBaseHandle->Tell() != offsetEndCompressedData) + if (m_poBaseHandle->Error() || + m_poBaseHandle->Tell() != offsetEndCompressedData) { z_err = Z_ERRNO; break; @@ -1876,7 +1905,6 @@ size_t VSIDeflate64Handle::Read(void *const buf, size_t const nSize, size_t ret = (len - stream.avail_out) / nSize; if (z_err != Z_OK && z_err != Z_STREAM_END) { - m_bEOF = true; // Wrong... 
CPLError(CE_Failure, CPLE_AppDefined, "In file %s, at line %d, decompression failed with " "z_err = %d, return = %d", @@ -1918,6 +1946,30 @@ int VSIDeflate64Handle::Eof() return m_bEOF; } +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSIDeflate64Handle::Error() +{ +#ifdef ENABLE_DEBUG + CPLDebug("GZIP", "Error()"); +#endif + return z_err != Z_OK && z_err != Z_STREAM_END; +} + +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSIDeflate64Handle::ClearErr() +{ + m_poBaseHandle->ClearErr(); + z_eof = 0; + m_bEOF = false; + z_err = Z_OK; +} + /************************************************************************/ /* Flush() */ /************************************************************************/ @@ -2002,7 +2054,21 @@ class VSIGZipWriteHandleMT final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; - int Eof() override; + + int Eof() override + { + return 0; + } + + int Error() override + { + return 0; + } + + void ClearErr() override + { + } + int Flush() override; int Close() override; }; @@ -2542,16 +2608,6 @@ int VSIGZipWriteHandleMT::Flush() return 0; } -/************************************************************************/ -/* Eof() */ -/************************************************************************/ - -int VSIGZipWriteHandleMT::Eof() - -{ - return 1; -} - /************************************************************************/ /* Seek() */ /************************************************************************/ @@ -2612,7 +2668,21 @@ class VSIGZipWriteHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void 
*pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; - int Eof() override; + + int Eof() override + { + return 0; + } + + int Error() override + { + return 0; + } + + void ClearErr() override + { + } + int Flush() override; int Close() override; }; @@ -2860,16 +2930,6 @@ int VSIGZipWriteHandle::Flush() return 0; } -/************************************************************************/ -/* Eof() */ -/************************************************************************/ - -int VSIGZipWriteHandle::Eof() - -{ - return 1; -} - /************************************************************************/ /* Seek() */ /************************************************************************/ @@ -3572,7 +3632,21 @@ class VSIZipWriteHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; - int Eof() override; + + int Eof() override + { + return 0; + } + + int Error() override + { + return 0; + } + + void ClearErr() override + { + } + int Flush() override; int Close() override; @@ -3681,6 +3755,7 @@ class VSISOZipHandle final : public VSIVirtualHandle uint32_t nToSkip_; uint32_t nChunkSize_; bool bEOF_ = false; + bool bError_ = false; vsi_l_offset nCurPos_ = 0; bool bOK_ = true; #ifdef HAVE_LIBDEFLATE @@ -3718,6 +3793,17 @@ class VSISOZipHandle final : public VSIVirtualHandle return bEOF_; } + virtual int Error() override + { + return bError_; + } + + virtual void ClearErr() override + { + bEOF_ = false; + bError_ = false; + } + virtual int Close() override; bool IsOK() const @@ -3812,11 +3898,13 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) if (nSize != 1) { + bError_ = true; CPLError(CE_Failure, CPLE_NotSupported, "Unsupported nSize"); return 0; } if ((nCurPos_ % nChunkSize_) != 0) { + bError_ = true; CPLError(CE_Failure, 
CPLE_NotSupported, "nCurPos is not a multiple of nChunkSize"); return 0; @@ -3828,6 +3916,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) } else if ((nToRead % nChunkSize_) != 0) { + bError_ = true; CPLError(CE_Failure, CPLE_NotSupported, "nToRead is not a multiple of nChunkSize"); return 0; @@ -3860,6 +3949,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) ReadOffsetInCompressedStream(nCurPos_ / nChunkSize_); if (nOffsetInCompressedStream == static_cast(-1)) { + bError_ = true; CPLError(CE_Failure, CPLE_AppDefined, "Cannot read nOffsetInCompressedStream"); return 0; @@ -3868,6 +3958,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) ReadOffsetInCompressedStream(1 + nCurPos_ / nChunkSize_); if (nNextOffsetInCompressedStream == static_cast(-1)) { + bError_ = true; CPLError(CE_Failure, CPLE_AppDefined, "Cannot read nNextOffsetInCompressedStream"); return 0; @@ -3878,6 +3969,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) 13 + 2 * nChunkSize_ || nNextOffsetInCompressedStream > compressed_size_) { + bError_ = true; CPLError( CE_Failure, CPLE_AppDefined, "Invalid values for nOffsetInCompressedStream (" CPL_FRMT_GUIB @@ -3894,7 +3986,10 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) if (poBaseHandle_->Seek( nPosCompressedStream_ + nOffsetInCompressedStream, SEEK_SET) != 0) + { + bError_ = true; return 0; + } const int nCompressedToRead = static_cast( nNextOffsetInCompressedStream - nOffsetInCompressedStream); @@ -3902,7 +3997,10 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) std::vector abyCompressedData(nCompressedToRead); if (poBaseHandle_->Read(&abyCompressedData[0], nCompressedToRead, 1) != 1) + { + bError_ = true; return 0; + } size_t nToReadThisIter = std::min(nToRead, static_cast(nChunkSize_)); @@ -3923,6 +4021,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) 
static_cast(pBuffer) + nOffsetInOutputBuffer, nToReadThisIter, &nOut) != LIBDEFLATE_SUCCESS) { + bError_ = true; CPLError( CE_Failure, CPLE_AppDefined, "libdeflate_deflate_decompress() failed at pos " CPL_FRMT_GUIB, @@ -3931,6 +4030,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) } if (nOut != nToReadThisIter) { + bError_ = true; CPLError(CE_Failure, CPLE_AppDefined, "Only %u bytes decompressed at pos " CPL_FRMT_GUIB " whereas %u where expected", @@ -3949,6 +4049,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) int err = inflate(&sStream_, Z_FINISH); if ((err != Z_OK && err != Z_STREAM_END)) { + bError_ = true; CPLError(CE_Failure, CPLE_AppDefined, "inflate() failed at pos " CPL_FRMT_GUIB, static_cast(nCurPos_)); @@ -3959,6 +4060,7 @@ size_t VSISOZipHandle::Read(void *pBuffer, size_t nSize, size_t nCount) CPLDebug("VSIZIP", "avail_in = %d", sStream_.avail_in); if (sStream_.avail_out != 0) { + bError_ = true; CPLError( CE_Failure, CPLE_AppDefined, "Only %u bytes decompressed at pos " CPL_FRMT_GUIB @@ -4780,17 +4882,6 @@ size_t VSIZipWriteHandle::Write(const void *pBuffer, size_t nSize, size_t nMemb) return nMemb; } -/************************************************************************/ -/* Eof() */ -/************************************************************************/ - -int VSIZipWriteHandle::Eof() -{ - CPLError(CE_Failure, CPLE_NotSupported, - "VSIFEofL() is not supported on writable Zip files"); - return FALSE; -} - /************************************************************************/ /* Flush() */ /************************************************************************/ diff --git a/port/cpl_vsil_libarchive.cpp b/port/cpl_vsil_libarchive.cpp index 96efb4863fa4..dadf5268a0ea 100644 --- a/port/cpl_vsil_libarchive.cpp +++ b/port/cpl_vsil_libarchive.cpp @@ -394,11 +394,22 @@ class VSILibArchiveHandler final : public VSIVirtualHandle return 0; } + virtual void ClearErr() override + { + m_bEOF = 
false; + m_bError = false; + } + virtual int Eof() override { return m_bEOF ? 1 : 0; } + virtual int Error() override + { + return m_bError ? 1 : 0; + } + virtual int Close() override { return 0; @@ -422,7 +433,12 @@ size_t VSILibArchiveHandler::Read(void *pBuffer, size_t nSize, size_t nCount) auto nRead = static_cast( archive_read_data(m_poReader->GetArchiveHandler(), pBuffer, nToRead)); if (nRead < nToRead) - m_bEOF = true; + { + if (m_nOffset + nRead == m_poReader->GetFileSize()) + m_bEOF = true; + else + m_bError = true; + } m_nOffset += nRead; return nRead / nSize; } diff --git a/port/cpl_vsil_plugin.cpp b/port/cpl_vsil_plugin.cpp index 32c5013bf082..774e87a611d9 100644 --- a/port/cpl_vsil_plugin.cpp +++ b/port/cpl_vsil_plugin.cpp @@ -70,6 +70,16 @@ int VSIPluginHandle::Eof() return poFS->Eof(cbData); } +int VSIPluginHandle::Error() +{ + return poFS->Error(cbData); +} + +void VSIPluginHandle::ClearErr() +{ + poFS->ClearErr(cbData); +} + int VSIPluginHandle::Close() { int ret = poFS->Close(cbData); @@ -359,6 +369,28 @@ int VSIPluginFilesystemHandler::Eof(void *pFile) return -1; } +int VSIPluginFilesystemHandler::Error(void *pFile) +{ + if (m_cb->error) + { + return m_cb->error(pFile); + } + CPLDebug("CPL", "Error() not implemented for %s plugin", m_Prefix); + return 0; +} + +void VSIPluginFilesystemHandler::ClearErr(void *pFile) +{ + if (m_cb->clear_err) + { + m_cb->clear_err(pFile); + } + else + { + CPLDebug("CPL", "ClearErr() not implemented for %s plugin", m_Prefix); + } +} + int VSIPluginFilesystemHandler::Close(void *pFile) { if (m_cb->close != nullptr) diff --git a/port/cpl_vsil_plugin.h b/port/cpl_vsil_plugin.h index a6990f56be94..dd9c0ca7dd25 100644 --- a/port/cpl_vsil_plugin.h +++ b/port/cpl_vsil_plugin.h @@ -70,6 +70,8 @@ class VSIPluginFilesystemHandler : public VSIFilesystemHandler VSIRangeStatus GetRangeStatus(void *pFile, vsi_l_offset nOffset, vsi_l_offset nLength); int Eof(void *pFile); + int Error(void *pFile); + void ClearErr(void *pFile); 
size_t Write(void *pFile, const void *pBuffer, size_t nSize, size_t nCount); int Flush(void *pFile); int Truncate(void *pFile, vsi_l_offset nNewSize); @@ -121,7 +123,9 @@ class VSIPluginHandle : public VSIVirtualHandle const size_t *panSizes) override; VSIRangeStatus GetRangeStatus(vsi_l_offset nOffset, vsi_l_offset nLength) override; + void ClearErr() override; int Eof() override; + int Error() override; size_t Write(const void *pBuffer, size_t nSize, size_t nCount) override; int Flush() override; int Truncate(vsi_l_offset nNewSize) override; diff --git a/port/cpl_vsil_s3.cpp b/port/cpl_vsil_s3.cpp index a265eea4d9f9..476712a5c6a9 100644 --- a/port/cpl_vsil_s3.cpp +++ b/port/cpl_vsil_s3.cpp @@ -1516,15 +1516,6 @@ size_t VSIS3LikeWriteHandle::Write(const void *pBuffer, size_t nSize, return nMemb; } -/************************************************************************/ -/* Eof() */ -/************************************************************************/ - -int VSIS3LikeWriteHandle::Eof() -{ - return FALSE; -} - /************************************************************************/ /* InvalidateParentDirectory() */ /************************************************************************/ diff --git a/port/cpl_vsil_sparsefile.cpp b/port/cpl_vsil_sparsefile.cpp index 66ab0b41e773..2dfe2f4349ce 100644 --- a/port/cpl_vsil_sparsefile.cpp +++ b/port/cpl_vsil_sparsefile.cpp @@ -77,6 +77,7 @@ class VSISparseFileHandle : public VSIVirtualHandle VSISparseFileFilesystemHandler *m_poFS = nullptr; bool bEOF = false; + bool bError = false; public: explicit VSISparseFileHandle(VSISparseFileFilesystemHandler *poFS) @@ -93,7 +94,9 @@ class VSISparseFileHandle : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; int Eof() override; + int Error() override; int Close() override; }; @@ -277,6 +280,11 @@ size_t 
VSISparseFileHandle::Read(void *pBuffer, size_t nSize, size_t nCount) 1, nExtraBytes); nCurOffset = nCurOffsetSave; bEOF = bEOFSave; + if (nBytesRead < nExtraBytes) + { + // A short read in a region of a sparse file is always an error + bError = true; + } nBytesReturnCount += nBytesRead; nBytesRequested -= nExtraBytes; @@ -313,6 +321,7 @@ size_t VSISparseFileHandle::Read(void *pBuffer, size_t nSize, size_t nCount) } if (aoRegions[iRegion].fp == nullptr) { + bError = true; return 0; } } @@ -321,13 +330,21 @@ size_t VSISparseFileHandle::Read(void *pBuffer, size_t nSize, size_t nCount) nCurOffset - aoRegions[iRegion].nDstOffset + aoRegions[iRegion].nSrcOffset, SEEK_SET) != 0) + { + bError = true; return 0; + } m_poFS->IncRecCounter(); const size_t nBytesRead = VSIFReadL(pBuffer, 1, static_cast(nBytesRequested), aoRegions[iRegion].fp); m_poFS->DecRecCounter(); + if (nBytesRead < static_cast(nBytesRequested)) + { + // A short read in a region of a sparse file is always an error + bError = true; + } nBytesReturnCount += nBytesRead; } @@ -358,6 +375,32 @@ int VSISparseFileHandle::Eof() return bEOF ? 1 : 0; } +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSISparseFileHandle::Error() + +{ + return bError ? 
1 : 0; +} + +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSISparseFileHandle::ClearErr() + +{ + for (const auto ®ion : aoRegions) + { + if (region.fp) + region.fp->ClearErr(); + } + bEOF = false; + bError = false; +} + /************************************************************************/ /* ==================================================================== */ /* VSISparseFileFilesystemHandler */ diff --git a/port/cpl_vsil_stdin.cpp b/port/cpl_vsil_stdin.cpp index 4eae39656193..cb64c1f98667 100644 --- a/port/cpl_vsil_stdin.cpp +++ b/port/cpl_vsil_stdin.cpp @@ -62,6 +62,7 @@ static size_t gnBufferAlloc = 0; // current allocation static size_t gnBufferLen = 0; // number of valid bytes in gpabyBuffer static uint64_t gnRealPos = 0; // current offset on stdin static bool gbHasSoughtToEnd = false; +static bool gbHasErrored = false; static uint64_t gnFileSize = 0; /************************************************************************/ @@ -126,6 +127,7 @@ class VSIStdinHandle final : public VSIVirtualHandle CPL_DISALLOW_COPY_ASSIGN(VSIStdinHandle) bool m_bEOF = false; + bool m_bError = false; uint64_t m_nCurOff = 0; size_t ReadAndCache(void *pBuffer, size_t nToRead); @@ -141,6 +143,8 @@ class VSIStdinHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; + int Error() override; int Eof() override; int Close() override; }; @@ -192,8 +196,10 @@ size_t VSIStdinHandle::ReadAndCache(void *pUserBuffer, size_t nToRead) if (nRead < nToRead) { - gnFileSize = gnRealPos; - gbHasSoughtToEnd = true; + gbHasSoughtToEnd = feof(gStdinFile); + if (gbHasSoughtToEnd) + gnFileSize = gnRealPos; + gbHasErrored = ferror(gStdinFile); } return nRead; @@ -338,13 +344,15 @@ size_t 
VSIStdinHandle::Read(void *pBuffer, size_t nSize, size_t nCount) const size_t nRead = ReadAndCache(static_cast(pBuffer) + nAlreadyCached, nBytesToRead - nAlreadyCached); - m_bEOF = nRead < nBytesToRead - nAlreadyCached; + m_bEOF = gbHasSoughtToEnd; + m_bError = gbHasErrored; return (nRead + nAlreadyCached) / nSize; } const size_t nRead = ReadAndCache(pBuffer, nBytesToRead); - m_bEOF = nRead < nBytesToRead; + m_bEOF = gbHasSoughtToEnd; + m_bError = gbHasErrored; return nRead / nSize; } @@ -359,6 +367,28 @@ size_t VSIStdinHandle::Write(const void * /* pBuffer */, size_t /* nSize */, return 0; } +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSIStdinHandle::ClearErr() + +{ + clearerr(gStdinFile); + m_bEOF = false; + m_bError = false; +} + +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSIStdinHandle::Error() + +{ + return m_bError; +} + /************************************************************************/ /* Eof() */ /************************************************************************/ @@ -386,6 +416,7 @@ int VSIStdinHandle::Close() gnRealPos = ftell(stdin); gnBufferLen = 0; gbHasSoughtToEnd = false; + gbHasErrored = false; gnFileSize = 0; } return 0; diff --git a/port/cpl_vsil_stdout.cpp b/port/cpl_vsil_stdout.cpp index 30ca43d6ba8e..b17e7b4410e5 100644 --- a/port/cpl_vsil_stdout.cpp +++ b/port/cpl_vsil_stdout.cpp @@ -103,6 +103,7 @@ class VSIStdoutHandle final : public VSIVirtualHandle CPL_DISALLOW_COPY_ASSIGN(VSIStdoutHandle) vsi_l_offset m_nOffset = 0; + bool m_bError = false; public: VSIStdoutHandle() = default; @@ -112,7 +113,22 @@ class VSIStdoutHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const 
void *pBuffer, size_t nSize, size_t nMemb) override; - int Eof() override; + + void ClearErr() override + { + m_bError = false; + } + + int Error() override + { + return m_bError; + } + + int Eof() override + { + return FALSE; + } + int Flush() override; int Close() override; }; @@ -158,10 +174,14 @@ int VSIStdoutHandle::Flush() /* Read() */ /************************************************************************/ -size_t VSIStdoutHandle::Read(void * /* pBuffer */, size_t /* nSize */, - size_t /* nCount */) +size_t VSIStdoutHandle::Read(void * /* pBuffer */, size_t nSize, size_t nCount) { - CPLError(CE_Failure, CPLE_NotSupported, "Read() unsupported on /vsistdout"); + if (nSize > 0 && nCount > 0) + { + CPLError(CE_Failure, CPLE_NotSupported, + "Read() unsupported on /vsistdout"); + m_bError = true; + } return 0; } @@ -177,16 +197,6 @@ size_t VSIStdoutHandle::Write(const void *pBuffer, size_t nSize, size_t nCount) return nRet; } -/************************************************************************/ -/* Eof() */ -/************************************************************************/ - -int VSIStdoutHandle::Eof() - -{ - return 0; -} - /************************************************************************/ /* Close() */ /************************************************************************/ @@ -270,6 +280,7 @@ class VSIStdoutRedirectFilesystemHandler final : public VSIFilesystemHandler class VSIStdoutRedirectHandle final : public VSIVirtualHandle { VSIVirtualHandle *m_poHandle = nullptr; + bool m_bError = false; CPL_DISALLOW_COPY_ASSIGN(VSIStdoutRedirectHandle) @@ -281,7 +292,22 @@ class VSIStdoutRedirectHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; - int Eof() override; + + void ClearErr() override + { + m_bError = false; + } + + int Error() override + { + return m_bError; + } + + int Eof() 
override + { + return FALSE; + } + int Flush() override; int Close() override; }; @@ -338,11 +364,15 @@ int VSIStdoutRedirectHandle::Flush() /* Read() */ /************************************************************************/ -size_t VSIStdoutRedirectHandle::Read(void * /* pBuffer */, size_t /* nSize */, - size_t /* nCount */) +size_t VSIStdoutRedirectHandle::Read(void * /* pBuffer */, size_t nSize, + size_t nCount) { - CPLError(CE_Failure, CPLE_NotSupported, - "Read() unsupported on /vsistdout_redirect"); + if (nSize > 0 && nCount > 0) + { + CPLError(CE_Failure, CPLE_NotSupported, + "Read() unsupported on /vsistdout"); + m_bError = true; + } return 0; } @@ -357,16 +387,6 @@ size_t VSIStdoutRedirectHandle::Write(const void *pBuffer, size_t nSize, return m_poHandle->Write(pBuffer, nSize, nCount); } -/************************************************************************/ -/* Eof() */ -/************************************************************************/ - -int VSIStdoutRedirectHandle::Eof() - -{ - return m_poHandle->Eof(); -} - /************************************************************************/ /* Close() */ /************************************************************************/ diff --git a/port/cpl_vsil_subfile.cpp b/port/cpl_vsil_subfile.cpp index a7aacfe06048..2e76b1da6272 100644 --- a/port/cpl_vsil_subfile.cpp +++ b/port/cpl_vsil_subfile.cpp @@ -58,6 +58,7 @@ class VSISubFileHandle final : public VSIVirtualHandle vsi_l_offset nSubregionOffset = 0; vsi_l_offset nSubregionSize = 0; bool bAtEOF = false; + bool bError = false; VSISubFileHandle() = default; ~VSISubFileHandle() override; @@ -66,7 +67,9 @@ class VSISubFileHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; int Eof() override; + int Error() override; int Close() override; }; @@ -216,7 +219,12 @@ size_t 
VSISubFileHandle::Read(void *pBuffer, size_t nSize, size_t nCount) } if (nRet < nCount) - bAtEOF = true; + { + if (fp->Eof()) + bAtEOF = true; + else /* if (fp->Error()) */ + bError = true; + } return nRet; } @@ -253,6 +261,28 @@ size_t VSISubFileHandle::Write(const void *pBuffer, size_t nSize, size_t nCount) return VSIFWriteL(pBuffer, nSize, nCount, fp); } +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSISubFileHandle::ClearErr() + +{ + fp->ClearErr(); + bAtEOF = false; + bError = false; +} + +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSISubFileHandle::Error() + +{ + return bError; +} + /************************************************************************/ /* Eof() */ /************************************************************************/ diff --git a/port/cpl_vsil_unix_stdio_64.cpp b/port/cpl_vsil_unix_stdio_64.cpp index ba23969354b3..d370fe87239f 100644 --- a/port/cpl_vsil_unix_stdio_64.cpp +++ b/port/cpl_vsil_unix_stdio_64.cpp @@ -224,6 +224,7 @@ class VSIUnixStdioHandle final : public VSIVirtualHandle bool bLastOpWrite = false; bool bLastOpRead = false; bool bAtEOF = false; + bool bError = false; // In a+ mode, disable any optimization since the behavior of the file // pointer on Mac and other BSD system is to have a seek() to the end of // file and thus a call to our Seek(0, SEEK_SET) before a read will be a @@ -241,7 +242,9 @@ class VSIUnixStdioHandle final : public VSIVirtualHandle vsi_l_offset Tell() override; size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; int Eof() override; + int Error() override; int Flush() override; int Close() override; int Truncate(vsi_l_offset 
nNewSize) override; @@ -484,13 +487,20 @@ size_t VSIUnixStdioHandle::Read(void *pBuffer, size_t nSize, size_t nCount) if (nResult != nCount) { + if (ferror(fp)) + bError = true; + else + { + CPLAssert(feof(fp)); + bAtEOF = true; + } + errno = 0; vsi_l_offset nNewOffset = VSI_FTELL64(fp); if (errno == 0) // ftell() can fail if we are end of file with a pipe. m_nOffset = nNewOffset; else CPLDebug("VSI", "%s", VSIStrerror(errno)); - bAtEOF = CPL_TO_BOOL(feof(fp)); } return nResult; @@ -544,6 +554,28 @@ size_t VSIUnixStdioHandle::Write(const void *pBuffer, size_t nSize, return nResult; } +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSIUnixStdioHandle::ClearErr() + +{ + clearerr(fp); + bAtEOF = false; + bError = false; +} + +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSIUnixStdioHandle::Error() + +{ + return bError ? 
TRUE : FALSE; +} + /************************************************************************/ /* Eof() */ /************************************************************************/ diff --git a/port/cpl_vsil_uploadonclose.cpp b/port/cpl_vsil_uploadonclose.cpp index ea57b0a6f06f..6700aa600458 100644 --- a/port/cpl_vsil_uploadonclose.cpp +++ b/port/cpl_vsil_uploadonclose.cpp @@ -75,6 +75,15 @@ class VSIUploadOnCloseHandle final : public VSIVirtualHandle return m_fpTemp->Write(pBuffer, nSize, nCount); } + void ClearErr() override + { + } + + int Error() override + { + return 0; + } + int Eof() override { return m_fpTemp->Eof(); diff --git a/port/cpl_vsil_win32.cpp b/port/cpl_vsil_win32.cpp index 794d3f775838..e3a3546770dd 100644 --- a/port/cpl_vsil_win32.cpp +++ b/port/cpl_vsil_win32.cpp @@ -108,27 +108,29 @@ class VSIWin32Handle final : public VSIVirtualHandle public: HANDLE hFile = nullptr; bool bEOF = false; + bool bError = false; bool m_bWriteThrough = false; VSIWin32Handle() = default; - virtual int Seek(vsi_l_offset nOffset, int nWhence) override; - virtual vsi_l_offset Tell() override; - virtual size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; - virtual size_t Write(const void *pBuffer, size_t nSize, - size_t nMemb) override; - virtual int Eof() override; - virtual int Flush() override; - virtual int Close() override; - virtual int Truncate(vsi_l_offset nNewSize) override; - - virtual void *GetNativeFileDescriptor() override + int Seek(vsi_l_offset nOffset, int nWhence) override; + vsi_l_offset Tell() override; + size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override; + size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override; + void ClearErr() override; + int Eof() override; + int Error() override; + int Flush() override; + int Close() override; + int Truncate(vsi_l_offset nNewSize) override; + + void *GetNativeFileDescriptor() override { return static_cast(hFile); } - virtual VSIRangeStatus GetRangeStatus(vsi_l_offset 
nOffset, - vsi_l_offset nLength) override; + VSIRangeStatus GetRangeStatus(vsi_l_offset nOffset, + vsi_l_offset nLength) override; }; /************************************************************************/ @@ -356,16 +358,20 @@ size_t VSIWin32Handle::Read(void *pBuffer, size_t nSize, size_t nCount) if (!ReadFile(hFile, pBuffer, static_cast(nSize * nCount), &dwSizeRead, nullptr)) { + bError = true; nResult = 0; errno = ErrnoFromGetLastError(); } - else if (nSize == 0) - nResult = 0; else - nResult = dwSizeRead / nSize; + { + if (nSize == 0) + nResult = 0; + else + nResult = dwSizeRead / nSize; - if (nResult != nCount) - bEOF = true; + if (nResult != nCount) + bEOF = true; + } return nResult; } @@ -396,6 +402,27 @@ size_t VSIWin32Handle::Write(const void *pBuffer, size_t nSize, size_t nCount) return nResult; } +/************************************************************************/ +/* ClearErr() */ +/************************************************************************/ + +void VSIWin32Handle::ClearErr() + +{ + bEOF = false; + bError = false; +} + +/************************************************************************/ +/* Error() */ +/************************************************************************/ + +int VSIWin32Handle::Error() + +{ + return bError ? TRUE : FALSE; +} + /************************************************************************/ /* Eof() */ /************************************************************************/ @@ -403,7 +430,7 @@ size_t VSIWin32Handle::Write(const void *pBuffer, size_t nSize, size_t nCount) int VSIWin32Handle::Eof() { - return bEOF; + return bEOF ? 
TRUE : FALSE; } /************************************************************************/ diff --git a/swig/include/cpl.i b/swig/include/cpl.i index d9574920e417..d4292154db83 100644 --- a/swig/include/cpl.i +++ b/swig/include/cpl.i @@ -902,6 +902,8 @@ VSILFILE *wrapper_VSIFOpenExL( const char *utf8_path, const char *pszMode, int %clear char **; int VSIFEofL( VSILFILE* fp ); +int VSIFErrorL( VSILFILE* fp ); +void VSIFClearErrL( VSILFILE* fp ); int VSIFFlushL( VSILFILE* fp ); VSI_RETVAL VSIFCloseL( VSILFILE* fp ); From 151a115be9e8fd6b759f0345d30279f45290d79e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 20:10:30 +0200 Subject: [PATCH 027/191] Use VSIFErrorL() in complement of VSIFEofL() --- frmts/mrsid/mrsidstream.cpp | 2 +- ogr/ogrsf_frmts/avc/avc_binwr.cpp | 2 +- ogr/ogrsf_frmts/cad/vsilfileio.cpp | 2 +- ogr/ogrsf_frmts/csv/ogrcsvlayer.cpp | 5 +++-- ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp | 4 ++-- ogr/ogrsf_frmts/georss/ogrgeorssdatasource.cpp | 2 +- ogr/ogrsf_frmts/georss/ogrgeorsslayer.cpp | 7 ++++--- ogr/ogrsf_frmts/gml/gmlreader.cpp | 5 +++-- ogr/ogrsf_frmts/gpx/ogrgpxdatasource.cpp | 2 +- ogr/ogrsf_frmts/gpx/ogrgpxlayer.cpp | 6 +++--- ogr/ogrsf_frmts/jml/ogrjmllayer.cpp | 7 ++++--- ogr/ogrsf_frmts/kml/kml.cpp | 14 +++++++------- ogr/ogrsf_frmts/libkml/ogrlibkmlfeaturestyle.cpp | 2 +- ogr/ogrsf_frmts/libkml/ogrlibkmlstyle.cpp | 2 +- ogr/ogrsf_frmts/lvbag/ogrlvbaglayer.cpp | 2 +- ogr/ogrsf_frmts/mitab/mitab_utils.cpp | 2 +- ogr/ogrsf_frmts/ntf/ntfrecord.cpp | 2 +- ogr/ogrsf_frmts/ods/ogrodsdatasource.cpp | 4 ++-- ogr/ogrsf_frmts/osm/osm_parser.cpp | 2 +- ogr/ogrsf_frmts/selafin/ogrselafinlayer.cpp | 5 +++-- ogr/ogrsf_frmts/shape/ogrshapelayer.cpp | 15 +++++++++++---- ogr/ogrsf_frmts/svg/ogrsvgdatasource.cpp | 2 +- ogr/ogrsf_frmts/svg/ogrsvglayer.cpp | 7 ++++--- ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp | 10 +++++----- 24 files changed, 63 insertions(+), 50 deletions(-) diff --git a/frmts/mrsid/mrsidstream.cpp 
b/frmts/mrsid/mrsidstream.cpp index cda4fe8173e4..51d84909f65b 100644 --- a/frmts/mrsid/mrsidstream.cpp +++ b/frmts/mrsid/mrsidstream.cpp @@ -110,7 +110,7 @@ bool LTIVSIStream::isEOF() CPLAssert(poFileHandle); errno = 0; - bool bIsEOF = (0 != poFileHandle->Eof()); + bool bIsEOF = (poFileHandle->Eof() != 0 || poFileHandle->Error() != 0); nError = errno; return bIsEOF; diff --git a/ogr/ogrsf_frmts/avc/avc_binwr.cpp b/ogr/ogrsf_frmts/avc/avc_binwr.cpp index 5febe68bc65a..6898eda2009d 100644 --- a/ogr/ogrsf_frmts/avc/avc_binwr.cpp +++ b/ogr/ogrsf_frmts/avc/avc_binwr.cpp @@ -1511,7 +1511,7 @@ static int _AVCBinWriteCreateArcDirEntry(const char *pszArcDirFile, if ((fp = VSIFOpenL(pszArcDirFile, "r")) != nullptr) { char buf[380]; - while (!VSIFEofL(fp)) + while (!VSIFEofL(fp) && !VSIFErrorL(fp)) { if (VSIFReadL(buf, 380, 1, fp) == 1) numDirEntries++; diff --git a/ogr/ogrsf_frmts/cad/vsilfileio.cpp b/ogr/ogrsf_frmts/cad/vsilfileio.cpp index 9f4f43570f78..30915328f2d1 100644 --- a/ogr/ogrsf_frmts/cad/vsilfileio.cpp +++ b/ogr/ogrsf_frmts/cad/vsilfileio.cpp @@ -49,7 +49,7 @@ const char *VSILFileIO::ReadLine() bool VSILFileIO::Eof() const { - return VSIFEofL(m_oFileStream) == 0 ? 
false : true; + return VSIFEofL(m_oFileStream) || VSIFErrorL(m_oFileStream); } bool VSILFileIO::Open(int mode) diff --git a/ogr/ogrsf_frmts/csv/ogrcsvlayer.cpp b/ogr/ogrsf_frmts/csv/ogrcsvlayer.cpp index ba3194e5e3ac..8dac065f30d3 100644 --- a/ogr/ogrsf_frmts/csv/ogrcsvlayer.cpp +++ b/ogr/ogrsf_frmts/csv/ogrcsvlayer.cpp @@ -49,6 +49,7 @@ #include "cpl_error.h" #include "cpl_string.h" #include "cpl_vsi.h" +#include "cpl_vsi_virtual.h" #include "ogr_api.h" #include "ogr_core.h" #include "ogr_feature.h" @@ -973,7 +974,7 @@ char **OGRCSVLayer::AutodetectFieldTypes(char **papszOpenOptions, std::vector anFieldPrecision(nFieldCount); int nStringFieldCount = 0; - while (!VSIFEofL(fp)) + while (!fp->Eof() && !fp->Error()) { char **papszTokens = CSVReadParseLine3L(fp, m_nMaxLineSize, szDelimiter, @@ -988,7 +989,7 @@ char **OGRCSVLayer::AutodetectFieldTypes(char **papszOpenOptions, if (bStreaming) { // Ignore last line if it is truncated. - if (VSIFEofL(fp) && nRead == static_cast(nRequested) && + if (fp->Eof() && nRead == static_cast(nRequested) && pszData[nRead - 1] != 13 && pszData[nRead - 1] != 10) { CSLDestroy(papszTokens); diff --git a/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp b/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp index c541bad2ed0f..f964afb0be57 100644 --- a/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp +++ b/ogr/ogrsf_frmts/flatgeobuf/ogrflatgeobuflayer.cpp @@ -1023,7 +1023,7 @@ OGRFeature *OGRFlatGeobufLayer::GetNextFeature() return nullptr; } - if (VSIFEofL(m_poFp)) + if (VSIFEofL(m_poFp) || VSIFErrorL(m_poFp)) { CPLDebug("FlatGeobuf", "GetNextFeature: iteration end due to EOF"); return nullptr; @@ -1977,7 +1977,7 @@ int OGRFlatGeobufLayer::GetNextArrowArray(struct ArrowArrayStream *stream, end_of_loop: - if (VSIFEofL(m_poFp)) + if (VSIFEofL(m_poFp) || VSIFErrorL(m_poFp)) { CPLDebug("FlatGeobuf", "GetNextFeature: iteration end due to EOF"); break; diff --git a/ogr/ogrsf_frmts/georss/ogrgeorssdatasource.cpp 
b/ogr/ogrsf_frmts/georss/ogrgeorssdatasource.cpp index 40d0264578a9..397b04aaf0c6 100644 --- a/ogr/ogrsf_frmts/georss/ogrgeorssdatasource.cpp +++ b/ogr/ogrsf_frmts/georss/ogrgeorssdatasource.cpp @@ -286,7 +286,7 @@ int OGRGeoRSSDataSource::Open(const char *pszFilename, int bUpdateIn) nDataHandlerCounter = 0; nLen = static_cast( VSIFReadL(aBuf.data(), 1, aBuf.size(), fp)); - nDone = VSIFEofL(fp); + nDone = nLen < aBuf.size(); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { if (nLen <= PARSER_BUF_SIZE - 1) diff --git a/ogr/ogrsf_frmts/georss/ogrgeorsslayer.cpp b/ogr/ogrsf_frmts/georss/ogrgeorsslayer.cpp index e26af9d9be32..9b968db2cbce 100644 --- a/ogr/ogrsf_frmts/georss/ogrgeorsslayer.cpp +++ b/ogr/ogrsf_frmts/georss/ogrgeorsslayer.cpp @@ -255,6 +255,7 @@ void OGRGeoRSSLayer::ResetReading() if (fpGeoRSS) { VSIFSeekL(fpGeoRSS, 0, SEEK_SET); + VSIFClearErrL(fpGeoRSS); #ifdef HAVE_EXPAT if (oParser) XML_ParserFree(oParser); @@ -968,7 +969,7 @@ OGRFeature *OGRGeoRSSLayer::GetNextFeature() return ppoFeatureTab[nFeatureTabIndex++]; } - if (VSIFEofL(fpGeoRSS)) + if (VSIFEofL(fpGeoRSS) || VSIFErrorL(fpGeoRSS)) return nullptr; CPLFree(ppoFeatureTab); @@ -982,7 +983,7 @@ OGRFeature *OGRGeoRSSLayer::GetNextFeature() { unsigned int nLen = static_cast( VSIFReadL(aBuf.data(), 1, aBuf.size(), fpGeoRSS)); - nDone = VSIFEofL(fpGeoRSS); + nDone = nLen < aBuf.size(); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1792,7 +1793,7 @@ void OGRGeoRSSLayer::LoadSchema() nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpGeoRSS); - nDone = VSIFEofL(fpGeoRSS); + nDone = nLen < aBuf.size(); if (XML_Parse(oSchemaParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { diff --git a/ogr/ogrsf_frmts/gml/gmlreader.cpp b/ogr/ogrsf_frmts/gml/gmlreader.cpp index 747f788d45ca..cabd5b27e6d5 100644 --- a/ogr/ogrsf_frmts/gml/gmlreader.cpp +++ 
b/ogr/ogrsf_frmts/gml/gmlreader.cpp @@ -510,7 +510,8 @@ GMLFeature *GMLReader::NextFeatureExpat() return nullptr; } - if (fpGML == nullptr || m_bStopParsing || VSIFEofL(fpGML)) + if (fpGML == nullptr || m_bStopParsing || VSIFEofL(fpGML) || + VSIFErrorL(fpGML)) return nullptr; nFeatureTabLength = 0; @@ -525,7 +526,7 @@ GMLFeature *GMLReader::NextFeatureExpat() unsigned int nLen = static_cast( VSIFReadL(pabyBuf, 1, PARSER_BUF_SIZE, fpGML)); - nDone = VSIFEofL(fpGML); + nDone = nLen < PARSER_BUF_SIZE; // Some files, such as APT_AIXM.xml from // https://nfdc.faa.gov/webContent/56DaySub/2015-03-05/aixm5.1.zip diff --git a/ogr/ogrsf_frmts/gpx/ogrgpxdatasource.cpp b/ogr/ogrsf_frmts/gpx/ogrgpxdatasource.cpp index 7c884948a4c3..84928c5836f7 100644 --- a/ogr/ogrsf_frmts/gpx/ogrgpxdatasource.cpp +++ b/ogr/ogrsf_frmts/gpx/ogrgpxdatasource.cpp @@ -493,7 +493,7 @@ int OGRGPXDataSource::Open(GDALOpenInfo *poOpenInfo) nLen = static_cast( VSIFReadL(aBuf.data(), 1, aBuf.size(), fp)); nTotalBytesRead += nLen; - nDone = VSIFEofL(fp); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { if (nLen <= PARSER_BUF_SIZE - 1) diff --git a/ogr/ogrsf_frmts/gpx/ogrgpxlayer.cpp b/ogr/ogrsf_frmts/gpx/ogrgpxlayer.cpp index f34bae7ebce7..cd59f73c9507 100644 --- a/ogr/ogrsf_frmts/gpx/ogrgpxlayer.cpp +++ b/ogr/ogrsf_frmts/gpx/ogrgpxlayer.cpp @@ -1019,7 +1019,7 @@ OGRFeature *OGRGPXLayer::GetNextFeature() return poFeature; } - if (m_fpGPX->Eof()) + if (m_fpGPX->Eof() || m_fpGPX->Error()) return nullptr; std::vector aBuf(PARSER_BUF_SIZE); @@ -1031,7 +1031,7 @@ OGRFeature *OGRGPXLayer::GetNextFeature() m_nDataHandlerCounter = 0; unsigned int nLen = static_cast( m_fpGPX->Read(aBuf.data(), 1, aBuf.size())); - nDone = m_fpGPX->Eof(); + nDone = (nLen < aBuf.size()); if (XML_Parse(m_oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1892,7 +1892,7 @@ void OGRGPXLayer::LoadExtensionsSchema() 
m_nDataHandlerCounter = 0; unsigned int nLen = static_cast( m_fpGPX->Read(aBuf.data(), 1, aBuf.size())); - nDone = m_fpGPX->Eof(); + nDone = (nLen < aBuf.size()); if (XML_Parse(m_oSchemaParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { diff --git a/ogr/ogrsf_frmts/jml/ogrjmllayer.cpp b/ogr/ogrsf_frmts/jml/ogrjmllayer.cpp index e40b7104b77c..8ff4e9462624 100644 --- a/ogr/ogrsf_frmts/jml/ogrjmllayer.cpp +++ b/ogr/ogrsf_frmts/jml/ogrjmllayer.cpp @@ -115,6 +115,7 @@ void OGRJMLLayer::ResetReading() nNextFID = 0; VSIFSeekL(fp, 0, SEEK_SET); + VSIFClearErrL(fp); if (oParser) XML_ParserFree(oParser); @@ -433,7 +434,7 @@ OGRFeature *OGRJMLLayer::GetNextFeature() return ppoFeatureTab[nFeatureTabIndex++]; } - if (VSIFEofL(fp)) + if (VSIFEofL(fp) || VSIFErrorL(fp)) return nullptr; std::vector aBuf(PARSER_BUF_SIZE); @@ -449,7 +450,7 @@ OGRFeature *OGRJMLLayer::GetNextFeature() nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fp); - nDone = VSIFEofL(fp); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -515,7 +516,7 @@ void OGRJMLLayer::LoadSchema() nDataHandlerCounter = 0; const unsigned int nLen = static_cast( VSIFReadL(aBuf.data(), 1, aBuf.size(), fp)); - nDone = VSIFEofL(fp); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, diff --git a/ogr/ogrsf_frmts/kml/kml.cpp b/ogr/ogrsf_frmts/kml/kml.cpp index aa18e88664e3..1adc016d442c 100644 --- a/ogr/ogrsf_frmts/kml/kml.cpp +++ b/ogr/ogrsf_frmts/kml/kml.cpp @@ -96,15 +96,15 @@ bool KML::parse() nWithoutEventCounter = 0; int nDone = 0; - int nLen = 0; + unsigned nLen = 0; std::vector aBuf(PARSER_BUF_SIZE); bool bError = false; do { nDataHandlerCounter = 0; - nLen = (int)VSIFReadL(aBuf.data(), 1, aBuf.size(), pKMLFile_); - nDone = VSIFEofL(pKMLFile_); + nLen = (unsigned)VSIFReadL(aBuf.data(), 
1, aBuf.size(), pKMLFile_); + nDone = nLen < aBuf.size(); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -185,16 +185,16 @@ void KML::checkValidity() oCurrentParser = oParser; int nDone = 0; - int nLen = 0; + unsigned nLen = 0; std::vector aBuf(PARSER_BUF_SIZE); // Parses the file until we find the first element. do { nDataHandlerCounter = 0; - nLen = - static_cast(VSIFReadL(aBuf.data(), 1, aBuf.size(), pKMLFile_)); - nDone = VSIFEofL(pKMLFile_); + nLen = static_cast( + VSIFReadL(aBuf.data(), 1, aBuf.size(), pKMLFile_)); + nDone = nLen < aBuf.size(); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { if (nLen <= PARSER_BUF_SIZE - 1) diff --git a/ogr/ogrsf_frmts/libkml/ogrlibkmlfeaturestyle.cpp b/ogr/ogrsf_frmts/libkml/ogrlibkmlfeaturestyle.cpp index 8e9684d58a80..d6c13aa39925 100644 --- a/ogr/ogrsf_frmts/libkml/ogrlibkmlfeaturestyle.cpp +++ b/ogr/ogrsf_frmts/libkml/ogrlibkmlfeaturestyle.cpp @@ -265,7 +265,7 @@ void kml2featurestyle(FeaturePtr poKmlFeature, OGRLIBKMLDataSource *poOgrDS, szbuf[nRead] = '\0'; oStyle.append(szbuf); - } while (!VSIFEofL(fp)); + } while (!VSIFEofL(fp) && !VSIFErrorL(fp)); VSIFCloseL(fp); diff --git a/ogr/ogrsf_frmts/libkml/ogrlibkmlstyle.cpp b/ogr/ogrsf_frmts/libkml/ogrlibkmlstyle.cpp index 2a8d79bcd46e..13c63a85c345 100644 --- a/ogr/ogrsf_frmts/libkml/ogrlibkmlstyle.cpp +++ b/ogr/ogrsf_frmts/libkml/ogrlibkmlstyle.cpp @@ -827,7 +827,7 @@ static StyleSelectorPtr StyleFromStyleURL(const StyleMapPtr &stylemap, /***** copy buf to the string *****/ szbuf[nRead] = '\0'; oStyle.append(szbuf); - } while (!VSIFEofL(fp)); + } while (!VSIFEofL(fp) && !VSIFErrorL(fp)); VSIFCloseL(fp); diff --git a/ogr/ogrsf_frmts/lvbag/ogrlvbaglayer.cpp b/ogr/ogrsf_frmts/lvbag/ogrlvbaglayer.cpp index 0faf59cfef0e..cde88746997f 100644 --- a/ogr/ogrsf_frmts/lvbag/ogrlvbaglayer.cpp +++ b/ogr/ogrsf_frmts/lvbag/ogrlvbaglayer.cpp @@ -860,7 +860,7 @@ void OGRLVBAGLayer::ParseDocument() 
VSIFReadL(aBuf.data(), 1, aBuf.size(), fp)); if (IsParserFinished(XML_Parse(oParser.get(), aBuf.data(), nLen, - VSIFEofL(fp)))) + nLen < aBuf.size()))) return; break; diff --git a/ogr/ogrsf_frmts/mitab/mitab_utils.cpp b/ogr/ogrsf_frmts/mitab/mitab_utils.cpp index ffb2d3ba70ba..64651e14d809 100644 --- a/ogr/ogrsf_frmts/mitab/mitab_utils.cpp +++ b/ogr/ogrsf_frmts/mitab/mitab_utils.cpp @@ -317,7 +317,7 @@ char **TAB_CSLLoad(const char *pszFname) if (fp) { - while (!VSIFEofL(fp)) + while (!VSIFEofL(fp) && !VSIFErrorL(fp)) { const char *pszLine = nullptr; if ((pszLine = CPLReadLineL(fp)) != nullptr) diff --git a/ogr/ogrsf_frmts/ntf/ntfrecord.cpp b/ogr/ogrsf_frmts/ntf/ntfrecord.cpp index 1dc477fee42f..0bf6bebec369 100644 --- a/ogr/ogrsf_frmts/ntf/ntfrecord.cpp +++ b/ogr/ogrsf_frmts/ntf/ntfrecord.cpp @@ -157,7 +157,7 @@ int NTFRecord::ReadPhysicalLine(VSILFILE *fp, char *pszLine) { if (VSIFEofL(fp)) return -1; - else + else /* if (VSIFErrorL(fp)) */ { CPLError(CE_Failure, CPLE_AppDefined, "Low level read error occurred while reading NTF file."); diff --git a/ogr/ogrsf_frmts/ods/ogrodsdatasource.cpp b/ogr/ogrsf_frmts/ods/ogrodsdatasource.cpp index 8236267af054..fe166a6d3975 100644 --- a/ogr/ogrsf_frmts/ods/ogrodsdatasource.cpp +++ b/ogr/ogrsf_frmts/ods/ogrodsdatasource.cpp @@ -1318,7 +1318,7 @@ void OGRODSDataSource::AnalyseFile() nDataHandlerCounter = 0; unsigned int nLen = static_cast( VSIFReadL(aBuf.data(), 1, aBuf.size(), fpContent)); - nDone = VSIFEofL(fpContent); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1501,7 +1501,7 @@ void OGRODSDataSource::AnalyseSettings() nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpSettings); - nDone = VSIFEofL(fpSettings); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, diff --git 
a/ogr/ogrsf_frmts/osm/osm_parser.cpp b/ogr/ogrsf_frmts/osm/osm_parser.cpp index f171428e0e0b..cb333d27fe8b 100644 --- a/ogr/ogrsf_frmts/osm/osm_parser.cpp +++ b/ogr/ogrsf_frmts/osm/osm_parser.cpp @@ -2597,7 +2597,7 @@ static OSMRetCode XML_ProcessBlock(OSMContext *psCtxt) psCtxt->nBytesRead += nLen; - psCtxt->bEOF = CPL_TO_BOOL(VSIFEofL(psCtxt->fp)); + psCtxt->bEOF = nLen < XML_BUFSIZE; const int eErr = XML_Parse(psCtxt->hXMLParser, (const char *)psCtxt->pabyBlob, nLen, psCtxt->bEOF); diff --git a/ogr/ogrsf_frmts/selafin/ogrselafinlayer.cpp b/ogr/ogrsf_frmts/selafin/ogrselafinlayer.cpp index f174d57df6ec..8a6713521dc4 100644 --- a/ogr/ogrsf_frmts/selafin/ogrselafinlayer.cpp +++ b/ogr/ogrsf_frmts/selafin/ogrselafinlayer.cpp @@ -33,6 +33,7 @@ #include "ogr_selafin.h" #include "cpl_error.h" #include "cpl_quad_tree.h" +#include "cpl_vsi_virtual.h" /************************************************************************/ /* Utilities functions */ @@ -43,9 +44,9 @@ static void MoveOverwrite(VSILFILE *fpDest, VSILFILE *fpSource) VSIRewindL(fpDest); VSIFTruncateL(fpDest, 0); char anBuf[0x10000]; - while (!VSIFEofL(fpSource)) + while (!fpSource->Eof() && !fpSource->Error()) { - size_t nSize = VSIFReadL(anBuf, 1, 0x10000, fpSource); + size_t nSize = VSIFReadL(anBuf, 1, sizeof(anBuf), fpSource); size_t nLeft = nSize; while (nLeft > 0) nLeft -= VSIFWriteL(anBuf + nSize - nLeft, 1, nLeft, fpDest); diff --git a/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp b/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp index 2a1507cea652..49a4c3177601 100644 --- a/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp +++ b/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp @@ -788,6 +788,9 @@ void OGRShapeLayer::ResetReading() if (bHeaderDirty && bUpdateAccess) SyncToDisk(); + + if (hDBF) + VSIFClearErrL(VSI_SHP_GetVSIL(hDBF->fp)); } /************************************************************************/ @@ -997,7 +1000,8 @@ OGRFeature *OGRShapeLayer::GetNextFeature() { if (DBFIsRecordDeleted(hDBF, iNextShapeId)) poFeature = 
nullptr; - else if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp))) + else if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp)) || + VSIFErrorL(VSI_SHP_GetVSIL(hDBF->fp))) return nullptr; //* I/O error. else poFeature = FetchShape(iNextShapeId); @@ -1426,7 +1430,8 @@ int OGRShapeLayer::GetFeatureCountWithSpatialFilterOnly() if (DBFIsRecordDeleted(hDBF, iShape)) continue; - if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp))) + if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp)) || + VSIFErrorL(VSI_SHP_GetVSIL(hDBF->fp))) break; } } @@ -2843,7 +2848,8 @@ OGRErr OGRShapeLayer::Repack() } panRecordsToDelete[nDeleteCount++] = iShape; } - if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp))) + if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp)) || + VSIFErrorL(VSI_SHP_GetVSIL(hDBF->fp))) { CPLFree(panRecordsToDelete); return OGRERR_FAILURE; // I/O error. @@ -3914,7 +3920,8 @@ int OGRShapeLayer::GetNextArrowArray(struct ArrowArrayStream *stream, ++iNextShapeId; continue; } - if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp))) + if (VSIFEofL(VSI_SHP_GetVSIL(hDBF->fp)) || + VSIFErrorL(VSI_SHP_GetVSIL(hDBF->fp))) { out_array->release(out_array); memset(out_array, 0, sizeof(*out_array)); diff --git a/ogr/ogrsf_frmts/svg/ogrsvgdatasource.cpp b/ogr/ogrsf_frmts/svg/ogrsvgdatasource.cpp index 0a5bafa78e13..0731fcc8d7a1 100644 --- a/ogr/ogrsf_frmts/svg/ogrsvgdatasource.cpp +++ b/ogr/ogrsf_frmts/svg/ogrsvgdatasource.cpp @@ -178,7 +178,7 @@ int OGRSVGDataSource::Open(const char *pszFilename) { nDataHandlerCounter = 0; nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fp); - nDone = VSIFEofL(fp); + nDone = nLen < aBuf.size(); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { if (nLen <= PARSER_BUF_SIZE - 1) diff --git a/ogr/ogrsf_frmts/svg/ogrsvglayer.cpp b/ogr/ogrsf_frmts/svg/ogrsvglayer.cpp index ac0821897a1a..d29baa4bec1d 100644 --- a/ogr/ogrsf_frmts/svg/ogrsvglayer.cpp +++ b/ogr/ogrsf_frmts/svg/ogrsvglayer.cpp @@ -158,6 +158,7 @@ void OGRSVGLayer::ResetReading() if (fpSVG) { VSIFSeekL(fpSVG, 0, SEEK_SET); + VSIFClearErrL(fpSVG); 
#ifdef HAVE_EXPAT if (oParser) XML_ParserFree(oParser); @@ -536,7 +537,7 @@ OGRFeature *OGRSVGLayer::GetNextFeature() return ppoFeatureTab[nFeatureTabIndex++]; } - if (VSIFEofL(fpSVG)) + if (VSIFEofL(fpSVG) || VSIFErrorL(fpSVG)) return nullptr; std::vector aBuf(PARSER_BUF_SIZE); @@ -554,7 +555,7 @@ OGRFeature *OGRSVGLayer::GetNextFeature() nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpSVG); - nDone = VSIFEofL(fpSVG); + nDone = nLen < aBuf.size(); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError( @@ -664,7 +665,7 @@ void OGRSVGLayer::LoadSchema() nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpSVG); - nDone = VSIFEofL(fpSVG); + nDone = nLen < aBuf.size(); if (XML_Parse(oSchemaParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { diff --git a/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp b/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp index 580e8fee59f7..6f2fa9857f0f 100644 --- a/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp +++ b/ogr/ogrsf_frmts/xlsx/ogrxlsxdatasource.cpp @@ -1235,7 +1235,7 @@ void OGRXLSXDataSource::BuildLayer(OGRXLSXLayer *poLayer) nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fp); - nDone = VSIFEofL(fp); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1419,7 +1419,7 @@ void OGRXLSXDataSource::AnalyseSharedStrings(VSILFILE *fpSharedStrings) nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpSharedStrings); - nDone = VSIFEofL(fpSharedStrings); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1501,7 +1501,7 @@ void OGRXLSXDataSource::AnalyseWorkbookRels(VSILFILE *fpWorkbookRels) nDataHandlerCounter = 0; unsigned int nLen = 
(unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpWorkbookRels); - nDone = VSIFEofL(fpWorkbookRels); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1616,7 +1616,7 @@ void OGRXLSXDataSource::AnalyseWorkbook(VSILFILE *fpWorkbook) nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpWorkbook); - nDone = VSIFEofL(fpWorkbook); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, @@ -1780,7 +1780,7 @@ void OGRXLSXDataSource::AnalyseStyles(VSILFILE *fpStyles) nDataHandlerCounter = 0; unsigned int nLen = (unsigned int)VSIFReadL(aBuf.data(), 1, aBuf.size(), fpStyles); - nDone = VSIFEofL(fpStyles); + nDone = (nLen < aBuf.size()); if (XML_Parse(oParser, aBuf.data(), nLen, nDone) == XML_STATUS_ERROR) { CPLError(CE_Failure, CPLE_AppDefined, From b7d46e5a51db9616d96e5ea67bc812c9dc90211e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 20:20:09 +0200 Subject: [PATCH 028/191] VSIWin32Handle: Read(): handle cleanly nSize * nCount > UINT32_MAX --- port/cpl_vsil_win32.cpp | 46 ++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/port/cpl_vsil_win32.cpp b/port/cpl_vsil_win32.cpp index e3a3546770dd..81a5bc3678a0 100644 --- a/port/cpl_vsil_win32.cpp +++ b/port/cpl_vsil_win32.cpp @@ -352,23 +352,35 @@ int VSIWin32Handle::Flush() size_t VSIWin32Handle::Read(void *pBuffer, size_t nSize, size_t nCount) { - DWORD dwSizeRead = 0; - size_t nResult = 0; - - if (!ReadFile(hFile, pBuffer, static_cast(nSize * nCount), - &dwSizeRead, nullptr)) + GByte *const pabyBuffer = static_cast(pBuffer); + size_t nTotalRead = 0; + size_t nRemaining = nSize * nCount; + while (nRemaining > 0) { - bError = true; - nResult = 0; - errno = ErrnoFromGetLastError(); - } - else - { - if (nSize == 0) - nResult = 
0; + DWORD dwSizeRead = 0; + DWORD dwToRead = static_cast( + nRemaining > UINT32_MAX ? UINT32_MAX : nRemaining); + + if (!ReadFile(hFile, pabyBuffer + nTotalRead, dwToRead, &dwSizeRead, + nullptr)) + { + bError = true; + errno = ErrnoFromGetLastError(); + return 0; + } else - nResult = dwSizeRead / nSize; + { + nTotalRead += dwSizeRead; + nRemaining -= dwSizeRead; + if (dwSizeRead < dwToRead) + break; + } + } + size_t nResult = 0; + if (nSize) + { + nResult = nTotalRead / nSize; if (nResult != nCount) bEOF = true; } @@ -386,6 +398,12 @@ size_t VSIWin32Handle::Write(const void *pBuffer, size_t nSize, size_t nCount) DWORD dwSizeWritten = 0; size_t nResult = 0; + if (nSize > 0 && nCount > UINT32_MAX / nSize) + { + CPLError(CE_Failure, CPLE_FileIO, "Too many bytes to write at once"); + return 0; + } + if (!WriteFile(hFile, pBuffer, static_cast(nSize * nCount), &dwSizeWritten, nullptr)) { From 9b597d4552a1cd51b1b7c4a4d5704896b119e955 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 15 May 2024 23:54:34 +0200 Subject: [PATCH 029/191] /vsigzip/: Read(): detect attempts to read more than 4 GB at once --- port/cpl_vsil_gzip.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/port/cpl_vsil_gzip.cpp b/port/cpl_vsil_gzip.cpp index ec5f111fd654..0c1ad3570ae5 100644 --- a/port/cpl_vsil_gzip.cpp +++ b/port/cpl_vsil_gzip.cpp @@ -1021,6 +1021,12 @@ size_t VSIGZipHandle::Read(void *const buf, size_t const nSize, return 0; } + if (nSize > 0 && nMemb > UINT32_MAX / nSize) + { + CPLError(CE_Failure, CPLE_FileIO, "Too many bytes to read at once"); + return 0; + } + const unsigned len = static_cast(nSize) * static_cast(nMemb); Bytef *pStart = @@ -1694,6 +1700,12 @@ size_t VSIDeflate64Handle::Read(void *const buf, size_t const nSize, return 0; } + if (nSize > 0 && nMemb > UINT32_MAX / nSize) + { + CPLError(CE_Failure, CPLE_FileIO, "Too many bytes to read at once"); + return 0; + } + const unsigned len = static_cast(nSize) * static_cast(nMemb); Bytef *pStart = From 
b04f97e55c7483b243043c5ca34ad6106367fa6a Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 00:11:58 +0200 Subject: [PATCH 030/191] test_vsifile_CopyFileRestartable(): silence error message --- autotest/gcore/vsifile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/autotest/gcore/vsifile.py b/autotest/gcore/vsifile.py index ac8ff4943160..d15a91609e93 100755 --- a/autotest/gcore/vsifile.py +++ b/autotest/gcore/vsifile.py @@ -1409,9 +1409,10 @@ def test_vsifile_CopyFileRestartable(tmp_vsimem): dstfilename = str(tmp_vsimem / "out.txt") - retcode, output_payload = gdal.CopyFileRestartable( - str(tmp_vsimem / "i_do_not_exist.txt"), dstfilename, None - ) + with gdal.quiet_errors(): + retcode, output_payload = gdal.CopyFileRestartable( + str(tmp_vsimem / "i_do_not_exist.txt"), dstfilename, None + ) assert retcode == -1 assert output_payload is None assert gdal.VSIStatL(dstfilename) is None From 1c3e4c71a18aaf0aec53b7fec9bd1b12c14bc2a7 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 03:12:35 +0200 Subject: [PATCH 031/191] cpl_vsil_crypt.cpp: remove a few useless casts --- port/cpl_vsil_crypt.cpp | 54 +++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/port/cpl_vsil_crypt.cpp b/port/cpl_vsil_crypt.cpp index 7f4b931165fa..d5d78f51b34e 100644 --- a/port/cpl_vsil_crypt.cpp +++ b/port/cpl_vsil_crypt.cpp @@ -555,9 +555,7 @@ int VSICryptFileHeader::ReadFromFile(VSIVirtualHandle *fp, return VSICryptReadError(); osIV.resize(nIVSize); - // TODO(schwehr): Using the const buffer of a string is a bad idea. 
- if (fp->Read(reinterpret_cast(const_cast(osIV.c_str())), 1, - nIVSize) != nIVSize) + if (fp->Read(osIV.data(), 1, nIVSize) != nIVSize) return VSICryptReadError(); GUInt16 nFreeTextSize; @@ -565,9 +563,7 @@ int VSICryptFileHeader::ReadFromFile(VSIVirtualHandle *fp, return VSICryptReadError(); osFreeText.resize(nFreeTextSize); - if (fp->Read( - reinterpret_cast(const_cast(osFreeText.c_str())), 1, - nFreeTextSize) != nFreeTextSize) + if (fp->Read(osFreeText.data(), 1, nFreeTextSize) != nFreeTextSize) return VSICryptReadError(); GByte nKeyCheckSize; @@ -578,9 +574,7 @@ int VSICryptFileHeader::ReadFromFile(VSIVirtualHandle *fp, { CPLString osKeyCheck; osKeyCheck.resize(nKeyCheckSize); - if (fp->Read(reinterpret_cast( - const_cast(osKeyCheck.c_str())), - 1, nKeyCheckSize) != nKeyCheckSize) + if (fp->Read(osKeyCheck.data(), 1, nKeyCheckSize) != nKeyCheckSize) return VSICryptReadError(); if (osKey.empty() && pabyGlobalKey == nullptr) @@ -657,8 +651,8 @@ int VSICryptFileHeader::ReadFromFile(VSIVirtualHandle *fp, nExtraContentSize = CPL_LSBWORD16(nExtraContentSize); osExtraContent.resize(nExtraContentSize); - if (fp->Read(const_cast(osExtraContent.c_str()), 1, - nExtraContentSize) != nExtraContentSize) + if (fp->Read(osExtraContent.data(), 1, nExtraContentSize) != + nExtraContentSize) return VSICryptReadError(); return TRUE; @@ -1589,7 +1583,7 @@ static CPLString GetKey(const char *pszFilename) CPLFree(key); } // coverity[tainted_data] - memset(const_cast(osKeyB64.c_str()), 0, osKeyB64.size()); + memset(osKeyB64.data(), 0, osKeyB64.size()); } return osKey; } @@ -1630,7 +1624,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, VSICryptFileHeader *poHeader = new VSICryptFileHeader(); if (!poHeader->ReadFromFile(fpBase, osKey)) { - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); fpBase->Close(); delete fpBase; delete poHeader; @@ -1643,11 +1637,11 @@ VSICryptFilesystemHandler::Open(const char 
*pszFilename, const char *pszAccess, : VSICRYPT_READ); if (!poHandle->Init(osKey, false)) { - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); delete poHandle; poHandle = nullptr; } - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); return poHandle; } else if (strchr(pszAccess, 'w')) @@ -1687,7 +1681,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, CPLError(CE_Failure, CPLE_AppDefined, "Cipher algorithm not supported in this build: %s", osAlg.c_str()); - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); return nullptr; } int nMinKeySize = static_cast(poBlock->MinKeyLength()); @@ -1701,7 +1695,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, { CPLError(CE_Failure, CPLE_AppDefined, "IV should be %d byte large", nBlockSize); - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); return nullptr; } } @@ -1710,9 +1704,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, osIV.resize(nBlockSize); CryptoPP::OS_GenerateRandomBlock( false, // Do not need cryptographic randomness. - reinterpret_cast( - const_cast(osIV.c_str())), - osIV.size()); + reinterpret_cast(osIV.data()), osIV.size()); } if (EQUAL(osKey, "GENERATE_IT")) @@ -1725,9 +1717,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, // Config option for speeding tests. CPLTestBool( CPLGetConfigOption("VSICRYPT_CRYPTO_RANDOM", "TRUE")), - reinterpret_cast( - const_cast(osKey.c_str())), - osKey.size()); + reinterpret_cast(osKey.data()), osKey.size()); char *pszB64 = CPLBase64Encode(static_cast(osKey.size()), @@ -1751,7 +1741,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, CPLError(CE_Failure, CPLE_AppDefined, "Key is too short: %d bytes. 
Should be at least %d bytes", nKeyLength, nMinKeySize); - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); return nullptr; } @@ -1759,7 +1749,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, VSIFOpenL(osFilename, osAccess.c_str())); if (fpBase == nullptr) { - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); return nullptr; } @@ -1778,11 +1768,11 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, : VSICRYPT_WRITE); if (!poHandle->Init(osKey, true)) { - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); delete poHandle; poHandle = nullptr; } - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); return poHandle; } else if (strchr(pszAccess, 'a')) @@ -1791,13 +1781,13 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, reinterpret_cast(VSIFOpenL(osFilename, "rb+")); if (fpBase == nullptr) { - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); return VSIFilesystemHandler::Open(pszFilename, "wb+"); } VSICryptFileHeader *poHeader = new VSICryptFileHeader(); if (!poHeader->ReadFromFile(fpBase, osKey)) { - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); fpBase->Close(); delete fpBase; delete poHeader; @@ -1811,7 +1801,7 @@ VSICryptFilesystemHandler::Open(const char *pszFilename, const char *pszAccess, delete poHandle; poHandle = nullptr; } - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); if (poHandle != nullptr) poHandle->Seek(0, SEEK_END); return poHandle; @@ -1841,13 +1831,13 @@ int VSICryptFilesystemHandler::Stat(const char *pszFilename, CPLString osKey(GetKey(pszFilename)); if (!poHeader->ReadFromFile(fp, osKey)) { - memset(const_cast(osKey.c_str()), 0, osKey.size()); + 
memset(osKey.data(), 0, osKey.size()); fp->Close(); delete fp; delete poHeader; return -1; } - memset(const_cast(osKey.c_str()), 0, osKey.size()); + memset(osKey.data(), 0, osKey.size()); fp->Close(); delete fp; if (poHeader) From b1cc25f8b3efca0eb2853fed0324d9e89daa5c84 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 16 May 2024 00:03:47 +0200 Subject: [PATCH 032/191] VSICopyFile(): detect error when reading source file, and delete output file on error --- autotest/gcore/vsifile.py | 65 ++++++++++++++++++++++++++++++--------- port/cpl_vsil.cpp | 50 ++++++++++++++++++++---------- 2 files changed, 85 insertions(+), 30 deletions(-) diff --git a/autotest/gcore/vsifile.py b/autotest/gcore/vsifile.py index d15a91609e93..094d7e5da1d3 100755 --- a/autotest/gcore/vsifile.py +++ b/autotest/gcore/vsifile.py @@ -1239,24 +1239,40 @@ def test_vsifile_vsimem_truncate_zeroize(): # Test VSICopyFile() -def test_vsifile_copyfile(): +def test_vsifile_copyfile_regular(tmp_vsimem): # Most simple invocation - dstfilename = "/vsimem/test_vsifile_copyfile.tif" + dstfilename = str(tmp_vsimem / "out.bin") assert gdal.CopyFile("data/byte.tif", dstfilename) == 0 assert gdal.VSIStatL(dstfilename).size == gdal.VSIStatL("data/byte.tif").size + +def test_vsifile_copyfile_srcfilename_none(tmp_vsimem): + # Test srcfilename passed to None - srcfilename = "/vsimem/test.bin" + srcfilename = str(tmp_vsimem / "src.bin") + dstfilename = str(tmp_vsimem / "out.bin") f = gdal.VSIFOpenL(srcfilename, "wb+") gdal.VSIFTruncateL(f, 1000 * 1000) assert gdal.CopyFile(None, dstfilename, f) == 0 gdal.VSIFCloseL(f) - gdal.Unlink(srcfilename) assert gdal.VSIStatL(dstfilename).size == 1000 * 1000 + +def test_vsifile_copyfile_srcfilename_and_srcfilehandle_none(tmp_vsimem): + + # Test srcfilename passed to None + dstfilename = str(tmp_vsimem / "out.bin") + with gdal.quiet_errors(): + assert gdal.CopyFile(None, dstfilename) != 0 + assert gdal.VSIStatL(dstfilename) is None + + +def 
test_vsifile_copyfile_progress(tmp_vsimem): + # Test progress callback - srcfilename = "/vsimem/test.bin" + srcfilename = str(tmp_vsimem / "src.bin") + dstfilename = str(tmp_vsimem / "out.bin") f = gdal.VSIFOpenL(srcfilename, "wb+") gdal.VSIFTruncateL(f, 1000 * 1000) gdal.VSIFCloseL(f) @@ -1271,11 +1287,14 @@ def progress(pct, msg, user_data): == 0 ) assert tab[-1] == 1.0 - gdal.Unlink(srcfilename) assert gdal.VSIStatL(dstfilename).size == 1000 * 1000 + +def test_vsifile_copyfile_progress_cancel(tmp_vsimem): + # Test progress callback in error situation - srcfilename = "/vsimem/test.bin" + srcfilename = str(tmp_vsimem / "src.bin") + dstfilename = str(tmp_vsimem / "out.bin") f = gdal.VSIFOpenL(srcfilename, "wb+") gdal.VSIFTruncateL(f, 1000 * 1000) gdal.VSIFCloseL(f) @@ -1287,15 +1306,33 @@ def progress(pct, msg, user_data): return 1 tab = [] - assert ( - gdal.CopyFile(srcfilename, dstfilename, callback=progress, callback_data=tab) - != 0 - ) + with gdal.quiet_errors(): + assert ( + gdal.CopyFile( + srcfilename, dstfilename, callback=progress, callback_data=tab + ) + != 0 + ) assert tab[-1] != 1.0 - gdal.Unlink(srcfilename) - assert gdal.VSIStatL(dstfilename).size != 1000 * 1000 + assert gdal.VSIStatL(dstfilename) is None + + +def test_vsifile_copyfile_error_on_input(tmp_vsimem): - gdal.Unlink(dstfilename) + srcfilename = "/vsigzip/data/corrupted_z_buf_error.gz" + dstfilename = str(tmp_vsimem / "out.bin") + fp = gdal.VSIFOpenL(srcfilename, "rb") + assert fp + try: + with gdal.quiet_errors(): + assert gdal.CopyFile(None, dstfilename, fpSource=fp) != 0 + assert "error while reading source file" in gdal.GetLastErrorMsg() + assert gdal.VSIStatL(dstfilename) is None + finally: + gdal.VSIFCloseL(fp) + + +############################################################################### def test_vsimem_illegal_filename(): diff --git a/port/cpl_vsil.cpp b/port/cpl_vsil.cpp index 1b832ecc20a3..3af4079c9f6c 100644 --- a/port/cpl_vsil.cpp +++ b/port/cpl_vsil.cpp @@ -1448,27 
+1448,41 @@ int VSIFilesystemHandler::CopyFile(const char *pszSource, const char *pszTarget, GUIntBig nOffset = 0; while (true) { - size_t nRead = VSIFReadL(&abyBuffer[0], 1, nBufferSize, fpSource); - size_t nWritten = VSIFWriteL(&abyBuffer[0], 1, nRead, fpOut); - if (nWritten != nRead) + const size_t nRead = VSIFReadL(&abyBuffer[0], 1, nBufferSize, fpSource); + if (nRead < nBufferSize && VSIFErrorL(fpSource)) { - CPLError(CE_Failure, CPLE_FileIO, "Copying of %s to %s failed", - pszSource, pszTarget); + CPLError( + CE_Failure, CPLE_FileIO, + "Copying of %s to %s failed: error while reading source file", + pszSource, pszTarget); ret = -1; break; } - nOffset += nRead; - if (pProgressFunc && - !pProgressFunc(nSourceSize == 0 ? 1.0 - : nSourceSize > 0 && - nSourceSize != static_cast(-1) - ? double(nOffset) / nSourceSize - : 0.0, - !osMsg.empty() ? osMsg.c_str() : nullptr, - pProgressData)) + if (nRead > 0) { - ret = -1; - break; + const size_t nWritten = VSIFWriteL(&abyBuffer[0], 1, nRead, fpOut); + if (nWritten != nRead) + { + CPLError(CE_Failure, CPLE_FileIO, + "Copying of %s to %s failed: error while writing into " + "target file", + pszSource, pszTarget); + ret = -1; + break; + } + nOffset += nRead; + if (pProgressFunc && + !pProgressFunc( + nSourceSize == 0 ? 1.0 + : nSourceSize > 0 && + nSourceSize != static_cast(-1) + ? double(nOffset) / nSourceSize + : 0.0, + !osMsg.empty() ? 
osMsg.c_str() : nullptr, pProgressData)) + { + ret = -1; + break; + } } if (nRead < nBufferSize) { @@ -1490,6 +1504,10 @@ int VSIFilesystemHandler::CopyFile(const char *pszSource, const char *pszTarget, { ret = -1; } + + if (ret != 0) + VSIUnlink(pszTarget); + return ret; } From 5f92903c15e89580caace585015f10566ea2bca5 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 22 May 2024 19:12:30 +0200 Subject: [PATCH 033/191] PDF: fix build against PoDoFo with MSYS2 UCRT64 and CLANG64 environments Fixes #9976 --- frmts/pdf/pdfsdk_headers.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/frmts/pdf/pdfsdk_headers.h b/frmts/pdf/pdfsdk_headers.h index 77d7a438de6e..d5a3af3dea5b 100644 --- a/frmts/pdf/pdfsdk_headers.h +++ b/frmts/pdf/pdfsdk_headers.h @@ -89,14 +89,19 @@ #ifdef HAVE_PODOFO +#ifdef _WIN32 /* * Some Windows header defines a GetObject macro that - * shadows a GetObject() method in PoDoFo. This - * workaround is documented in the PoDoFo source. + * shadows a GetObject() method in PoDoFo. As pdfdataset.cpp includes cpl_spawn.h + * which includes windows.h, so let's bite the bullet and important windows.h + * right now, and then undef GetObject. Undef'ing GetObject is done in some + * source files of PoDoFo itself. 
*/ +#include #ifdef GetObject #undef GetObject #endif +#endif // Related fix submitted per https://github.com/podofo/podofo/pull/98 #ifdef HAVE_PODOFO_0_10_OR_LATER From eaffd8c32465c09aa9b71421f587acc577d0b4f1 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 22 May 2024 19:22:54 +0200 Subject: [PATCH 034/191] CI: add package mingw-w64-x86_64-podofo --- .github/workflows/cmake_builds.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake_builds.yml b/.github/workflows/cmake_builds.yml index fab7efb174dd..6d6aaf1606e4 100644 --- a/.github/workflows/cmake_builds.yml +++ b/.github/workflows/cmake_builds.yml @@ -323,7 +323,7 @@ jobs: base-devel git mingw-w64-x86_64-toolchain mingw-w64-x86_64-cmake mingw-w64-x86_64-ccache mingw-w64-x86_64-pcre mingw-w64-x86_64-xerces-c mingw-w64-x86_64-zstd mingw-w64-x86_64-libarchive mingw-w64-x86_64-geos mingw-w64-x86_64-libspatialite mingw-w64-x86_64-proj - mingw-w64-x86_64-cgal mingw-w64-x86_64-libfreexl mingw-w64-x86_64-hdf5 mingw-w64-x86_64-netcdf mingw-w64-x86_64-poppler mingw-w64-x86_64-postgresql + mingw-w64-x86_64-cgal mingw-w64-x86_64-libfreexl mingw-w64-x86_64-hdf5 mingw-w64-x86_64-netcdf mingw-w64-x86_64-poppler mingw-w64-x86_64-podofo mingw-w64-x86_64-postgresql mingw-w64-x86_64-libgeotiff mingw-w64-x86_64-libpng mingw-w64-x86_64-libtiff mingw-w64-x86_64-openjpeg2 mingw-w64-x86_64-python-pip mingw-w64-x86_64-python-numpy mingw-w64-x86_64-python-pytest mingw-w64-x86_64-python-setuptools mingw-w64-x86_64-python-lxml mingw-w64-x86_64-swig mingw-w64-x86_64-python-psutil mingw-w64-x86_64-blosc - name: Setup cache From d0c49b7cad9d95e019f29fa93fd18f468fc025d2 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 22 May 2024 19:53:36 +0200 Subject: [PATCH 035/191] OGRSQL: validate column name in COUNT(field_name) and error out if it doesn't exist, otherwise it evaluates as COUNT(*) Fixes #9972 --- autotest/ogr/ogr_sql_test.py | 4 +++- ogr/ogrsf_frmts/generic/ogr_gensql.cpp | 2 +- 
ogr/swq_select.cpp | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/autotest/ogr/ogr_sql_test.py b/autotest/ogr/ogr_sql_test.py index 3a7667407237..187c8e5795d9 100755 --- a/autotest/ogr/ogr_sql_test.py +++ b/autotest/ogr/ogr_sql_test.py @@ -784,8 +784,10 @@ def test_ogr_sql_28(): "SELECT COUNT(*) FROM", "SELECT COUNT(*) AS foo FROM", "SELECT COUNT(* FROM my_layer", + "SELECT COUNT(i_dont_exist) FROM my_layer", "SELECT COUNT(FOO intfield) FROM my_layer", "SELECT COUNT(DISTINCT intfield FROM my_layer", + "SELECT COUNT(DISTINCT i_dont_exist) FROM my_layer", "SELECT COUNT(DISTINCT *) FROM my_layer", "SELECT FOO(DISTINCT intfield) FROM my_layer", "SELECT FOO(DISTINCT intfield) as foo FROM my_layer", @@ -1084,7 +1086,7 @@ def test_ogr_sql_36(): # Test select count([distinct] column) with null values (#4354) -def test_ogr_sql_37(): +def test_ogr_sql_count_and_null(): ds = ogr.GetDriverByName("Memory").CreateDataSource("ogr_sql_37") lyr = ds.CreateLayer("layer") diff --git a/ogr/ogrsf_frmts/generic/ogr_gensql.cpp b/ogr/ogrsf_frmts/generic/ogr_gensql.cpp index cb0ed0aa2d8a..dc618451b357 100644 --- a/ogr/ogrsf_frmts/generic/ogr_gensql.cpp +++ b/ogr/ogrsf_frmts/generic/ogr_gensql.cpp @@ -901,7 +901,7 @@ int OGRGenSQLResultsLayer::PrepareSummary() /* -------------------------------------------------------------------- */ /* We treat COUNT(*) as a special case, and fill with */ - /* GetFeatureCount(). */ + /* GetFeatureCount(). */ /* -------------------------------------------------------------------- */ if (psSelectInfo->result_columns() == 1 && diff --git a/ogr/swq_select.cpp b/ogr/swq_select.cpp index 9d63d199b00c..becd1221d1a9 100644 --- a/ogr/swq_select.cpp +++ b/ogr/swq_select.cpp @@ -981,7 +981,8 @@ CPLErr swq_select::parse(swq_field_list *field_list, // Record field type. 
def->field_type = this_type; - if (def->field_index == -1 && def->col_func != SWQCF_COUNT) + if (def->field_index == -1 && !(def->col_func == SWQCF_COUNT && + strcmp(def->field_name, "*") == 0)) { CPLError( CE_Failure, CPLE_AppDefined, "Unrecognized field name %s.", From fe08ea1b313bb6d8c318e74e10debfb7dcdcb6ff Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 22 May 2024 19:23:25 +0200 Subject: [PATCH 036/191] PDF: split import of SDK headers into separate file per backend --- frmts/pdf/CMakeLists.txt | 5 +- frmts/pdf/gdal_pdf.h | 2 +- frmts/pdf/pdfobject.h | 2 +- frmts/pdf/pdfsdk_headers_all.h | 40 +++++++ ...fsdk_headers.h => pdfsdk_headers_pdfium.h} | 108 +----------------- frmts/pdf/pdfsdk_headers_podofo.h | 80 +++++++++++++ frmts/pdf/pdfsdk_headers_poppler.h | 88 ++++++++++++++ 7 files changed, 219 insertions(+), 106 deletions(-) create mode 100644 frmts/pdf/pdfsdk_headers_all.h rename frmts/pdf/{pdfsdk_headers.h => pdfsdk_headers_pdfium.h} (50%) create mode 100644 frmts/pdf/pdfsdk_headers_podofo.h create mode 100644 frmts/pdf/pdfsdk_headers_poppler.h diff --git a/frmts/pdf/CMakeLists.txt b/frmts/pdf/CMakeLists.txt index 1d8006c848b2..b2f1f07ba9ea 100644 --- a/frmts/pdf/CMakeLists.txt +++ b/frmts/pdf/CMakeLists.txt @@ -5,7 +5,10 @@ add_gdal_driver( pdfio.h pdfobject.h pdfcreatecopy.h - pdfsdk_headers.h + pdfsdk_headers_all.h + pdfsdk_headers_poppler.h + pdfsdk_headers_podofo.h + pdfsdk_headers_pdfium.h ogrpdflayer.cpp pdfcreatecopy.cpp pdfdataset.cpp diff --git a/frmts/pdf/gdal_pdf.h b/frmts/pdf/gdal_pdf.h index 110f28fce751..d46ef09e96c9 100644 --- a/frmts/pdf/gdal_pdf.h +++ b/frmts/pdf/gdal_pdf.h @@ -52,7 +52,7 @@ #include // For detecting usage of PDF library #include -#include "pdfsdk_headers.h" +#include "pdfsdk_headers_all.h" #include "pdfdrivercore.h" diff --git a/frmts/pdf/pdfobject.h b/frmts/pdf/pdfobject.h index 3e069c8e7424..362eed4e1b06 100644 --- a/frmts/pdf/pdfobject.h +++ b/frmts/pdf/pdfobject.h @@ -37,7 +37,7 @@ #ifndef 
PDFOBJECT_H_INCLUDED #define PDFOBJECT_H_INCLUDED -#include "pdfsdk_headers.h" +#include "pdfsdk_headers_all.h" #include "cpl_string.h" #include diff --git a/frmts/pdf/pdfsdk_headers_all.h b/frmts/pdf/pdfsdk_headers_all.h new file mode 100644 index 000000000000..5b1a363e7712 --- /dev/null +++ b/frmts/pdf/pdfsdk_headers_all.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: Includes PDF SDK headers + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2015, Even Rouault + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *****************************************************************************/ + +#ifndef PDFSDK_HEADERS_ALL_H +#define PDFSDK_HEADERS_ALL_H + +#if defined(__GNUC__) && !defined(_MSC_VER) +#pragma GCC system_header +#endif + +#include "pdfsdk_headers_poppler.h" +#include "pdfsdk_headers_podofo.h" +#include "pdfsdk_headers_pdfium.h" + +#endif // PDFSDK_HEADERS_ALL_H diff --git a/frmts/pdf/pdfsdk_headers.h b/frmts/pdf/pdfsdk_headers_pdfium.h similarity index 50% rename from frmts/pdf/pdfsdk_headers.h rename to frmts/pdf/pdfsdk_headers_pdfium.h index d5a3af3dea5b..f2b271560c95 100644 --- a/frmts/pdf/pdfsdk_headers.h +++ b/frmts/pdf/pdfsdk_headers_pdfium.h @@ -1,7 +1,7 @@ /****************************************************************************** * * Project: GDAL - * Purpose: Includes PDF SDK headers + * Purpose: Includes PDFium headers * Author: Even Rouault * ****************************************************************************** @@ -26,111 +26,13 @@ * DEALINGS IN THE SOFTWARE. *****************************************************************************/ -#ifndef PDFSDK_HEADERS_H -#define PDFSDK_HEADERS_H +#ifndef PDFSDK_HEADERS_PDFIUM_H +#define PDFSDK_HEADERS_PDFIUM_H -/* We avoid to include cpl_port.h directly or indirectly */ -#if ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) && \ - !defined(_MSC_VER)) +#if defined(__GNUC__) && !defined(_MSC_VER) #pragma GCC system_header #endif -#ifdef HAVE_POPPLER - -/* Horrible hack because there's a conflict between struct FlateDecode of */ -/* include/poppler/Stream.h and the FlateDecode() function of */ -/* pdfium/core/include/fpdfapi/fpdf_parser.h. 
*/ -/* The part of Stream.h where struct FlateDecode is defined isn't needed */ -/* by GDAL, and is luckily protected by a #ifndef ENABLE_ZLIB section */ -#ifdef HAVE_PDFIUM -#define ENABLE_ZLIB -#endif /* HAVE_PDFIUM */ - -#ifdef _MSC_VER -#pragma warning(push) -// conversion from 'const int' to 'Guchar', possible loss of data -#pragma warning(disable : 4244) -// conversion from 'size_t' to 'int', possible loss of data -#pragma warning(disable : 4267) -#endif - -/* begin of poppler xpdf includes */ -#include -#include - -#define private public /* Ugly! Page::pageObj is private but we need it... */ -#include -#undef private - -#include - -#define private \ - public /* Ugly! Catalog::optContent is private but we need it... */ -#include -#undef private - -#define private public /* Ugly! PDFDoc::str is private but we need it... */ -#include -#undef private - -#include -#include -#include -#include -#include - -/* end of poppler xpdf includes */ - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#endif /* HAVE_POPPLER */ - -#ifdef HAVE_PODOFO - -#ifdef _WIN32 -/* - * Some Windows header defines a GetObject macro that - * shadows a GetObject() method in PoDoFo. As pdfdataset.cpp includes cpl_spawn.h - * which includes windows.h, so let's bite the bullet and important windows.h - * right now, and then undef GetObject. Undef'ing GetObject is done in some - * source files of PoDoFo itself. 
- */ -#include -#ifdef GetObject -#undef GetObject -#endif -#endif - -// Related fix submitted per https://github.com/podofo/podofo/pull/98 -#ifdef HAVE_PODOFO_0_10_OR_LATER -#define USE_HACK_BECAUSE_PdfInputStream_constructor_is_not_exported_in_podofo_0_11 -#endif - -#ifdef USE_HACK_BECAUSE_PdfInputStream_constructor_is_not_exported_in_podofo_0_11 -// If we is included after our below #define private public errors out -// with an error like: -// /usr/include/c++/13.2.1/sstream:457:7: error: 'struct std::__cxx11::basic_stringbuf<_CharT, _Traits, _Alloc>::__xfer_bufptrs' redeclared with different access -// 457 | struct __xfer_bufptrs -// so include it before, as otherwise it would get indirectly included by -// PdfDate.h, which includes , which includes -#include -// Ugly! PfdObjectStream::GetParent() is private but we need it... -#define private public -#endif -#include "podofo.h" -#ifdef private -#undef private -#endif - -#if PODOFO_VERSION_MAJOR > 0 || \ - (PODOFO_VERSION_MAJOR == 0 && PODOFO_VERSION_MINOR >= 10) -#define PdfVecObjects PdfIndirectObjectList -#endif - -#endif // HAVE_PODOFO - #ifdef HAVE_PDFIUM #include "cpl_multiproc.h" @@ -183,4 +85,4 @@ #endif // HAVE_PDFIUM -#endif +#endif // PDFSDK_HEADERS_PDFIUM_H diff --git a/frmts/pdf/pdfsdk_headers_podofo.h b/frmts/pdf/pdfsdk_headers_podofo.h new file mode 100644 index 000000000000..9f52b838c46e --- /dev/null +++ b/frmts/pdf/pdfsdk_headers_podofo.h @@ -0,0 +1,80 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: Includes PoDoFo headers + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2015, Even Rouault + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, 
copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef PDFSDK_HEADERS_PODOFO_H +#define PDFSDK_HEADERS_PODOFO_H + +#if defined(__GNUC__) && !defined(_MSC_VER) +#pragma GCC system_header +#endif + +#ifdef HAVE_PODOFO + +#ifdef _WIN32 +/* + * Some Windows header defines a GetObject macro that + * shadows a GetObject() method in PoDoFo. As pdfdataset.cpp includes cpl_spawn.h + * which includes windows.h, let's bite the bullet and include windows.h + * right now, and then undef GetObject. Undef'ing GetObject is done in some + * source files of PoDoFo itself.
+ */ +#include +#ifdef GetObject +#undef GetObject +#endif +#endif + +// Related fix submitted per https://github.com/podofo/podofo/pull/98 +#ifdef HAVE_PODOFO_0_10_OR_LATER +#define USE_HACK_BECAUSE_PdfInputStream_constructor_is_not_exported_in_podofo_0_11 +#endif + +#ifdef USE_HACK_BECAUSE_PdfInputStream_constructor_is_not_exported_in_podofo_0_11 +// If we is included after our below #define private public errors out +// with an error like: +// /usr/include/c++/13.2.1/sstream:457:7: error: 'struct std::__cxx11::basic_stringbuf<_CharT, _Traits, _Alloc>::__xfer_bufptrs' redeclared with different access +// 457 | struct __xfer_bufptrs +// so include it before, as otherwise it would get indirectly included by +// PdfDate.h, which includes , which includes +#include +// Ugly! PfdObjectStream::GetParent() is private but we need it... +#define private public +#endif +#include "podofo.h" +#ifdef private +#undef private +#endif + +#if PODOFO_VERSION_MAJOR > 0 || \ + (PODOFO_VERSION_MAJOR == 0 && PODOFO_VERSION_MINOR >= 10) +#define PdfVecObjects PdfIndirectObjectList +#endif + +#endif // HAVE_PODOFO + +#endif // PDFSDK_HEADERS_PODOFO_H diff --git a/frmts/pdf/pdfsdk_headers_poppler.h b/frmts/pdf/pdfsdk_headers_poppler.h new file mode 100644 index 000000000000..2705c4ee89d7 --- /dev/null +++ b/frmts/pdf/pdfsdk_headers_poppler.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: Includes Poppler headers + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2015, Even Rouault + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, 
and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + *****************************************************************************/ + +#ifndef PDFSDK_HEADERS_POPPLER_H +#define PDFSDK_HEADERS_POPPLER_H + +#if defined(__GNUC__) && !defined(_MSC_VER) +#pragma GCC system_header +#endif + +#ifdef HAVE_POPPLER + +/* Horrible hack because there's a conflict between struct FlateDecode of */ +/* include/poppler/Stream.h and the FlateDecode() function of */ +/* pdfium/core/include/fpdfapi/fpdf_parser.h. */ +/* The part of Stream.h where struct FlateDecode is defined isn't needed */ +/* by GDAL, and is luckily protected by a #ifndef ENABLE_ZLIB section */ +#ifdef HAVE_PDFIUM +#define ENABLE_ZLIB +#endif /* HAVE_PDFIUM */ + +#ifdef _MSC_VER +#pragma warning(push) +// conversion from 'const int' to 'Guchar', possible loss of data +#pragma warning(disable : 4244) +// conversion from 'size_t' to 'int', possible loss of data +#pragma warning(disable : 4267) +#endif + +/* begin of poppler xpdf includes */ +#include +#include + +#define private public /* Ugly! Page::pageObj is private but we need it... */ +#include +#undef private + +#include + +#define private \ + public /* Ugly! Catalog::optContent is private but we need it... */ +#include +#undef private + +#define private public /* Ugly! 
PDFDoc::str is private but we need it... */ +#include +#undef private + +#include +#include +#include +#include +#include + +/* end of poppler xpdf includes */ + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif /* HAVE_POPPLER */ + +#endif // PDFSDK_HEADERS_POPPLER_H From c81d1a852b6ac8f2f30207616dd1b600425fab0d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 22 May 2024 20:18:58 +0200 Subject: [PATCH 037/191] ogr_sql_test.py: enable exceptions, and parametrize more tests --- autotest/ogr/ogr_sql_test.py | 147 +++++++++++++++++------------------ 1 file changed, 73 insertions(+), 74 deletions(-) diff --git a/autotest/ogr/ogr_sql_test.py b/autotest/ogr/ogr_sql_test.py index 187c8e5795d9..0b101b8b7b4b 100755 --- a/autotest/ogr/ogr_sql_test.py +++ b/autotest/ogr/ogr_sql_test.py @@ -36,14 +36,6 @@ from osgeo import gdal, ogr - -############################################################################### -@pytest.fixture(autouse=True, scope="module") -def module_disable_exceptions(): - with gdaltest.disable_exceptions(): - yield - - ############################################################################### # Test ExecuteSQL() @@ -497,16 +489,6 @@ def test_ogr_sql_17(): ogrtest.check_features_against_list(sql_lyr, "fid", expect) -############################################################################### -# Test empty request string - - -def test_ogr_sql_19(data_ds): - - with gdal.quiet_errors(): - assert data_ds.ExecuteSQL("") is None - - ############################################################################### # Test query "SELECT * from my_layer" on layer without any field (#2788) @@ -686,7 +668,8 @@ def test_ogr_sql_27(): # code from the grammar -def test_ogr_sql_28(): +@pytest.fixture(scope="module") +def ds_for_invalid_statements(): ds = ogr.GetDriverByName("Memory").CreateDataSource("my_ds") lyr = ds.CreateLayer("my_layer") @@ -710,10 +693,13 @@ def test_ogr_sql_28(): field_defn = ogr.FieldDefn("strfield2", ogr.OFTString) 
lyr.CreateField(field_defn) - with pytest.raises(Exception): - sql_lyr = ds.ExecuteSQL(None) + yield ds + - queries = [ +@pytest.mark.parametrize( + "sql", + [ + None, "", "1", "*", @@ -841,19 +827,12 @@ def test_ogr_sql_28(): "SELECT * FROM my_layer UNION ALL SELECT", "SELECT * FROM my_layer UNION ALL SELECT *", "SELECT * FROM my_layer UNION ALL SELECT * FROM", - ] - - for query in queries: - gdal.ErrorReset() - # print query - with gdal.quiet_errors(): - sql_lyr = ds.ExecuteSQL(query) - if sql_lyr is not None: - ds.ReleaseResultSet(sql_lyr) - pytest.fail('expected None result on "%s"' % query) - assert gdal.GetLastErrorType() != 0, 'expected error on "%s"' % query + ], +) +def test_ogr_sql_invalid_statements(ds_for_invalid_statements, sql): - ds = None + with pytest.raises(Exception): + ds_for_invalid_statements.ExecuteSQL(sql) ############################################################################### @@ -1019,11 +998,8 @@ def test_ogr_sql_34(data_ds): assert val == 1 - with gdal.quiet_errors(): - assert ( - data_ds.ExecuteSQL("select count(*) from poly where eas_id in ('a165')") - is None - ) + with pytest.raises(Exception): + data_ds.ExecuteSQL("select count(*) from poly where eas_id in ('a165')") ############################################################################### @@ -1231,19 +1207,23 @@ def test_ogr_sql_43(data_ds): # Test hstore_get_value() -def test_ogr_sql_44(data_ds): - - # Invalid parameters - for sql in [ +@pytest.mark.parametrize( + "sql", + [ "SELECT hstore_get_value('a') FROM poly", "SELECT hstore_get_value(1, 1) FROM poly", - ]: - with gdal.quiet_errors(): - sql_lyr = data_ds.ExecuteSQL(sql) - assert sql_lyr is None, sql + ], +) +def test_ogr_sql_hstore_get_value_invalid_parameters(data_ds, sql): - # Invalid hstore syntax or empty result - for sql in [ + # Invalid parameters + with pytest.raises(Exception): + data_ds.ExecuteSQL(sql) + + +@pytest.mark.parametrize( + "sql", + [ "SELECT hstore_get_value('a', null) FROM poly", "SELECT
hstore_get_value(null, 'a') FROM poly", "SELECT hstore_get_value('a', 'a') FROM poly", @@ -1258,13 +1238,19 @@ def test_ogr_sql_44(data_ds): "SELECT hstore_get_value('\"a\" => ', 'a') FROM poly", "SELECT hstore_get_value('\"a\" => \"', 'a') FROM poly", "SELECT hstore_get_value('\"a\" => \"\" z', 'a') FROM poly", - ]: - with data_ds.ExecuteSQL(sql) as sql_lyr: - f = sql_lyr.GetNextFeature() - assert not f.IsFieldSetAndNotNull(0), sql + ], +) +def test_ogr_sql_hstore_get_value_invalid_hstore_syntax_or_empty_result(data_ds, sql): - # Valid hstore syntax - for (sql, expected) in [ + # Invalid hstore syntax or empty result + with data_ds.ExecuteSQL(sql) as sql_lyr: + f = sql_lyr.GetNextFeature() + assert not f.IsFieldSetAndNotNull(0), sql + + +@pytest.mark.parametrize( + "sql,expected", + [ ("SELECT hstore_get_value('a=>b', 'a') FROM poly", "b"), ("SELECT hstore_get_value(' a => b ', 'a') FROM poly", "b"), ("SELECT hstore_get_value('\"a\"=>b', 'a') FROM poly", "b"), @@ -1274,10 +1260,14 @@ def test_ogr_sql_44(data_ds): ("SELECT hstore_get_value('\"a\"=>\"b\"', 'a') FROM poly", "b"), ("SELECT hstore_get_value(' \"a\" => \"b\" ', 'a') FROM poly", "b"), ('SELECT hstore_get_value(\' "a\\"b" => "b" \', \'a"b\') FROM poly', "b"), - ]: - with data_ds.ExecuteSQL(sql) as sql_lyr: - f = sql_lyr.GetNextFeature() - assert f.GetField(0) == expected, sql + ], +) +def test_ogr_sql_hstore_get_value_valid(data_ds, sql, expected): + + # Valid hstore syntax + with data_ds.ExecuteSQL(sql) as sql_lyr: + f = sql_lyr.GetNextFeature() + assert f.GetField(0) == expected, sql ############################################################################### @@ -1317,7 +1307,8 @@ def test_ogr_sql_45(): # Test strict SQL quoting -def test_ogr_sql_46(): +@pytest.fixture(scope="module") +def ogr_sql_strit_quoting_ds(): ds = ogr.GetDriverByName("Memory").CreateDataSource("test") lyr = ds.CreateLayer("test") @@ -1331,9 +1322,13 @@ def test_ogr_sql_46(): feat.SetField(0, 3) feat.SetField(1, "from") 
lyr.CreateFeature(feat) - feat = None - with ds.ExecuteSQL( + return ds + + +def test_ogr_sql_strict_quoting_non_aggregate(ogr_sql_strit_quoting_ds): + + with ogr_sql_strit_quoting_ds.ExecuteSQL( 'select id, \'id\', "id" as id2, id as "id3", "from" from test where "from" = \'from\'' ) as sql_lyr: feat = sql_lyr.GetNextFeature() @@ -1346,7 +1341,10 @@ def test_ogr_sql_46(): feat = sql_lyr.GetNextFeature() assert feat is None - with ds.ExecuteSQL( + +def test_ogr_sql_strict_quoting_aggregate(ogr_sql_strit_quoting_ds): + + with ogr_sql_strit_quoting_ds.ExecuteSQL( 'select max("id"), max(id), count("id"), count(id) from "test"' ) as sql_lyr: feat = sql_lyr.GetNextFeature() @@ -1355,16 +1353,20 @@ def test_ogr_sql_46(): assert feat.GetField(2) == 2 assert feat.GetField(3) == 2 - # Not accepted - for sql in [ + +@pytest.mark.parametrize( + "sql", + [ "select * from 'test'", "select distinct 'id' from 'test'", "select max('id') from 'test'", "select id as 'id2' from 'test'", - ]: - with gdal.quiet_errors(): - sql_lyr = ds.ExecuteSQL("select * from 'test'") - assert sql_lyr is None, sql + ], +) +def test_ogr_sql_strict_quoting_errors(ogr_sql_strit_quoting_ds, sql): + + with pytest.raises(Exception): + ogr_sql_strit_quoting_ds.ExecuteSQL(sql) ############################################################################### @@ -1598,9 +1600,8 @@ def test_ogr_sql_min_max_string_field(): ], ) def test_ogr_sql_select_except_errors(data_ds, body): - with gdal.quiet_errors(): - lyr = data_ds.ExecuteSQL(f"SELECT * EXCEPT {body} FROM poly") - assert lyr is None + with pytest.raises(Exception): + data_ds.ExecuteSQL(f"SELECT * EXCEPT {body} FROM poly") def test_ogr_sql_select_except_attrs(data_ds): @@ -1865,7 +1866,6 @@ def test_ogr_sql_ilike_utf8(): # Test error on setting a spatial filter during ExecuteSQL -@gdaltest.enable_exceptions() def test_ogr_sql_test_execute_sql_error_on_spatial_filter_mem_layer(): ds = ogr.GetDriverByName("Memory").CreateDataSource("") @@ -1906,7 +1906,6 @@ 
def get_available_dialects(): return [None, "SQLite"] if ogr.GetDriverByName("SQLite") else [None] -@gdaltest.enable_exceptions() @pytest.mark.parametrize( "where,feature_count", [ # intfield From 89f0e521c3c6aaf1adbc93c9768f1e26706f94d5 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 22 May 2024 22:45:40 +0200 Subject: [PATCH 038/191] OGRGenSQLResultsLayer: modernize C++ usage --- autotest/gdrivers/data/s102/test_s102_v2.1.h5 | Bin 12984 -> 12872 bytes autotest/gdrivers/data/s102/test_s102_v2.2.h5 | Bin 13056 -> 12960 bytes gcore/gdaldataset.cpp | 13 +- ogr/ogrsf_frmts/generic/ogr_gensql.cpp | 645 ++++++++---------- ogr/ogrsf_frmts/generic/ogr_gensql.h | 45 +- 5 files changed, 326 insertions(+), 377 deletions(-) diff --git a/autotest/gdrivers/data/s102/test_s102_v2.1.h5 b/autotest/gdrivers/data/s102/test_s102_v2.1.h5 index ea3bfdc44d1ca2cadf57edf30d89fe81e7ba33a2..3d6c40ba2bb933724f53d2983bb87ee02325cbf3 100644 GIT binary patch delta 336 zcmdmydLm_l2BXJBO-luC21W)31_>Yr0RbQcfd+Xn#lXNc*^6CmW`1wG$iFHy5xUVwPvas&0ZLM4iXxK)rMAo&+e`bxP2*;A#QCx4KV0kU|dL2|KRaxIwr3nqPK!0Ke2C%=%9;Doz% zGpDRQ6QjmtPx*Y_9S}Pico;kw7$zT;2N@))5Dp}B73_iJT7`HZ$*bth%EJ%R delta 370 zcmX?+vLkhZ2IG#2nwIjs42%p63=%*L0s=q?0xRUf6vK|mf!yjFR}`>HzL0^4fK-Ek zfGmVIm~6=QQ=Sv6DIcUDYJPxB*<8SWh*_Q;tHKGA5QPbw1G%5G2naE7fcYT1Dktit zZuSsJ;bdf)yjId5Nd5(rzEbW$_EafnW}sUpZWNt7L6T$g11S-p;8!V-GFNF3ITcJk z1(T*SAT6~r&XX+!1STsmOK`$HvH78lJriTVWJURWUJa-hc^EtxSSHVv2O0BLJ{)AE tf<2IIRfq?YPZdBmb4=W5yLp15lmK%8P_3?FFe^mYT0^i)4ZWeR0|1GXMWX-! 
diff --git a/autotest/gdrivers/data/s102/test_s102_v2.2.h5 b/autotest/gdrivers/data/s102/test_s102_v2.2.h5 index 3459de09da0ff04be316cdcc8895dbdff388efe7..3af45a901e8e71b4b5b64e924ef4a07da97352d9 100644 GIT binary patch delta 345 zcmZojTaY?IgK2@$L@f&iUIs=61_lWr1_1#e1c3&5FvTFiG}((?ZR3gpR>=c05FwCq z5O@LQb4)g5`zgVqABt)IZ=0NV}ER%1@2uyy#DX=+0 zAcT{VVe(o@e<1l6O!`W>1KCrhoF{*fk^!=Kr9pDBU~(;({0k<1Wx(oWoF~7Kk>G^8 zbu*`|JrkqGWKa2gUID02co;kw7$zT;2N@))5Dp}B73_iJS}^$*OzJ8+v+^)F0M)HE ablyBcQA&V00Lb}j2vY8A1kyU!$QuA7K|(wL delta 383 zcmZ3G+K@IugNea-qLzg`KLaBJ1A_z*gMa`Kg1`!SFvW0Vav-<*#uWvuk}qT+A|TZu zARr5&4JI41{gmg#YRU&Gh?*ZDQ#Kc{A7Ykg$Et9GBt&7t=0NV}ECNCd9AG}kuF8pe zshd3nQaBk|Ca;zB2aHPmtu8{6I psSelectInfoUnique(psSelectInfo); + std::unique_ptr poResults; GDALSQLParseInfo *psParseInfo = - BuildParseInfo(psSelectInfo, poSelectParseOptions); + BuildParseInfo(psSelectInfoUnique.get(), poSelectParseOptions); if (psParseInfo) { const auto nErrorCounter = CPLGetErrorCounter(); poResults = std::make_unique( - this, psSelectInfo, poSpatialFilter, psParseInfo->pszWHERE, - pszDialect); + this, std::move(psSelectInfoUnique), poSpatialFilter, + psParseInfo->pszWHERE, pszDialect); if (CPLGetErrorCounter() > nErrorCounter && CPLGetLastErrorType() != CE_None) poResults.reset(); } - else - { - delete psSelectInfo; - } + DestroyParseInfo(psParseInfo); return poResults.release(); diff --git a/ogr/ogrsf_frmts/generic/ogr_gensql.cpp b/ogr/ogrsf_frmts/generic/ogr_gensql.cpp index dc618451b357..b67645c67f77 100644 --- a/ogr/ogrsf_frmts/generic/ogr_gensql.cpp +++ b/ogr/ogrsf_frmts/generic/ogr_gensql.cpp @@ -86,36 +86,30 @@ static bool OGRGenSQLResultsLayerHasSpecialField(swq_expr_node *expr, /* OGRGenSQLResultsLayer() */ /************************************************************************/ -OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, - void *pSelectInfoIn, - OGRGeometry *poSpatFilter, - const char *pszWHEREIn, - const char *pszDialect) - : poSrcDS(poSrcDSIn), 
poSrcLayer(nullptr), pSelectInfo(pSelectInfoIn), - papoTableLayers(nullptr), poDefn(nullptr), - panGeomFieldToSrcGeomField(nullptr), nIndexSize(0), panFIDIndex(nullptr), - bOrderByValid(FALSE), nNextIndexFID(0), poSummaryFeature(nullptr), - iFIDFieldIndex(), nExtraDSCount(0), papoExtraDS(nullptr), - nIteratedFeatures(-1), m_oDistinctList{} +OGRGenSQLResultsLayer::OGRGenSQLResultsLayer( + GDALDataset *poSrcDSIn, std::unique_ptr &&pSelectInfo, + const OGRGeometry *poSpatFilter, const char *pszWHEREIn, + const char *pszDialect) + : m_poSrcDS(poSrcDSIn), m_pSelectInfo(std::move(pSelectInfo)) { - swq_select *psSelectInfo = static_cast(pSelectInfoIn); + swq_select *psSelectInfo = m_pSelectInfo.get(); /* -------------------------------------------------------------------- */ /* Identify all the layers involved in the SELECT. */ /* -------------------------------------------------------------------- */ - papoTableLayers = static_cast( - CPLCalloc(sizeof(OGRLayer *), psSelectInfo->table_count)); + m_apoTableLayers.reserve(psSelectInfo->table_count); for (int iTable = 0; iTable < psSelectInfo->table_count; iTable++) { swq_table_def *psTableDef = psSelectInfo->table_defs + iTable; - GDALDataset *poTableDS = poSrcDS; + GDALDataset *poTableDS = m_poSrcDS; if (psTableDef->data_source != nullptr) { - poTableDS = GDALDataset::Open(psTableDef->data_source, - GDAL_OF_VECTOR | GDAL_OF_SHARED); - if (poTableDS == nullptr) + std::unique_ptr poNewDS( + GDALDataset::Open(psTableDef->data_source, + GDAL_OF_VECTOR | GDAL_OF_SHARED)); + if (!poNewDS) { if (strlen(CPLGetLastErrorMsg()) == 0) CPLError(CE_Failure, CPLE_AppDefined, @@ -125,23 +119,18 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, return; } - papoExtraDS = static_cast( - CPLRealloc(papoExtraDS, sizeof(void *) * ++nExtraDSCount)); - - papoExtraDS[nExtraDSCount - 1] = poTableDS; + m_apoExtraDS.emplace_back(std::move(poNewDS)); + poTableDS = m_apoExtraDS.back().get(); } - papoTableLayers[iTable] = - 
poTableDS->GetLayerByName(psTableDef->table_name); - - CPLAssert(papoTableLayers[iTable] != nullptr); - - if (papoTableLayers[iTable] == nullptr) + m_apoTableLayers.push_back( + poTableDS->GetLayerByName(psTableDef->table_name)); + if (!m_apoTableLayers.back()) return; } - poSrcLayer = papoTableLayers[0]; - SetMetadata(poSrcLayer->GetMetadata("NATIVE_DATA"), "NATIVE_DATA"); + m_poSrcLayer = m_apoTableLayers[0]; + SetMetadata(m_poSrcLayer->GetMetadata("NATIVE_DATA"), "NATIVE_DATA"); /* -------------------------------------------------------------------- */ /* If the user has explicitly requested a OGRSQL dialect, then */ @@ -155,7 +144,7 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, EQUAL(pszDialect, "OGRSQL")) { const int nMinIndexForSpecialField = - poSrcLayer->GetLayerDefn()->GetFieldCount(); + m_poSrcLayer->GetLayerDefn()->GetFieldCount(); m_bForwardWhereToSourceLayer = !OGRGenSQLResultsLayerHasSpecialField(psSelectInfo->where_expr, nMinIndexForSpecialField); @@ -166,18 +155,14 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, /* -------------------------------------------------------------------- */ /* Prepare a feature definition based on the query. 
*/ /* -------------------------------------------------------------------- */ - OGRFeatureDefn *poSrcDefn = poSrcLayer->GetLayerDefn(); + OGRFeatureDefn *poSrcDefn = m_poSrcLayer->GetLayerDefn(); - poDefn = new OGRFeatureDefn(psSelectInfo->table_defs[0].table_alias); - SetDescription(poDefn->GetName()); - poDefn->SetGeomType(wkbNone); - poDefn->Reference(); + m_poDefn = new OGRFeatureDefn(psSelectInfo->table_defs[0].table_alias); + SetDescription(m_poDefn->GetName()); + m_poDefn->SetGeomType(wkbNone); + m_poDefn->Reference(); - iFIDFieldIndex = poSrcDefn->GetFieldCount(); - - /* + 1 since we can add an implicit geometry field */ - panGeomFieldToSrcGeomField = static_cast( - CPLMalloc(sizeof(int) * (1 + psSelectInfo->column_defs.size()))); + m_iFIDFieldIndex = poSrcDefn->GetFieldCount(); for (std::size_t iField = 0; iField < psSelectInfo->column_defs.size(); iField++) @@ -193,7 +178,7 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, if (psColDef->table_index != -1) poLayerDefn = - papoTableLayers[psColDef->table_index]->GetLayerDefn(); + m_apoTableLayers[psColDef->table_index]->GetLayerDefn(); if (psColDef->field_index > -1 && poLayerDefn != nullptr && psColDef->field_index < poLayerDefn->GetFieldCount()) @@ -221,7 +206,7 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, CPLFree(psColDef->field_name); psColDef->field_name = static_cast(CPLMalloc(40)); snprintf(psColDef->field_name, 40, "FIELD_%d", - poDefn->GetFieldCount() + 1); + m_poDefn->GetFieldCount() + 1); } if (psColDef->field_alias != nullptr) @@ -289,9 +274,9 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, oGFDefn.SetType(poSrcGFDefn->GetType()); oGFDefn.SetSpatialRef(poSrcGFDefn->GetSpatialRef()); } - else if (psColDef->field_index >= iFIDFieldIndex) + else if (psColDef->field_index >= m_iFIDFieldIndex) { - switch (SpecialFieldTypes[psColDef->field_index - iFIDFieldIndex]) + switch (SpecialFieldTypes[psColDef->field_index - 
m_iFIDFieldIndex]) { case SWQ_INTEGER: oFDefn.SetType(OFTInteger); @@ -306,9 +291,9 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, oFDefn.SetType(OFTString); break; } - if (psColDef->field_index - iFIDFieldIndex == SPF_FID && - poSrcLayer->GetMetadataItem(OLMD_FID64) != nullptr && - EQUAL(poSrcLayer->GetMetadataItem(OLMD_FID64), "YES")) + if (psColDef->field_index - m_iFIDFieldIndex == SPF_FID && + m_poSrcLayer->GetMetadataItem(OLMD_FID64) != nullptr && + EQUAL(m_poSrcLayer->GetMetadataItem(OLMD_FID64), "YES")) { oFDefn.SetType(OFTInteger64); } @@ -393,15 +378,14 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, if (bIsGeometry) { - panGeomFieldToSrcGeomField[poDefn->GetGeomFieldCount()] = - iSrcGeomField; + m_anGeomFieldToSrcGeomField.push_back(iSrcGeomField); /* Hack while drivers haven't been updated so that */ /* poSrcDefn->GetGeomFieldDefn(0)->GetSpatialRef() == - * poSrcLayer->GetSpatialRef() */ + * m_poSrcLayer->GetSpatialRef() */ if (iSrcGeomField == 0 && poSrcDefn->GetGeomFieldCount() == 1 && oGFDefn.GetSpatialRef() == nullptr) { - oGFDefn.SetSpatialRef(poSrcLayer->GetSpatialRef()); + oGFDefn.SetSpatialRef(m_poSrcLayer->GetSpatialRef()); } int bForceGeomType = FALSE; if (psColDef->eGeomType != wkbUnknown) @@ -423,17 +407,17 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, auto poMyGeomFieldDefn = std::make_unique(&oGFDefn); poMyGeomFieldDefn->bForceGeomType = bForceGeomType; - poDefn->AddGeomFieldDefn(std::move(poMyGeomFieldDefn)); + m_poDefn->AddGeomFieldDefn(std::move(poMyGeomFieldDefn)); } else - poDefn->AddFieldDefn(&oFDefn); + m_poDefn->AddFieldDefn(&oFDefn); } /* -------------------------------------------------------------------- */ /* Add implicit geometry field. 
*/ /* -------------------------------------------------------------------- */ if (psSelectInfo->query_mode == SWQM_RECORDSET && - poDefn->GetGeomFieldCount() == 0 && + m_poDefn->GetGeomFieldCount() == 0 && poSrcDefn->GetGeomFieldCount() == 1 && !psSelectInfo->bExcludedGeometry) { psSelectInfo->column_defs.emplace_back(); @@ -454,27 +438,28 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, col_def->field_type = SWQ_GEOMETRY; col_def->target_type = SWQ_GEOMETRY; - panGeomFieldToSrcGeomField[poDefn->GetGeomFieldCount()] = 0; + m_anGeomFieldToSrcGeomField.push_back(0); - poDefn->AddGeomFieldDefn(std::make_unique( + m_poDefn->AddGeomFieldDefn(std::make_unique( poSrcDefn->GetGeomFieldDefn(0))); /* Hack while drivers haven't been updated so that */ /* poSrcDefn->GetGeomFieldDefn(0)->GetSpatialRef() == - * poSrcLayer->GetSpatialRef() */ + * m_poSrcLayer->GetSpatialRef() */ if (poSrcDefn->GetGeomFieldDefn(0)->GetSpatialRef() == nullptr) { - poDefn->GetGeomFieldDefn(0)->SetSpatialRef( - poSrcLayer->GetSpatialRef()); + m_poDefn->GetGeomFieldDefn(0)->SetSpatialRef( + m_poSrcLayer->GetSpatialRef()); } } /* -------------------------------------------------------------------- */ - /* Now that we have poSrcLayer, we can install a spatial filter */ + /* Now that we have m_poSrcLayer, we can install a spatial filter */ /* if there is one. 
*/ /* -------------------------------------------------------------------- */ - if (poSpatFilter != nullptr) - OGRGenSQLResultsLayer::SetSpatialFilter(0, poSpatFilter); + if (poSpatFilter) + OGRGenSQLResultsLayer::SetSpatialFilter( + 0, const_cast(poSpatFilter)); OGRGenSQLResultsLayer::ResetReading(); @@ -491,38 +476,18 @@ OGRGenSQLResultsLayer::OGRGenSQLResultsLayer(GDALDataset *poSrcDSIn, OGRGenSQLResultsLayer::~OGRGenSQLResultsLayer() { - if (m_nFeaturesRead > 0 && poDefn != nullptr) + if (m_nFeaturesRead > 0 && m_poDefn != nullptr) { CPLDebug("GenSQL", CPL_FRMT_GIB " features read on layer '%s'.", - m_nFeaturesRead, poDefn->GetName()); + m_nFeaturesRead, m_poDefn->GetName()); } OGRGenSQLResultsLayer::ClearFilters(); - /* -------------------------------------------------------------------- */ - /* Free various datastructures. */ - /* -------------------------------------------------------------------- */ - CPLFree(papoTableLayers); - papoTableLayers = nullptr; - - CPLFree(panFIDIndex); - CPLFree(panGeomFieldToSrcGeomField); - - delete poSummaryFeature; - delete static_cast(pSelectInfo); - - if (poDefn != nullptr) + if (m_poDefn != nullptr) { - poDefn->Release(); + m_poDefn->Release(); } - - /* -------------------------------------------------------------------- */ - /* Release any additional datasources being used in joins. */ - /* -------------------------------------------------------------------- */ - for (int iEDS = 0; iEDS < nExtraDSCount; iEDS++) - GDALClose(GDALDataset::ToHandle(papoExtraDS[iEDS])); - - CPLFree(papoExtraDS); } /************************************************************************/ @@ -539,17 +504,17 @@ void OGRGenSQLResultsLayer::ClearFilters() /* -------------------------------------------------------------------- */ /* Clear any filters installed on the target layer. 
*/ /* -------------------------------------------------------------------- */ - if (poSrcLayer != nullptr) + if (m_poSrcLayer != nullptr) { - poSrcLayer->ResetReading(); - poSrcLayer->SetAttributeFilter(""); - poSrcLayer->SetSpatialFilter(nullptr); + m_poSrcLayer->ResetReading(); + m_poSrcLayer->SetAttributeFilter(""); + m_poSrcLayer->SetSpatialFilter(nullptr); } /* -------------------------------------------------------------------- */ /* Clear any attribute filter installed on the joined layers. */ /* -------------------------------------------------------------------- */ - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); if (psSelectInfo != nullptr) { @@ -557,7 +522,7 @@ void OGRGenSQLResultsLayer::ClearFilters() { swq_join_def *psJoinInfo = psSelectInfo->join_defs + iJoin; OGRLayer *poJoinLayer = - papoTableLayers[psJoinInfo->secondary_table]; + m_apoTableLayers[psJoinInfo->secondary_table]; poJoinLayer->SetAttributeFilter(""); } @@ -570,7 +535,7 @@ void OGRGenSQLResultsLayer::ClearFilters() { for (int iTable = 0; iTable < psSelectInfo->table_count; iTable++) { - OGRLayer *poLayer = papoTableLayers[iTable]; + OGRLayer *poLayer = m_apoTableLayers[iTable]; poLayer->SetIgnoredFields(nullptr); } } @@ -586,7 +551,7 @@ int OGRGenSQLResultsLayer::MustEvaluateSpatialFilterOnGenSQL() if (m_poFilterGeom != nullptr && m_iGeomFieldFilter >= 0 && m_iGeomFieldFilter < GetLayerDefn()->GetGeomFieldCount()) { - int iSrcGeomField = panGeomFieldToSrcGeomField[m_iGeomFieldFilter]; + int iSrcGeomField = m_anGeomFieldToSrcGeomField[m_iGeomFieldFilter]; if (iSrcGeomField < 0) bEvaluateSpatialFilter = TRUE; } @@ -601,21 +566,21 @@ void OGRGenSQLResultsLayer::ApplyFiltersToSource() { if (m_bForwardWhereToSourceLayer && !m_osInitialWHERE.empty()) { - poSrcLayer->SetAttributeFilter(m_osInitialWHERE.c_str()); + m_poSrcLayer->SetAttributeFilter(m_osInitialWHERE.c_str()); } else { - poSrcLayer->SetAttributeFilter(nullptr); + 
m_poSrcLayer->SetAttributeFilter(nullptr); } if (m_iGeomFieldFilter >= 0 && m_iGeomFieldFilter < GetLayerDefn()->GetGeomFieldCount()) { - int iSrcGeomField = panGeomFieldToSrcGeomField[m_iGeomFieldFilter]; + int iSrcGeomField = m_anGeomFieldToSrcGeomField[m_iGeomFieldFilter]; if (iSrcGeomField >= 0) - poSrcLayer->SetSpatialFilter(iSrcGeomField, m_poFilterGeom); + m_poSrcLayer->SetSpatialFilter(iSrcGeomField, m_poFilterGeom); } - poSrcLayer->ResetReading(); + m_poSrcLayer->ResetReading(); } /************************************************************************/ @@ -625,15 +590,15 @@ void OGRGenSQLResultsLayer::ApplyFiltersToSource() void OGRGenSQLResultsLayer::ResetReading() { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); if (psSelectInfo->query_mode == SWQM_RECORDSET) { ApplyFiltersToSource(); } - nNextIndexFID = psSelectInfo->offset; - nIteratedFeatures = -1; + m_nNextIndexFID = psSelectInfo->offset; + m_nIteratedFeatures = -1; m_bEOF = false; } @@ -650,12 +615,12 @@ OGRErr OGRGenSQLResultsLayer::SetNextByIndex(GIntBig nIndex) if (nIndex < 0) return OGRERR_FAILURE; - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); if (psSelectInfo->limit >= 0) { - nIteratedFeatures = nIndex; - if (nIteratedFeatures >= psSelectInfo->limit) + m_nIteratedFeatures = nIndex; + if (m_nIteratedFeatures >= psSelectInfo->limit) { return OGRERR_FAILURE; } @@ -669,15 +634,15 @@ OGRErr OGRGenSQLResultsLayer::SetNextByIndex(GIntBig nIndex) return OGRERR_FAILURE; } if (psSelectInfo->query_mode == SWQM_SUMMARY_RECORD || - psSelectInfo->query_mode == SWQM_DISTINCT_LIST || - panFIDIndex != nullptr) + psSelectInfo->query_mode == SWQM_DISTINCT_LIST || !m_anFIDIndex.empty()) { - nNextIndexFID = nIndex + psSelectInfo->offset; + m_nNextIndexFID = nIndex + psSelectInfo->offset; return OGRERR_NONE; } else { - OGRErr eErr = poSrcLayer->SetNextByIndex(nIndex + psSelectInfo->offset); + OGRErr 
eErr = + m_poSrcLayer->SetNextByIndex(nIndex + psSelectInfo->offset); if (eErr != OGRERR_NONE) m_bEOF = true; return eErr; @@ -692,7 +657,7 @@ OGRErr OGRGenSQLResultsLayer::GetExtent(int iGeomField, OGREnvelope *psExtent, int bForce) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); if (iGeomField < 0 || iGeomField >= GetLayerDefn()->GetGeomFieldCount() || GetLayerDefn()->GetGeomFieldDefn(iGeomField)->GetType() == wkbNone) @@ -707,9 +672,9 @@ OGRErr OGRGenSQLResultsLayer::GetExtent(int iGeomField, OGREnvelope *psExtent, if (psSelectInfo->query_mode == SWQM_RECORDSET) { - int iSrcGeomField = panGeomFieldToSrcGeomField[iGeomField]; + int iSrcGeomField = m_anGeomFieldToSrcGeomField[iGeomField]; if (iSrcGeomField >= 0) - return poSrcLayer->GetExtent(iSrcGeomField, psExtent, bForce); + return m_poSrcLayer->GetExtent(iSrcGeomField, psExtent, bForce); else if (iGeomField == 0) return OGRLayer::GetExtent(psExtent, bForce); else @@ -726,7 +691,7 @@ OGRErr OGRGenSQLResultsLayer::GetExtent(int iGeomField, OGREnvelope *psExtent, GIntBig OGRGenSQLResultsLayer::GetFeatureCount(int bForce) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); CreateOrderByIndex(); @@ -745,7 +710,7 @@ GIntBig OGRGenSQLResultsLayer::GetFeatureCount(int bForce) return 1; else if (m_poAttrQuery == nullptr && !MustEvaluateSpatialFilterOnGenSQL()) { - nRet = poSrcLayer->GetFeatureCount(bForce); + nRet = m_poSrcLayer->GetFeatureCount(bForce); } else { @@ -767,22 +732,22 @@ GIntBig OGRGenSQLResultsLayer::GetFeatureCount(int bForce) int OGRGenSQLResultsLayer::TestCapability(const char *pszCap) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); if (EQUAL(pszCap, OLCFastSetNextByIndex)) { if (psSelectInfo->query_mode == SWQM_SUMMARY_RECORD || psSelectInfo->query_mode == SWQM_DISTINCT_LIST || - panFIDIndex != nullptr) + !m_anFIDIndex.empty()) 
return TRUE; else - return poSrcLayer->TestCapability(pszCap); + return m_poSrcLayer->TestCapability(pszCap); } if (psSelectInfo->query_mode == SWQM_RECORDSET && (EQUAL(pszCap, OLCFastFeatureCount) || EQUAL(pszCap, OLCRandomRead) || EQUAL(pszCap, OLCFastGetExtent))) - return poSrcLayer->TestCapability(pszCap); + return m_poSrcLayer->TestCapability(pszCap); else if (psSelectInfo->query_mode != SWQM_RECORDSET) { @@ -793,7 +758,7 @@ int OGRGenSQLResultsLayer::TestCapability(const char *pszCap) if (EQUAL(pszCap, OLCStringsAsUTF8) || EQUAL(pszCap, OLCCurveGeometries) || EQUAL(pszCap, OLCMeasuredGeometries) || EQUAL(pszCap, OLCZGeometries)) { - return poSrcLayer->TestCapability(pszCap); + return m_poSrcLayer->TestCapability(pszCap); } return FALSE; @@ -809,7 +774,7 @@ int OGRGenSQLResultsLayer::ContainGeomSpecialField(swq_expr_node *expr) { if (expr->table_index == 0 && expr->field_index != -1) { - OGRLayer *poLayer = papoTableLayers[expr->table_index]; + OGRLayer *poLayer = m_apoTableLayers[expr->table_index]; int nSpecialFieldIdx = expr->field_index - poLayer->GetLayerDefn()->GetFieldCount(); if (nSpecialFieldIdx == SPF_OGR_GEOMETRY || @@ -837,16 +802,16 @@ int OGRGenSQLResultsLayer::ContainGeomSpecialField(swq_expr_node *expr) /* PrepareSummary() */ /************************************************************************/ -int OGRGenSQLResultsLayer::PrepareSummary() +bool OGRGenSQLResultsLayer::PrepareSummary() { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); - if (poSummaryFeature != nullptr) - return TRUE; + if (m_poSummaryFeature) + return true; - poSummaryFeature = new OGRFeature(poDefn); - poSummaryFeature->SetFID(0); + m_poSummaryFeature = std::make_unique(m_poDefn); + m_poSummaryFeature->SetFID(0); /* -------------------------------------------------------------------- */ /* Ensure our query parameters are in place on the source */ @@ -859,7 +824,7 @@ int OGRGenSQLResultsLayer::PrepareSummary() /* the 
where clause and no column references OGR_GEOMETRY, */ /* OGR_GEOM_WKT or OGR_GEOM_AREA special fields. */ /* -------------------------------------------------------------------- */ - int bSaveIsGeomIgnored = poSrcLayer->GetLayerDefn()->IsGeometryIgnored(); + int bSaveIsGeomIgnored = m_poSrcLayer->GetLayerDefn()->IsGeometryIgnored(); if (m_poFilterGeom == nullptr && (psSelectInfo->where_expr == nullptr || !ContainGeomSpecialField(psSelectInfo->where_expr))) @@ -870,7 +835,7 @@ int OGRGenSQLResultsLayer::PrepareSummary() swq_col_def *psColDef = &psSelectInfo->column_defs[iField]; if (psColDef->table_index == 0 && psColDef->field_index != -1) { - OGRLayer *poLayer = papoTableLayers[psColDef->table_index]; + OGRLayer *poLayer = m_apoTableLayers[psColDef->table_index]; int nSpecialFieldIdx = psColDef->field_index - poLayer->GetLayerDefn()->GetFieldCount(); if (nSpecialFieldIdx == SPF_OGR_GEOMETRY || @@ -896,7 +861,7 @@ int OGRGenSQLResultsLayer::PrepareSummary() } } if (!bFoundGeomExpr) - poSrcLayer->GetLayerDefn()->SetGeometryIgnored(TRUE); + m_poSrcLayer->GetLayerDefn()->SetGeometryIgnored(TRUE); } /* -------------------------------------------------------------------- */ @@ -908,19 +873,19 @@ int OGRGenSQLResultsLayer::PrepareSummary() psSelectInfo->column_defs[0].col_func == SWQCF_COUNT && psSelectInfo->column_defs[0].field_index < 0) { - GIntBig nRes = poSrcLayer->GetFeatureCount(TRUE); - poSummaryFeature->SetField(0, nRes); + GIntBig nRes = m_poSrcLayer->GetFeatureCount(TRUE); + m_poSummaryFeature->SetField(0, nRes); if (CPL_INT64_FITS_ON_INT32(nRes)) { - poDefn->GetFieldDefn(0)->SetType(OFTInteger); - delete poSummaryFeature; - poSummaryFeature = new OGRFeature(poDefn); - poSummaryFeature->SetFID(0); - poSummaryFeature->SetField(0, static_cast(nRes)); + m_poSummaryFeature.reset(); + m_poDefn->GetFieldDefn(0)->SetType(OFTInteger); + m_poSummaryFeature = std::make_unique(m_poDefn); + m_poSummaryFeature->SetFID(0); + m_poSummaryFeature->SetField(0, 
static_cast(nRes)); } - poSrcLayer->GetLayerDefn()->SetGeometryIgnored(bSaveIsGeomIgnored); + m_poSrcLayer->GetLayerDefn()->SetGeometryIgnored(bSaveIsGeomIgnored); return TRUE; } @@ -929,9 +894,8 @@ int OGRGenSQLResultsLayer::PrepareSummary() /* building facilities of SWQ. */ /* -------------------------------------------------------------------- */ const char *pszError = nullptr; - OGRFeature *poSrcFeature = nullptr; - while ((poSrcFeature = poSrcLayer->GetNextFeature()) != nullptr) + for (auto &&poSrcFeature : *m_poSrcLayer) { for (int iField = 0; iField < psSelectInfo->result_columns(); iField++) { @@ -942,11 +906,11 @@ int OGRGenSQLResultsLayer::PrepareSummary() /* psColDef->field_index can be -1 in the case of a COUNT(*) */ if (psColDef->field_index < 0) pszError = swq_select_summarize(psSelectInfo, iField, ""); - else if (IS_GEOM_FIELD_INDEX(poSrcLayer->GetLayerDefn(), + else if (IS_GEOM_FIELD_INDEX(m_poSrcLayer->GetLayerDefn(), psColDef->field_index)) { int iSrcGeomField = ALL_FIELD_INDEX_TO_GEOM_FIELD_INDEX( - poSrcLayer->GetLayerDefn(), psColDef->field_index); + m_poSrcLayer->GetLayerDefn(), psColDef->field_index); OGRGeometry *poGeom = poSrcFeature->GetGeomFieldRef(iSrcGeomField); if (poGeom != nullptr) @@ -974,22 +938,18 @@ int OGRGenSQLResultsLayer::PrepareSummary() if (pszError != nullptr) { - delete poSrcFeature; - delete poSummaryFeature; - poSummaryFeature = nullptr; + m_poSummaryFeature.reset(); - poSrcLayer->GetLayerDefn()->SetGeometryIgnored( + m_poSrcLayer->GetLayerDefn()->SetGeometryIgnored( bSaveIsGeomIgnored); CPLError(CE_Failure, CPLE_AppDefined, "%s", pszError); - return FALSE; + return false; } } - - delete poSrcFeature; } - poSrcLayer->GetLayerDefn()->SetGeometryIgnored(bSaveIsGeomIgnored); + m_poSrcLayer->GetLayerDefn()->SetGeometryIgnored(bSaveIsGeomIgnored); /* -------------------------------------------------------------------- */ /* Clear away the filters we have installed till a next run through*/ @@ -1014,18 +974,17 @@ int 
OGRGenSQLResultsLayer::PrepareSummary() { if (CPL_INT64_FITS_ON_INT32(oSummary.count)) { - delete poSummaryFeature; - poSummaryFeature = nullptr; - poDefn->GetFieldDefn(iField)->SetType(OFTInteger); + m_poSummaryFeature.reset(); + m_poDefn->GetFieldDefn(iField)->SetType(OFTInteger); } } } } - if (poSummaryFeature == nullptr) + if (!m_poSummaryFeature) { - poSummaryFeature = new OGRFeature(poDefn); - poSummaryFeature->SetFID(0); + m_poSummaryFeature = std::make_unique(m_poDefn); + m_poSummaryFeature->SetFID(0); } for (int iField = 0; iField < psSelectInfo->result_columns(); iField++) @@ -1046,7 +1005,7 @@ int OGRGenSQLResultsLayer::PrepareSummary() double dfAvg = oSummary.sum / oSummary.count; CPLUnixTimeToYMDHMS(static_cast(dfAvg), &brokendowntime); - poSummaryFeature->SetField( + m_poSummaryFeature->SetField( iField, brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday, brokendowntime.tm_hour, brokendowntime.tm_min, @@ -1055,8 +1014,8 @@ int OGRGenSQLResultsLayer::PrepareSummary() 0); } else - poSummaryFeature->SetField(iField, oSummary.sum / - oSummary.count); + m_poSummaryFeature->SetField( + iField, oSummary.sum / oSummary.count); } else if (psColDef->col_func == SWQCF_MIN && oSummary.count > 0) { @@ -1064,10 +1023,10 @@ int OGRGenSQLResultsLayer::PrepareSummary() psColDef->field_type == SWQ_TIME || psColDef->field_type == SWQ_TIMESTAMP || psColDef->field_type == SWQ_STRING) - poSummaryFeature->SetField(iField, - oSummary.osMin.c_str()); + m_poSummaryFeature->SetField(iField, + oSummary.osMin.c_str()); else - poSummaryFeature->SetField(iField, oSummary.min); + m_poSummaryFeature->SetField(iField, oSummary.min); } else if (psColDef->col_func == SWQCF_MAX && oSummary.count > 0) { @@ -1075,18 +1034,18 @@ int OGRGenSQLResultsLayer::PrepareSummary() psColDef->field_type == SWQ_TIME || psColDef->field_type == SWQ_TIMESTAMP || psColDef->field_type == SWQ_STRING) - poSummaryFeature->SetField(iField, - oSummary.osMax.c_str()); + 
m_poSummaryFeature->SetField(iField, + oSummary.osMax.c_str()); else - poSummaryFeature->SetField(iField, oSummary.max); + m_poSummaryFeature->SetField(iField, oSummary.max); } else if (psColDef->col_func == SWQCF_COUNT) - poSummaryFeature->SetField(iField, oSummary.count); + m_poSummaryFeature->SetField(iField, oSummary.count); else if (psColDef->col_func == SWQCF_SUM && oSummary.count > 0) - poSummaryFeature->SetField(iField, oSummary.sum); + m_poSummaryFeature->SetField(iField, oSummary.sum); } else if (psColDef->col_func == SWQCF_COUNT) - poSummaryFeature->SetField(iField, 0); + m_poSummaryFeature->SetField(iField, 0); } } @@ -1315,7 +1274,7 @@ static CPLString GetFilterForJoin(swq_expr_node *poExpr, OGRFeature *poSrcFeat, OGRFeature *OGRGenSQLResultsLayer::TranslateFeature(OGRFeature *poSrcFeat) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); std::vector apoFeatures; if (poSrcFeat == nullptr) @@ -1339,7 +1298,7 @@ OGRFeature *OGRGenSQLResultsLayer::TranslateFeature(OGRFeature *poSrcFeat) /* we have taken care of this */ CPLAssert(psJoinInfo->secondary_table == iJoin + 1); - OGRLayer *poJoinLayer = papoTableLayers[psJoinInfo->secondary_table]; + OGRLayer *poJoinLayer = m_apoTableLayers[psJoinInfo->secondary_table]; osFilter = GetFilterForJoin(psJoinInfo->poExpr, poSrcFeat, poJoinLayer, psJoinInfo->secondary_table); @@ -1364,7 +1323,7 @@ OGRFeature *OGRGenSQLResultsLayer::TranslateFeature(OGRFeature *poSrcFeat) /* -------------------------------------------------------------------- */ /* Create destination feature. 
*/ /* -------------------------------------------------------------------- */ - OGRFeature *poDstFeat = new OGRFeature(poDefn); + OGRFeature *poDstFeat = new OGRFeature(m_poDefn); poDstFeat->SetFID(poSrcFeat->GetFID()); @@ -1502,11 +1461,11 @@ OGRFeature *OGRGenSQLResultsLayer::TranslateFeature(OGRFeature *poSrcFeat) poDstFeat->SetGeomField(iGeomField++, poSrcFeat->GetGeomFieldRef(iSrcGeomField)); } - else if (psColDef->field_index >= iFIDFieldIndex) + else if (psColDef->field_index >= m_iFIDFieldIndex) { CPLAssert(psColDef->field_index < - iFIDFieldIndex + SPECIAL_FIELD_COUNT); - switch (SpecialFieldTypes[psColDef->field_index - iFIDFieldIndex]) + m_iFIDFieldIndex + SPECIAL_FIELD_COUNT); + switch (SpecialFieldTypes[psColDef->field_index - m_iFIDFieldIndex]) { case SWQ_INTEGER: case SWQ_INTEGER64: @@ -1614,22 +1573,23 @@ OGRFeature *OGRGenSQLResultsLayer::TranslateFeature(OGRFeature *poSrcFeat) OGRFeature *OGRGenSQLResultsLayer::GetNextFeature() { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); if (m_bEOF) return nullptr; if (psSelectInfo->limit >= 0 && - (nIteratedFeatures < 0 ? 0 : nIteratedFeatures) >= psSelectInfo->limit) + (m_nIteratedFeatures < 0 ? 0 : m_nIteratedFeatures) >= + psSelectInfo->limit) return nullptr; CreateOrderByIndex(); - if (panFIDIndex == nullptr && nIteratedFeatures < 0 && + if (m_anFIDIndex.empty() && m_nIteratedFeatures < 0 && psSelectInfo->offset > 0 && psSelectInfo->query_mode == SWQM_RECORDSET) { - poSrcLayer->SetNextByIndex(psSelectInfo->offset); + m_poSrcLayer->SetNextByIndex(psSelectInfo->offset); } - if (nIteratedFeatures < 0) - nIteratedFeatures = 0; + if (m_nIteratedFeatures < 0) + m_nIteratedFeatures = 0; /* -------------------------------------------------------------------- */ /* Handle summary sets. 
*/ @@ -1637,8 +1597,8 @@ OGRFeature *OGRGenSQLResultsLayer::GetNextFeature() if (psSelectInfo->query_mode == SWQM_SUMMARY_RECORD || psSelectInfo->query_mode == SWQM_DISTINCT_LIST) { - nIteratedFeatures++; - return GetFeature(nNextIndexFID++); + m_nIteratedFeatures++; + return GetFeature(m_nNextIndexFID++); } int bEvaluateSpatialFilter = MustEvaluateSpatialFilterOnGenSQL(); @@ -1649,7 +1609,7 @@ OGRFeature *OGRGenSQLResultsLayer::GetNextFeature() while (true) { std::unique_ptr poSrcFeat; - if (panFIDIndex != nullptr) + if (!m_anFIDIndex.empty()) { /* -------------------------------------------------------------------- */ @@ -1659,15 +1619,16 @@ OGRFeature *OGRGenSQLResultsLayer::GetNextFeature() /* -------------------------------------------------------------------- */ - if (nNextIndexFID >= static_cast(nIndexSize)) + if (m_nNextIndexFID >= static_cast(m_anFIDIndex.size())) return nullptr; - poSrcFeat.reset(poSrcLayer->GetFeature(panFIDIndex[nNextIndexFID])); - nNextIndexFID++; + poSrcFeat.reset(m_poSrcLayer->GetFeature( + m_anFIDIndex[static_cast(m_nNextIndexFID)])); + m_nNextIndexFID++; } else { - poSrcFeat.reset(poSrcLayer->GetNextFeature()); + poSrcFeat.reset(m_poSrcLayer->GetNextFeature()); } if (poSrcFeat == nullptr) @@ -1683,7 +1644,7 @@ OGRFeature *OGRGenSQLResultsLayer::GetNextFeature() (!bEvaluateSpatialFilter || FilterGeometry(poFeature->GetGeomFieldRef(m_iGeomFieldFilter)))) { - nIteratedFeatures++; + m_nIteratedFeatures++; return poFeature.release(); } } @@ -1698,7 +1659,7 @@ OGRFeature *OGRGenSQLResultsLayer::GetNextFeature() OGRFeature *OGRGenSQLResultsLayer::GetFeature(GIntBig nFID) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); CreateOrderByIndex(); @@ -1707,10 +1668,10 @@ OGRFeature *OGRGenSQLResultsLayer::GetFeature(GIntBig nFID) /* -------------------------------------------------------------------- */ if (psSelectInfo->query_mode == SWQM_SUMMARY_RECORD) { - if (!PrepareSummary() || nFID 
!= 0 || poSummaryFeature == nullptr) + if (!PrepareSummary() || nFID != 0 || !m_poSummaryFeature) return nullptr; else - return poSummaryFeature->Clone(); + return m_poSummaryFeature->Clone(); } /* -------------------------------------------------------------------- */ @@ -1735,15 +1696,15 @@ OGRFeature *OGRGenSQLResultsLayer::GetFeature(GIntBig nFID) const size_t nIdx = static_cast(nFID); if (oSummary.oVectorDistinctValues[nIdx] != SZ_OGR_NULL) { - poSummaryFeature->SetField( + m_poSummaryFeature->SetField( 0, oSummary.oVectorDistinctValues[nIdx].c_str()); } else - poSummaryFeature->SetFieldNull(0); + m_poSummaryFeature->SetFieldNull(0); } else { - if (m_oDistinctList.empty()) + if (m_aosDistinctList.empty()) { std::set::const_iterator oIter = oSummary.oSetDistinctValues.begin(); @@ -1751,10 +1712,11 @@ OGRFeature *OGRGenSQLResultsLayer::GetFeature(GIntBig nFID) oEnd = oSummary.oSetDistinctValues.end(); try { - m_oDistinctList.reserve(oSummary.oSetDistinctValues.size()); + m_aosDistinctList.reserve( + oSummary.oSetDistinctValues.size()); for (; oIter != oEnd; ++oIter) { - m_oDistinctList.push_back(*oIter); + m_aosDistinctList.push_back(*oIter); } } catch (std::bad_alloc &) @@ -1765,26 +1727,27 @@ OGRFeature *OGRGenSQLResultsLayer::GetFeature(GIntBig nFID) } if (nFID < 0 || - nFID >= static_cast(m_oDistinctList.size())) + nFID >= static_cast(m_aosDistinctList.size())) return nullptr; const size_t nIdx = static_cast(nFID); - if (m_oDistinctList[nIdx] != SZ_OGR_NULL) - poSummaryFeature->SetField(0, m_oDistinctList[nIdx].c_str()); + if (m_aosDistinctList[nIdx] != SZ_OGR_NULL) + m_poSummaryFeature->SetField(0, + m_aosDistinctList[nIdx].c_str()); else - poSummaryFeature->SetFieldNull(0); + m_poSummaryFeature->SetFieldNull(0); } - poSummaryFeature->SetFID(nFID); + m_poSummaryFeature->SetFID(nFID); - return poSummaryFeature->Clone(); + return m_poSummaryFeature->Clone(); } /* -------------------------------------------------------------------- */ /* Handle request for 
random record. */ /* -------------------------------------------------------------------- */ auto poSrcFeature = - std::unique_ptr(poSrcLayer->GetFeature(nFID)); + std::unique_ptr(m_poSrcLayer->GetFeature(nFID)); if (poSrcFeature == nullptr) return nullptr; @@ -1808,9 +1771,8 @@ OGRGeometry *OGRGenSQLResultsLayer::GetSpatialFilter() OGRFeatureDefn *OGRGenSQLResultsLayer::GetLayerDefn() { - swq_select *psSelectInfo = static_cast(pSelectInfo); - if (psSelectInfo->query_mode == SWQM_SUMMARY_RECORD && - poSummaryFeature == nullptr) + swq_select *psSelectInfo = m_pSelectInfo.get(); + if (psSelectInfo->query_mode == SWQM_SUMMARY_RECORD && !m_poSummaryFeature) { // Run PrepareSummary() is we have a COUNT column so as to be // able to downcast it from OFTInteger64 to OFTInteger @@ -1825,7 +1787,7 @@ OGRFeatureDefn *OGRGenSQLResultsLayer::GetLayerDefn() } } - return poDefn; + return m_poDefn; } /************************************************************************/ @@ -1833,10 +1795,9 @@ OGRFeatureDefn *OGRGenSQLResultsLayer::GetLayerDefn() /************************************************************************/ void OGRGenSQLResultsLayer::FreeIndexFields(OGRField *pasIndexFields, - size_t l_nIndexSize, - bool bFreeArray) + size_t l_nIndexSize) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); const int nOrderItems = psSelectInfo->order_specs; /* -------------------------------------------------------------------- */ @@ -1846,13 +1807,13 @@ void OGRGenSQLResultsLayer::FreeIndexFields(OGRField *pasIndexFields, { swq_order_def *psKeyDef = psSelectInfo->order_defs + iKey; - if (psKeyDef->field_index >= iFIDFieldIndex) + if (psKeyDef->field_index >= m_iFIDFieldIndex) { CPLAssert(psKeyDef->field_index < - iFIDFieldIndex + SPECIAL_FIELD_COUNT); + m_iFIDFieldIndex + SPECIAL_FIELD_COUNT); /* warning: only special fields of type string should be deallocated */ - if (SpecialFieldTypes[psKeyDef->field_index - 
iFIDFieldIndex] == + if (SpecialFieldTypes[psKeyDef->field_index - m_iFIDFieldIndex] == SWQ_STRING) { for (size_t i = 0; i < l_nIndexSize; i++) @@ -1865,7 +1826,7 @@ void OGRGenSQLResultsLayer::FreeIndexFields(OGRField *pasIndexFields, } OGRFieldDefn *poFDefn = - poSrcLayer->GetLayerDefn()->GetFieldDefn(psKeyDef->field_index); + m_poSrcLayer->GetLayerDefn()->GetFieldDefn(psKeyDef->field_index); if (poFDefn->GetType() == OFTString) { @@ -1879,9 +1840,6 @@ void OGRGenSQLResultsLayer::FreeIndexFields(OGRField *pasIndexFields, } } } - - if (bFreeArray) - VSIFree(pasIndexFields); } /************************************************************************/ @@ -1892,18 +1850,18 @@ void OGRGenSQLResultsLayer::ReadIndexFields(OGRFeature *poSrcFeat, int nOrderItems, OGRField *pasIndexFields) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); for (int iKey = 0; iKey < nOrderItems; iKey++) { const swq_order_def *psKeyDef = psSelectInfo->order_defs + iKey; OGRField *psDstField = pasIndexFields + iKey; - if (psKeyDef->field_index >= iFIDFieldIndex) + if (psKeyDef->field_index >= m_iFIDFieldIndex) { CPLAssert(psKeyDef->field_index < - iFIDFieldIndex + SPECIAL_FIELD_COUNT); + m_iFIDFieldIndex + SPECIAL_FIELD_COUNT); - switch (SpecialFieldTypes[psKeyDef->field_index - iFIDFieldIndex]) + switch (SpecialFieldTypes[psKeyDef->field_index - m_iFIDFieldIndex]) { case SWQ_INTEGER: case SWQ_INTEGER64: @@ -1928,7 +1886,7 @@ void OGRGenSQLResultsLayer::ReadIndexFields(OGRFeature *poSrcFeat, } OGRFieldDefn *poFDefn = - poSrcLayer->GetLayerDefn()->GetFieldDefn(psKeyDef->field_index); + m_poSrcLayer->GetLayerDefn()->GetFieldDefn(psKeyDef->field_index); OGRField *psSrcField = poSrcFeat->GetRawFieldRef(psKeyDef->field_index); @@ -1967,16 +1925,17 @@ void OGRGenSQLResultsLayer::ReadIndexFields(OGRFeature *poSrcFeat, void OGRGenSQLResultsLayer::CreateOrderByIndex() { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select 
*psSelectInfo = m_pSelectInfo.get(); const int nOrderItems = psSelectInfo->order_specs; if (!(nOrderItems > 0 && psSelectInfo->query_mode == SWQM_RECORDSET)) return; - if (bOrderByValid) + if (m_bOrderByValid) return; - bOrderByValid = TRUE; + m_bOrderByValid = true; + m_anFIDIndex.clear(); ResetReading(); @@ -1985,43 +1944,38 @@ void OGRGenSQLResultsLayer::CreateOrderByIndex() /* -------------------------------------------------------------------- */ if (psSelectInfo->offset == 0 && psSelectInfo->limit == 1) { - OGRFeature *poSrcFeat = poSrcLayer->GetNextFeature(); - if (poSrcFeat == nullptr) - { - panFIDIndex = nullptr; - nIndexSize = 0; - return; - } - - OGRField *pasCurrentFields = - static_cast(CPLCalloc(sizeof(OGRField), nOrderItems)); - OGRField *pasBestFields = - static_cast(CPLCalloc(sizeof(OGRField), nOrderItems)); - GIntBig nBestFID = poSrcFeat->GetFID(); - ReadIndexFields(poSrcFeat, nOrderItems, pasBestFields); - delete poSrcFeat; - while ((poSrcFeat = poSrcLayer->GetNextFeature()) != nullptr) - { - ReadIndexFields(poSrcFeat, nOrderItems, pasCurrentFields); - if (Compare(pasCurrentFields, pasBestFields) < 0) + std::vector asCurrentFields(nOrderItems); + std::vector asBestFields(nOrderItems); + memset(asCurrentFields.data(), 0, sizeof(OGRField) * nOrderItems); + memset(asBestFields.data(), 0, sizeof(OGRField) * nOrderItems); + bool bFoundSrcFeature = false; + GIntBig nBestFID = 0; + for (auto &&poSrcFeat : *m_poSrcLayer) + { + ReadIndexFields(poSrcFeat.get(), nOrderItems, + asCurrentFields.data()); + if (!bFoundSrcFeature || + Compare(asCurrentFields.data(), asBestFields.data()) < 0) { + bFoundSrcFeature = true; nBestFID = poSrcFeat->GetFID(); - FreeIndexFields(pasBestFields, 1, false); - memcpy(pasBestFields, pasCurrentFields, + FreeIndexFields(asBestFields.data(), 1); + memcpy(asBestFields.data(), asCurrentFields.data(), sizeof(OGRField) * nOrderItems); } else { - FreeIndexFields(pasCurrentFields, 1, false); + FreeIndexFields(asCurrentFields.data(), 
1); } - memset(pasCurrentFields, 0, sizeof(OGRField) * nOrderItems); - delete poSrcFeat; - } - VSIFree(pasCurrentFields); - FreeIndexFields(pasBestFields, 1); - panFIDIndex = static_cast(CPLMalloc(sizeof(GIntBig))); - panFIDIndex[0] = nBestFID; - nIndexSize = 1; + memset(asCurrentFields.data(), 0, sizeof(OGRField) * nOrderItems); + } + FreeIndexFields(asBestFields.data(), 1); + + if (bFoundSrcFeature) + { + m_anFIDIndex.resize(1); + m_anFIDIndex[0] = nBestFID; + } return; } @@ -2029,103 +1983,108 @@ void OGRGenSQLResultsLayer::CreateOrderByIndex() /* Allocate set of key values, and the output index. */ /* -------------------------------------------------------------------- */ size_t nFeaturesAlloc = 100; + size_t nIndexSize = 0; + std::vector asIndexFields(nOrderItems * nFeaturesAlloc); + memset(asIndexFields.data(), 0, + sizeof(OGRField) * nOrderItems * nFeaturesAlloc); + std::vector anFIDList; + + // Frees nIndexSize rows of asIndexFields + struct IndexFieldsFreer + { + OGRGenSQLResultsLayer &m_oLayer; + std::vector &m_asIndexFields; + size_t &m_nIndexSize; + + IndexFieldsFreer(OGRGenSQLResultsLayer &poLayerIn, + std::vector &asIndexFieldsIn, + size_t &nIndexSizeIn) + : m_oLayer(poLayerIn), m_asIndexFields(asIndexFieldsIn), + m_nIndexSize(nIndexSizeIn) + { + } + + ~IndexFieldsFreer() + { + m_oLayer.FreeIndexFields(m_asIndexFields.data(), m_nIndexSize); + } + + IndexFieldsFreer(const IndexFieldsFreer &) = delete; + IndexFieldsFreer &operator=(const IndexFieldsFreer &) = delete; + }; - panFIDIndex = nullptr; - OGRField *pasIndexFields = static_cast( - CPLCalloc(sizeof(OGRField), nOrderItems * nFeaturesAlloc)); - GIntBig *panFIDList = - static_cast(CPLMalloc(sizeof(GIntBig) * nFeaturesAlloc)); + IndexFieldsFreer oIndexFieldsFreer(*this, asIndexFields, nIndexSize); /* -------------------------------------------------------------------- */ /* Read in all the key values. 
*/ /* -------------------------------------------------------------------- */ - OGRFeature *poSrcFeat = nullptr; - nIndexSize = 0; - while ((poSrcFeat = poSrcLayer->GetNextFeature()) != nullptr) + for (auto &&poSrcFeat : *m_poSrcLayer) { if (nIndexSize == nFeaturesAlloc) { - GUIntBig nNewFeaturesAlloc = - static_cast(nFeaturesAlloc) + nFeaturesAlloc / 3; + const uint64_t nNewFeaturesAlloc64 = + static_cast(nFeaturesAlloc) + nFeaturesAlloc / 3; #if SIZEOF_SIZE_T == 4 - if (static_cast(nNewFeaturesAlloc) != nNewFeaturesAlloc || + if (static_cast(nNewFeaturesAlloc64) != + nNewFeaturesAlloc64 || static_cast(sizeof(OGRField) * nOrderItems * - nNewFeaturesAlloc) != - static_cast(sizeof(OGRField)) * nOrderItems * - nNewFeaturesAlloc) + nNewFeaturesAlloc64) != + static_cast(sizeof(OGRField)) * nOrderItems * + nNewFeaturesAlloc64) { CPLError(CE_Failure, CPLE_AppDefined, "Cannot allocate pasIndexFields"); - FreeIndexFields(pasIndexFields, nIndexSize); - VSIFree(panFIDList); - nIndexSize = 0; - delete poSrcFeat; return; } #endif - OGRField *pasNewIndexFields = - static_cast(VSI_REALLOC_VERBOSE( - pasIndexFields, - sizeof(OGRField) * nOrderItems * - static_cast(nNewFeaturesAlloc))); - if (pasNewIndexFields == nullptr) + const size_t nNewFeaturesAlloc = + static_cast(nNewFeaturesAlloc64); + + try { - CPLError(CE_Failure, CPLE_AppDefined, - "Cannot allocate pasIndexFields"); - FreeIndexFields(pasIndexFields, nIndexSize); - VSIFree(panFIDList); - nIndexSize = 0; - delete poSrcFeat; - return; + asIndexFields.resize(nOrderItems * nNewFeaturesAlloc); + anFIDList.reserve(nNewFeaturesAlloc); } - pasIndexFields = pasNewIndexFields; - - GIntBig *panNewFIDList = static_cast(VSI_REALLOC_VERBOSE( - panFIDList, - sizeof(GIntBig) * static_cast(nNewFeaturesAlloc))); - if (panNewFIDList == nullptr) + catch (const std::bad_alloc &) { - FreeIndexFields(pasIndexFields, nIndexSize); - VSIFree(panFIDList); - nIndexSize = 0; - delete poSrcFeat; + CPLError(CE_Failure, CPLE_OutOfMemory, + 
"CreateOrderByIndex(): out of memory"); return; } - panFIDList = panNewFIDList; - memset(pasIndexFields + nFeaturesAlloc * nOrderItems, 0, + memset(asIndexFields.data() + nFeaturesAlloc * nOrderItems, 0, sizeof(OGRField) * nOrderItems * - static_cast(nNewFeaturesAlloc - nFeaturesAlloc)); + (nNewFeaturesAlloc - nFeaturesAlloc)); - nFeaturesAlloc = static_cast(nNewFeaturesAlloc); + nFeaturesAlloc = nNewFeaturesAlloc; } - ReadIndexFields(poSrcFeat, nOrderItems, - pasIndexFields + nIndexSize * nOrderItems); + ReadIndexFields(poSrcFeat.get(), nOrderItems, + asIndexFields.data() + nIndexSize * nOrderItems); - panFIDList[nIndexSize] = poSrcFeat->GetFID(); - delete poSrcFeat; + anFIDList.push_back(poSrcFeat->GetFID()); nIndexSize++; } - // CPLDebug("GenSQL", "CreateOrderByIndex() = %d features", nIndexSize); + // CPLDebug("GenSQL", "CreateOrderByIndex() = %zu features", nIndexSize); /* -------------------------------------------------------------------- */ - /* Initialize panFIDIndex */ + /* Initialize m_anFIDIndex */ /* -------------------------------------------------------------------- */ - panFIDIndex = static_cast( - VSI_MALLOC_VERBOSE(sizeof(GIntBig) * nIndexSize)); - if (panFIDIndex == nullptr) + try { - FreeIndexFields(pasIndexFields, nIndexSize); - VSIFree(panFIDList); - nIndexSize = 0; + m_anFIDIndex.reserve(nIndexSize); + } + catch (const std::bad_alloc &) + { + CPLError(CE_Failure, CPLE_OutOfMemory, + "CreateOrderByIndex(): out of memory"); return; } for (size_t i = 0; i < nIndexSize; i++) - panFIDIndex[i] = static_cast(i); + m_anFIDIndex.push_back(static_cast(i)); /* -------------------------------------------------------------------- */ /* Quick sort the records. 
*/ @@ -2135,15 +2094,12 @@ void OGRGenSQLResultsLayer::CreateOrderByIndex() VSI_MALLOC_VERBOSE(sizeof(GIntBig) * nIndexSize)); if (panMerged == nullptr) { - FreeIndexFields(pasIndexFields, nIndexSize); - VSIFree(panFIDList); - nIndexSize = 0; - VSIFree(panFIDIndex); - panFIDIndex = nullptr; + m_anFIDIndex.clear(); return; } - SortIndexSection(pasIndexFields, panMerged, 0, nIndexSize); + // Note: this merge sort is slightly faster than std::sort() + SortIndexSection(asIndexFields.data(), panMerged, 0, nIndexSize); VSIFree(panMerged); /* -------------------------------------------------------------------- */ @@ -2152,25 +2108,19 @@ void OGRGenSQLResultsLayer::CreateOrderByIndex() bool bAlreadySorted = true; for (size_t i = 0; i < nIndexSize; i++) { - if (panFIDIndex[i] != static_cast(i)) + if (m_anFIDIndex[i] != static_cast(i)) bAlreadySorted = false; - panFIDIndex[i] = panFIDList[panFIDIndex[i]]; + m_anFIDIndex[i] = anFIDList[static_cast(m_anFIDIndex[i])]; } - CPLFree(panFIDList); - FreeIndexFields(pasIndexFields, nIndexSize); - - /* If it is already sorted, then free than panFIDIndex array */ + /* If it is already sorted, then free than m_anFIDIndex array */ /* so that GetNextFeature() can call a sequential GetNextFeature() */ /* on the source array. Very useful for layers where random access */ /* is slow. 
*/ /* Use case: the GML result of a WFS GetFeature with a SORTBY */ if (bAlreadySorted) { - CPLFree(panFIDIndex); - panFIDIndex = nullptr; - - nIndexSize = 0; + m_anFIDIndex.clear(); } ResetReading(); @@ -2190,7 +2140,7 @@ void OGRGenSQLResultsLayer::SortIndexSection(const OGRField *pasIndexFields, if (nEntries < 2) return; - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); const int nOrderItems = psSelectInfo->order_specs; size_t nFirstGroup = nEntries / 2; @@ -2211,25 +2161,25 @@ void OGRGenSQLResultsLayer::SortIndexSection(const OGRField *pasIndexFields, nResult = -1; else nResult = Compare( - pasIndexFields + panFIDIndex[nFirstStart] * nOrderItems, - pasIndexFields + panFIDIndex[nSecondStart] * nOrderItems); + pasIndexFields + m_anFIDIndex[nFirstStart] * nOrderItems, + pasIndexFields + m_anFIDIndex[nSecondStart] * nOrderItems); if (nResult > 0) { - panMerged[iMerge] = panFIDIndex[nSecondStart]; + panMerged[iMerge] = m_anFIDIndex[nSecondStart]; nSecondStart++; nSecondGroup--; } else { - panMerged[iMerge] = panFIDIndex[nFirstStart]; + panMerged[iMerge] = m_anFIDIndex[nFirstStart]; nFirstStart++; nFirstGroup--; } } /* Copy the merge list back into the main index */ - memcpy(panFIDIndex + nStart, panMerged, sizeof(GIntBig) * nEntries); + memcpy(m_anFIDIndex.data() + nStart, panMerged, sizeof(GIntBig) * nEntries); } /************************************************************************/ @@ -2253,7 +2203,7 @@ int OGRGenSQLResultsLayer::Compare(const OGRField *pasFirstTuple, const OGRField *pasSecondTuple) { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); int nResult = 0, iKey; for (iKey = 0; nResult == 0 && iKey < psSelectInfo->order_specs; iKey++) @@ -2261,11 +2211,11 @@ int OGRGenSQLResultsLayer::Compare(const OGRField *pasFirstTuple, swq_order_def *psKeyDef = psSelectInfo->order_defs + iKey; OGRFieldDefn *poFDefn = nullptr; - if (psKeyDef->field_index 
>= iFIDFieldIndex) + if (psKeyDef->field_index >= m_iFIDFieldIndex) poFDefn = nullptr; else - poFDefn = - poSrcLayer->GetLayerDefn()->GetFieldDefn(psKeyDef->field_index); + poFDefn = m_poSrcLayer->GetLayerDefn()->GetFieldDefn( + psKeyDef->field_index); if (OGR_RawField_IsUnset(&pasFirstTuple[iKey]) || OGR_RawField_IsNull(&pasFirstTuple[iKey])) @@ -2284,8 +2234,8 @@ int OGRGenSQLResultsLayer::Compare(const OGRField *pasFirstTuple, else if (poFDefn == nullptr) { CPLAssert(psKeyDef->field_index < - iFIDFieldIndex + SPECIAL_FIELD_COUNT); - switch (SpecialFieldTypes[psKeyDef->field_index - iFIDFieldIndex]) + m_iFIDFieldIndex + SPECIAL_FIELD_COUNT); + switch (SpecialFieldTypes[psKeyDef->field_index - m_iFIDFieldIndex]) { case SWQ_INTEGER: // Yes, read Integer in Integer64. @@ -2352,7 +2302,7 @@ void OGRGenSQLResultsLayer::AddFieldDefnToSet(int iTable, int iColumn, { if (iTable != -1 && iColumn != -1) { - OGRLayer *poLayer = papoTableLayers[iTable]; + OGRLayer *poLayer = m_apoTableLayers[iTable]; if (iColumn < poLayer->GetLayerDefn()->GetFieldCount()) { OGRFieldDefn *poFDefn = @@ -2386,7 +2336,7 @@ void OGRGenSQLResultsLayer::ExploreExprForIgnoredFields(swq_expr_node *expr, void OGRGenSQLResultsLayer::FindAndSetIgnoredFields() { - swq_select *psSelectInfo = static_cast(pSelectInfo); + swq_select *psSelectInfo = m_pSelectInfo.get(); CPLHashSet *hSet = CPLHashSetNew(CPLHashSetHashPointer, CPLHashSetEqualPointer, nullptr); @@ -2423,7 +2373,7 @@ void OGRGenSQLResultsLayer::FindAndSetIgnoredFields() /* -------------------------------------------------------------------- */ for (int iTable = 0; iTable < psSelectInfo->table_count; iTable++) { - OGRLayer *poLayer = papoTableLayers[iTable]; + OGRLayer *poLayer = m_apoTableLayers[iTable]; OGRFeatureDefn *poSrcFDefn = poLayer->GetLayerDefn(); char **papszIgnoredFields = nullptr; for (int iSrcField = 0; iSrcField < poSrcFDefn->GetFieldCount(); @@ -2453,11 +2403,8 @@ void OGRGenSQLResultsLayer::FindAndSetIgnoredFields() void 
OGRGenSQLResultsLayer::InvalidateOrderByIndex() { - CPLFree(panFIDIndex); - panFIDIndex = nullptr; - - nIndexSize = 0; - bOrderByValid = FALSE; + m_anFIDIndex.clear(); + m_bOrderByValid = false; } /************************************************************************/ diff --git a/ogr/ogrsf_frmts/generic/ogr_gensql.h b/ogr/ogrsf_frmts/generic/ogr_gensql.h index 6de58f0fdd3d..dcbd87e7a101 100644 --- a/ogr/ogrsf_frmts/generic/ogr_gensql.h +++ b/ogr/ogrsf_frmts/generic/ogr_gensql.h @@ -55,39 +55,42 @@ /* OGRGenSQLResultsLayer */ /************************************************************************/ +class swq_select; + class OGRGenSQLResultsLayer final : public OGRLayer { private: - GDALDataset *poSrcDS; - OGRLayer *poSrcLayer; - void *pSelectInfo; + GDALDataset *m_poSrcDS = nullptr; + OGRLayer *m_poSrcLayer = nullptr; + std::unique_ptr m_pSelectInfo{}; std::string m_osInitialWHERE{}; bool m_bForwardWhereToSourceLayer = true; bool m_bEOF = false; - OGRLayer **papoTableLayers; + // Array of source layers (owned by m_poSrcDS or m_apoExtraDS) + std::vector m_apoTableLayers{}; - OGRFeatureDefn *poDefn; + // Array of extra datasets when referencing a table/layer by a dataset name + std::vector> + m_apoExtraDS{}; - int *panGeomFieldToSrcGeomField; + OGRFeatureDefn *m_poDefn = nullptr; - size_t nIndexSize; - GIntBig *panFIDIndex; - int bOrderByValid; + std::vector m_anGeomFieldToSrcGeomField{}; - GIntBig nNextIndexFID; - OGRFeature *poSummaryFeature; + std::vector m_anFIDIndex{}; + bool m_bOrderByValid = false; - int iFIDFieldIndex; + GIntBig m_nNextIndexFID = 0; + std::unique_ptr m_poSummaryFeature{}; - int nExtraDSCount; - GDALDataset **papoExtraDS; + int m_iFIDFieldIndex = 0; - GIntBig nIteratedFeatures; - std::vector m_oDistinctList; + GIntBig m_nIteratedFeatures = -1; + std::vector m_aosDistinctList{}; - int PrepareSummary(); + bool PrepareSummary(); OGRFeature *TranslateFeature(OGRFeature *); void CreateOrderByIndex(); @@ -95,8 +98,7 @@ class 
OGRGenSQLResultsLayer final : public OGRLayer OGRField *pasIndexFields); void SortIndexSection(const OGRField *pasIndexFields, GIntBig *panMerged, size_t nStart, size_t nEntries); - void FreeIndexFields(OGRField *pasIndexFields, size_t l_nIndexSize, - bool bFreeArray = true); + void FreeIndexFields(OGRField *pasIndexFields, size_t l_nIndexSize); int Compare(const OGRField *pasFirst, const OGRField *pasSecond); void ClearFilters(); @@ -115,8 +117,9 @@ class OGRGenSQLResultsLayer final : public OGRLayer CPL_DISALLOW_COPY_ASSIGN(OGRGenSQLResultsLayer) public: - OGRGenSQLResultsLayer(GDALDataset *poSrcDS, void *pSelectInfo, - OGRGeometry *poSpatFilter, const char *pszWHERE, + OGRGenSQLResultsLayer(GDALDataset *poSrcDS, + std::unique_ptr &&pSelectInfo, + const OGRGeometry *poSpatFilter, const char *pszWHERE, const char *pszDialect); virtual ~OGRGenSQLResultsLayer(); From 1b098598ecc440c82466e415b42d537986404192 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 23 May 2024 00:14:36 +0200 Subject: [PATCH 039/191] LIBKML: fix handling of styleUrl element referencing to an external document Fixes #9975 --- .../data/kml/point_with_external_style.kml | 13 ++ .../style.kml | 14 +++ autotest/ogr/ogr_libkml.py | 30 +++++ ogr/ogrsf_frmts/libkml/ogr_libkml.h | 6 +- .../libkml/ogrlibkmldatasource.cpp | 16 +-- .../libkml/ogrlibkmlfeaturestyle.cpp | 115 +++++++++++------- 6 files changed, 136 insertions(+), 58 deletions(-) create mode 100644 autotest/ogr/data/kml/point_with_external_style.kml create mode 100644 autotest/ogr/data/kml/style_of_point_with_external_style/style.kml diff --git a/autotest/ogr/data/kml/point_with_external_style.kml b/autotest/ogr/data/kml/point_with_external_style.kml new file mode 100644 index 000000000000..aee7354616d2 --- /dev/null +++ b/autotest/ogr/data/kml/point_with_external_style.kml @@ -0,0 +1,13 @@ + + + + + point + + my point + style_of_point_with_external_style/style.kml#myStyle + 2,49,0 + + + + diff --git 
a/autotest/ogr/data/kml/style_of_point_with_external_style/style.kml b/autotest/ogr/data/kml/style_of_point_with_external_style/style.kml new file mode 100644 index 000000000000..8131fdee5e29 --- /dev/null +++ b/autotest/ogr/data/kml/style_of_point_with_external_style/style.kml @@ -0,0 +1,14 @@ + + + + + diff --git a/autotest/ogr/ogr_libkml.py b/autotest/ogr/ogr_libkml.py index 5576ee4fa71a..28608e370283 100755 --- a/autotest/ogr/ogr_libkml.py +++ b/autotest/ogr/ogr_libkml.py @@ -1351,6 +1351,8 @@ def test_ogr_libkml_read_write_style(tmp_vsimem): data = data.decode("ascii") gdal.VSIFCloseL(f) + assert "#unknown_style" in data + expected_style = """