From 48e4a28711dcc1559a0ed5fe0bdc3086431614ac Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 20 Nov 2024 13:07:20 +0100 Subject: [PATCH 01/21] /vsicurl/: fix to allow to read Parquet partitionned datasets from public Azure container using /vsicurl/ Fixes #11309 --- autotest/ogr/ogr_parquet.py | 21 +++++++ port/cpl_vsil_curl.cpp | 112 ++++++++++++++++++++++-------------- port/cpl_vsil_curl_class.h | 1 + 3 files changed, 91 insertions(+), 43 deletions(-) diff --git a/autotest/ogr/ogr_parquet.py b/autotest/ogr/ogr_parquet.py index 912ca56c34e3..add20d7952d0 100755 --- a/autotest/ogr/ogr_parquet.py +++ b/autotest/ogr/ogr_parquet.py @@ -3355,6 +3355,27 @@ def test_ogr_parquet_bbox_float32_but_no_covering_in_metadata(use_dataset): ############################################################################### +@gdaltest.enable_exceptions() +@pytest.mark.require_curl +def test_ogr_parquet_overture_from_azure(): + + if not _has_arrow_dataset(): + pytest.skip("Test requires build with ArrowDataset") + + url = "https://overturemapswestus2.blob.core.windows.net/release?comp=list&delimiter=%2F&prefix=2024-11-13.0%2Ftheme%3Ddivisions%2Ftype%3Ddivision_area%2F&restype=container" + if gdaltest.gdalurlopen(url, timeout=5) is None: + pytest.skip(reason=f"{url} is down") + + with ogr.Open( + "PARQUET:/vsicurl/https://overturemapswestus2.blob.core.windows.net/release/2024-11-13.0/theme=divisions/type=division_area" + ) as ds: + lyr = ds.GetLayer(0) + assert lyr.GetFeatureCount() > 0 + + +############################################################################### + + @gdaltest.enable_exceptions() def test_ogr_parquet_write_arrow(tmp_vsimem): diff --git a/port/cpl_vsil_curl.cpp b/port/cpl_vsil_curl.cpp index 6899cf6af6bd..afdc67bb8982 100644 --- a/port/cpl_vsil_curl.cpp +++ b/port/cpl_vsil_curl.cpp @@ -1348,49 +1348,6 @@ vsi_l_offset VSICurlHandle::GetFileSizeOrHeaders(bool bSetError, if (sWriteFuncHeaderData.pBuffer != nullptr && (response_code == 200 || response_code == 
206)) { - const char *pzETag = - strstr(sWriteFuncHeaderData.pBuffer, "ETag: \""); - if (pzETag) - { - pzETag += strlen("ETag: \""); - const char *pszEndOfETag = strchr(pzETag, '"'); - if (pszEndOfETag) - { - oFileProp.ETag.assign(pzETag, pszEndOfETag - pzETag); - } - } - - // Azure Data Lake Storage - const char *pszPermissions = - strstr(sWriteFuncHeaderData.pBuffer, "x-ms-permissions: "); - if (pszPermissions) - { - pszPermissions += strlen("x-ms-permissions: "); - const char *pszEOL = strstr(pszPermissions, "\r\n"); - if (pszEOL) - { - bool bIsDir = - strstr(sWriteFuncHeaderData.pBuffer, - "x-ms-resource-type: directory\r\n") != nullptr; - bool bIsFile = - strstr(sWriteFuncHeaderData.pBuffer, - "x-ms-resource-type: file\r\n") != nullptr; - if (bIsDir || bIsFile) - { - oFileProp.bIsDirectory = bIsDir; - std::string osPermissions; - osPermissions.assign(pszPermissions, - pszEOL - pszPermissions); - if (bIsDir) - oFileProp.nMode = S_IFDIR; - else - oFileProp.nMode = S_IFREG; - oFileProp.nMode |= - VSICurlParseUnixPermissions(osPermissions.c_str()); - } - } - } - { char **papszHeaders = CSLTokenizeString2(sWriteFuncHeaderData.pBuffer, "\r\n", 0); @@ -1412,6 +1369,44 @@ vsi_l_offset VSICurlHandle::GetFileSizeOrHeaders(bool bSetError, { m_bCached = false; } + + else if (EQUAL(pszKey, "ETag")) + { + std::string osValue(pszValue); + if (osValue.size() >= 2 && osValue.front() == '"' && + osValue.back() == '"') + osValue = osValue.substr(1, osValue.size() - 2); + oFileProp.ETag = osValue; + } + + // Azure Data Lake Storage + else if (EQUAL(pszKey, "x-ms-resource-type")) + { + if (EQUAL(pszValue, "file")) + { + oFileProp.nMode |= S_IFREG; + } + else if (EQUAL(pszValue, "directory")) + { + oFileProp.bIsDirectory = true; + oFileProp.nMode |= S_IFDIR; + } + } + else if (EQUAL(pszKey, "x-ms-permissions")) + { + oFileProp.nMode |= + VSICurlParseUnixPermissions(pszValue); + } + + // 
https://overturemapswestus2.blob.core.windows.net/release/2024-11-13.0/theme%3Ddivisions/type%3Ddivision_area + // returns a x-ms-meta-hdi_isfolder: true header + else if (EQUAL(pszKey, "x-ms-meta-hdi_isfolder") && + EQUAL(pszValue, "true")) + { + oFileProp.bIsAzureFolder = true; + oFileProp.bIsDirectory = true; + oFileProp.nMode |= S_IFDIR; + } } CPLFree(pszKey); } @@ -4893,6 +4888,37 @@ char **VSICurlFilesystemHandlerBase::GetFileList(const char *pszDirname, if (!bListDir) return nullptr; + // Deal with publicly visible Azure directories. + if (STARTS_WITH(osURL.c_str(), "https://")) + { + const char *pszBlobCore = + strstr(osURL.c_str(), ".blob.core.windows.net/"); + if (pszBlobCore) + { + FileProp cachedFileProp; + GetCachedFileProp(osURL.c_str(), cachedFileProp); + if (cachedFileProp.bIsAzureFolder) + { + const char *pszURLWithoutHTTPS = + osURL.c_str() + strlen("https://"); + const std::string osStorageAccount( + pszURLWithoutHTTPS, pszBlobCore - pszURLWithoutHTTPS); + CPLConfigOptionSetter oSetter1("AZURE_NO_SIGN_REQUEST", "YES", + false); + CPLConfigOptionSetter oSetter2("AZURE_STORAGE_ACCOUNT", + osStorageAccount.c_str(), false); + const std::string osVSIAZ(std::string("/vsiaz/").append( + pszBlobCore + strlen(".blob.core.windows.net/"))); + char **papszFileList = VSIReadDirEx(osVSIAZ.c_str(), nMaxFiles); + if (papszFileList) + { + *pbGotFileList = true; + return papszFileList; + } + } + } + } + // HACK (optimization in fact) for MBTiles driver. 
if (strstr(pszDirname, ".tiles.mapbox.com") != nullptr) return nullptr; diff --git a/port/cpl_vsil_curl_class.h b/port/cpl_vsil_curl_class.h index 4fb1e4ff9bce..6b47050dd638 100644 --- a/port/cpl_vsil_curl_class.h +++ b/port/cpl_vsil_curl_class.h @@ -80,6 +80,7 @@ class FileProp std::string osRedirectURL{}; bool bHasComputedFileSize = false; bool bIsDirectory = false; + bool bIsAzureFolder = false; int nMode = 0; // st_mode member of struct stat bool bS3LikeRedirect = false; std::string ETag{}; From 78518bef378b816376b1e4f95b25d5d2c4964553 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 20 Nov 2024 14:09:38 +0100 Subject: [PATCH 02/21] CMake: Export -DGDAL_DEBUG as PUBLIC for debug builds Fixes #11311 --- autotest/postinstall/test_cpp/test_cpp.cpp | 9 +++++++++ gdal.cmake | 4 ++++ ogr/ogr_api.h | 10 +++++----- ogr/ogr_feature.h | 2 +- ogr/ogr_geometry.h | 2 +- ogr/ogr_srs_api.h | 2 +- ogr/ograpispy.h | 2 +- port/cpl_error.h | 2 +- 8 files changed, 23 insertions(+), 10 deletions(-) diff --git a/autotest/postinstall/test_cpp/test_cpp.cpp b/autotest/postinstall/test_cpp/test_cpp.cpp index b928a03b9205..173daa14f6f2 100644 --- a/autotest/postinstall/test_cpp/test_cpp.cpp +++ b/autotest/postinstall/test_cpp/test_cpp.cpp @@ -9,6 +9,7 @@ #endif #include #include +#include int main(int argc, char **argv) { @@ -16,5 +17,13 @@ int main(int argc, char **argv) OGRGeometryFactory::createFromWkt("POINT(1 2)", nullptr, &poGeom); OGRGeometryFactory::destroyGeometry(poGeom); std::cout << GDALVersionInfo("RELEASE_NAME") << std::endl; + + // Check fix for https://github.com/OSGeo/gdal/issues/11311 + OGRSpatialReference oSRS; + int nEntries = 0; + int *panMatchCondidence = nullptr; + oSRS.FindMatches(nullptr, &nEntries, &panMatchCondidence); + CPLFree(panMatchCondidence); + return (0); } diff --git a/gdal.cmake b/gdal.cmake index 1799ea709019..80c7ddf7ce68 100644 --- a/gdal.cmake +++ b/gdal.cmake @@ -196,6 +196,10 @@ if (MINGW AND BUILD_SHARED_LIBS) 
set_target_properties(${GDAL_LIB_TARGET_NAME} PROPERTIES SUFFIX "-${GDAL_SOVERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}") endif () +# Some of the types in our public headers are dependent on whether GDAL_DEBUG +# is defined or not +target_compile_definitions(${GDAL_LIB_TARGET_NAME} PUBLIC $<$<CONFIG:DEBUG>:GDAL_DEBUG>) + # Install properties if (GDAL_ENABLE_MACOSX_FRAMEWORK) set(FRAMEWORK_VERSION ${GDAL_VERSION_MAJOR}.${GDAL_VERSION_MINOR}) diff --git a/ogr/ogr_api.h b/ogr/ogr_api.h index d17fd9028d35..8a3e674f35dd 100644 --- a/ogr/ogr_api.h +++ b/ogr/ogr_api.h @@ -43,7 +43,7 @@ bool CPL_DLL OGRGetGEOSVersion(int *pnMajor, int *pnMinor, int *pnPatch); /*! @cond Doxygen_Suppress */ #define DEFINEH_OGRGeometryH /*! @endcond */ -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRGeometryHS *OGRGeometryH; #else /** Opaque type for a geometry */ @@ -57,7 +57,7 @@ typedef void *OGRGeometryH; /*! @endcond */ #ifndef DOXYGEN_XML -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRSpatialReferenceHS *OGRSpatialReferenceH; typedef struct OGRCoordinateTransformationHS *OGRCoordinateTransformationH; #else @@ -396,7 +396,7 @@ int CPL_DLL OGRPreparedGeometryContains(OGRPreparedGeometryH hPreparedGeom, /*! @cond Doxygen_Suppress */ #define DEFINE_OGRFeatureH /*!
@endcond */ -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRFieldDefnHS *OGRFieldDefnH; typedef struct OGRFeatureDefnHS *OGRFeatureDefnH; typedef struct OGRFeatureHS *OGRFeatureH; @@ -667,7 +667,7 @@ const char CPL_DLL *OGR_GlobFldDomain_GetGlob(OGRFieldDomainH); /* ogrsf_frmts.h */ /* -------------------------------------------------------------------- */ -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRLayerHS *OGRLayerH; typedef struct OGRDataSourceHS *OGRDataSourceH; typedef struct OGRDriverHS *OGRSFDriverH; @@ -949,7 +949,7 @@ void CPL_DLL OGRCleanupAll(void); /* ogrsf_featurestyle.h */ /* -------------------------------------------------------------------- */ -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRStyleMgrHS *OGRStyleMgrH; typedef struct OGRStyleToolHS *OGRStyleToolH; #else diff --git a/ogr/ogr_feature.h b/ogr/ogr_feature.h index d710dfd54a93..3ffd6b0201cb 100644 --- a/ogr/ogr_feature.h +++ b/ogr/ogr_feature.h @@ -37,7 +37,7 @@ /*! @cond Doxygen_Suppress */ #define DEFINE_OGRFeatureH /*! @endcond */ -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRFieldDefnHS *OGRFieldDefnH; typedef struct OGRFeatureDefnHS *OGRFeatureDefnH; typedef struct OGRFeatureHS *OGRFeatureH; diff --git a/ogr/ogr_geometry.h b/ogr/ogr_geometry.h index 2e07be78d05e..4d354cce8f02 100644 --- a/ogr/ogr_geometry.h +++ b/ogr/ogr_geometry.h @@ -36,7 +36,7 @@ /*! @cond Doxygen_Suppress */ #ifndef DEFINEH_OGRGeometryH #define DEFINEH_OGRGeometryH -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRGeometryHS *OGRGeometryH; #else typedef void *OGRGeometryH; diff --git a/ogr/ogr_srs_api.h b/ogr/ogr_srs_api.h index e9eb02ea90ca..2116821b80bc 100644 --- a/ogr/ogr_srs_api.h +++ b/ogr/ogr_srs_api.h @@ -426,7 +426,7 @@ const char CPL_DLL *OSRAxisEnumToName(OGRAxisOrientation eOrientation); #define DEFINED_OGRSpatialReferenceH /*! 
@endcond */ -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) typedef struct OGRSpatialReferenceHS *OGRSpatialReferenceH; typedef struct OGRCoordinateTransformationHS *OGRCoordinateTransformationH; #else diff --git a/ogr/ograpispy.h b/ogr/ograpispy.h index 70d98b5ea16f..f04d3758657b 100644 --- a/ogr/ograpispy.h +++ b/ogr/ograpispy.h @@ -45,7 +45,7 @@ * @since GDAL 2.0 */ -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) #define OGRAPISPY_ENABLED #endif diff --git a/port/cpl_error.h b/port/cpl_error.h index d15d875e619c..c4f1cde43626 100644 --- a/port/cpl_error.h +++ b/port/cpl_error.h @@ -179,7 +179,7 @@ void CPL_DLL CPLDebugProgress(const char *, CPL_FORMAT_STRING(const char *), ...) CPL_PRINT_FUNC_FORMAT(2, 3); #endif -#ifdef DEBUG +#if defined(DEBUG) || defined(GDAL_DEBUG) /** Same as CPLDebug(), but expands to nothing for non-DEBUG builds. * @since GDAL 3.1 */ From 409982bb95164fbc85c17d152bd95033e197fe6a Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 20 Nov 2024 22:12:42 +0100 Subject: [PATCH 03/21] GTI: make the driver work with STAC GeoParquet files that don't have a assets.image.href field Fixes #11317 --- frmts/gti/gdaltileindexdataset.cpp | 87 +++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/frmts/gti/gdaltileindexdataset.cpp b/frmts/gti/gdaltileindexdataset.cpp index 52b7618537ac..d19a25dade78 100644 --- a/frmts/gti/gdaltileindexdataset.cpp +++ b/frmts/gti/gdaltileindexdataset.cpp @@ -892,36 +892,97 @@ bool GDALTileIndexDataset::Open(GDALOpenInfo *poOpenInfo) const OGRFeatureDefn *poLayerDefn = m_poLayer->GetLayerDefn(); - // Is this a https://stac-utils.github.io/stac-geoparquet/latest/spec/stac-geoparquet-spec ? 
- const bool bIsStacGeoParquet = - poLayerDefn->GetFieldIndex("assets.image.href") >= 0; - - const char *pszLocationFieldName = GetOption(MD_LOCATION_FIELD); - if (!pszLocationFieldName) + std::string osLocationFieldName; { - if (bIsStacGeoParquet) + const char *pszLocationFieldName = GetOption(MD_LOCATION_FIELD); + if (pszLocationFieldName) { - pszLocationFieldName = "assets.image.href"; + osLocationFieldName = pszLocationFieldName; } else { - constexpr const char *DEFAULT_LOCATION_FIELD_NAME = "location"; - pszLocationFieldName = DEFAULT_LOCATION_FIELD_NAME; + // Is this a https://stac-utils.github.io/stac-geoparquet/latest/spec/stac-geoparquet-spec ? + if (poLayerDefn->GetFieldIndex("assets.data.href") >= 0) + { + osLocationFieldName = "assets.data.href"; + CPLDebug("GTI", "Using %s as location field", + osLocationFieldName.c_str()); + } + else if (poLayerDefn->GetFieldIndex("assets.image.href") >= 0) + { + osLocationFieldName = "assets.image.href"; + CPLDebug("GTI", "Using %s as location field", + osLocationFieldName.c_str()); + } + else if (poLayerDefn->GetFieldIndex("stac_version") >= 0) + { + const int nFieldCount = poLayerDefn->GetFieldCount(); + // Look for "assets.xxxxx.href" fields + int nAssetCount = 0; + for (int i = 0; i < nFieldCount; ++i) + { + const auto poFDefn = poLayerDefn->GetFieldDefn(i); + const char *pszFieldName = poFDefn->GetNameRef(); + if (STARTS_WITH(pszFieldName, "assets.") && + EQUAL(pszFieldName + strlen(pszFieldName) - + strlen(".href"), + ".href") && + // Assets with "metadata" in them are very much likely + // not rasters... We could potentially confirm that by + // inspecting the value of the assets.XXX.type or + // assets.XXX.roles fields of one feature + !strstr(pszFieldName, "metadata")) + { + ++nAssetCount; + if (!osLocationFieldName.empty()) + { + osLocationFieldName += ", "; + } + osLocationFieldName += pszFieldName; + } + } + if (nAssetCount > 1) + { + CPLError(CE_Failure, CPLE_AppDefined, + "Several potential STAC assets. 
Please select one " + "among %s with the LOCATION_FIELD open option", + osLocationFieldName.c_str()); + return false; + } + else if (nAssetCount == 0) + { + CPLError(CE_Failure, CPLE_AppDefined, + "File has stac_version property but lacks assets"); + return false; + } + } + else + { + constexpr const char *DEFAULT_LOCATION_FIELD_NAME = "location"; + osLocationFieldName = DEFAULT_LOCATION_FIELD_NAME; + } } } - m_nLocationFieldIndex = poLayerDefn->GetFieldIndex(pszLocationFieldName); + const bool bIsStacGeoParquet = + STARTS_WITH(osLocationFieldName.c_str(), "assets.") && + EQUAL(osLocationFieldName.c_str() + osLocationFieldName.size() - + strlen(".href"), + ".href"); + + m_nLocationFieldIndex = + poLayerDefn->GetFieldIndex(osLocationFieldName.c_str()); if (m_nLocationFieldIndex < 0) { CPLError(CE_Failure, CPLE_AppDefined, "Cannot find field %s", - pszLocationFieldName); + osLocationFieldName.c_str()); return false; } if (poLayerDefn->GetFieldDefn(m_nLocationFieldIndex)->GetType() != OFTString) { CPLError(CE_Failure, CPLE_AppDefined, "Field %s is not of type string", - pszLocationFieldName); + osLocationFieldName.c_str()); return false; } From f8532eebd5485fc7232744d921885da99c9d1874 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 21 Nov 2024 01:16:59 +0100 Subject: [PATCH 04/21] GTI: advertize SRS open option --- doc/source/drivers/raster/gti.rst | 8 +++++++- frmts/gti/gdaltileindexdataset.cpp | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/drivers/raster/gti.rst b/doc/source/drivers/raster/gti.rst index f44f0b359b12..f169e2f82298 100644 --- a/doc/source/drivers/raster/gti.rst +++ b/doc/source/drivers/raster/gti.rst @@ -42,7 +42,7 @@ driver with the following main differences: * Contrary to the VRT driver, the GTI driver does not enable to alter characteristics of referenced tiles, such as their georeferencing, nodata value, etc. 
If such behavior is desired, the tiles must be for example wrapped - individually in a VRT file (or `vrt://` connection string) before being referenced + individually in a VRT file (or `vrt://` connection string) before being referenced in the GTI index. Connection strings @@ -444,6 +444,12 @@ also defined as layer metadata items or in the .gti XML file Resolution along Y axis in SRS units / pixel. +- .. oo:: SRS + :choices: + + Overrides/sets the Spatial Reference System in one of the formats supported + by :cpp:func:`OGRSpatialReference::SetFromUserInput`. + - .. oo:: MINX :choices: diff --git a/frmts/gti/gdaltileindexdataset.cpp b/frmts/gti/gdaltileindexdataset.cpp index d19a25dade78..ba326ed8b8ec 100644 --- a/frmts/gti/gdaltileindexdataset.cpp +++ b/frmts/gti/gdaltileindexdataset.cpp @@ -4693,6 +4693,7 @@ void GDALRegister_GTI() "