From 3ed3c6e8475eaad545838453366b91a97f5f90d8 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 25 Apr 2024 23:54:16 +0200 Subject: [PATCH] Propose version 2 of this extension. Fixes #16, #21, #22, #23 --- .github/workflows/publish.yaml | 6 +- .github/workflows/test.yaml | 4 +- CHANGELOG.md | 21 +++- README.md | 70 +++++++++---- examples/catalog-link.json | 33 ++++++ examples/item-naip.json | 120 +++++++++++++++------- examples/item-nsl.json | 78 +++++++------- json-schema/schema.json | 181 ++++++++++++++++++++++++--------- package.json | 7 +- 9 files changed, 368 insertions(+), 152 deletions(-) create mode 100644 examples/catalog-link.json diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 9cba42f..e000fc3 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -7,10 +7,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Inject env variables - uses: rlespinasse/github-slug-action@v3.x - - uses: actions/checkout@v2 + uses: rlespinasse/github-slug-action@v4 + - uses: actions/checkout@v4 - name: deploy JSON Schema for version ${{ env.GITHUB_REF_SLUG }} - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: json-schema diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index dbe73cf..45fd78f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -4,10 +4,10 @@ jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v4 with: node-version: 'lts/*' - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - run: | npm install npm test diff --git a/CHANGELOG.md b/CHANGELOG.md index fdf8250..d0bd782 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +## [v2.0.0] - 2021-06-23 + +### Added + +- `storage:schemes`, `storage:refs` and 
Storage Scheme Object +- Support the storage extension in Links +- Support for the Alternate Assets Extension +- Support for other storage providers, including custom S3 hosts + +### Changed + +- The storage providers are grouped in `storage:schemes` and located in the Item Properties, Collections or Catalog metadata +- Assets and Links reference the storage schemes by key in `storage:refs` + +### Removed + +- `storage:platform`, `storage:region`, `storage:requester_pays` and `storage:tier` + ## [v1.0.0] - 2021-06-23 Initial release -[Unreleased]: +[Unreleased]: +[v2.0.0]: [v1.0.0]: diff --git a/README.md b/README.md index 4d6d45c..570053b 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,20 @@ # Storage Extension Specification - **Title:** Storage -- **Identifier:** <https://stac-extensions.github.io/storage/v1.0.0/schema.json> +- **Identifier:** <https://stac-extensions.github.io/storage/v2.0.0/schema.json> - **Field Name Prefix:** storage -- **Scope:** Item, Collection +- **Scope:** Item, Catalog, Collection - **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Pilot - **Owner**: @davidraleigh @matthewhanson This document explains the Storage Extension to the [SpatioTemporal Asset Catalog](https://github.com/radiantearth/stac-spec) (STAC) specification. -It allows adding details related to cloud storage access and costs to be associated with STAC Assets. +It allows adding details related to cloud object storage access and costs to be associated with STAC Assets. This extension does not cover NFS solutions provided by PaaS cloud companies. - Examples: - - [Item example 1](examples/item-naip.json): Shows the basic usage of the extension in a STAC Item. - - [Item example 2](examples/item-nsl.json): Another example of basic usage. + - [NAIP Item](examples/item-naip.json): Shows a mixture of storage providers, including custom S3 hosts. + - [NSL Item](examples/item-nsl.json): Shows the usage of the extension in combination with the alternate asset extension. 
+ - [Catalog with Link](examples/catalog-link.json): Shows the usage of the extension on a link in a STAC Catalog. - [JSON Schema](json-schema/schema.json) - [Changelog](./CHANGELOG.md) @@ -21,26 +22,47 @@ This extension does not cover NFS solutions provided by PaaS cloud companies. The fields in the table below can be used in these parts of STAC documents: +- [x] Catalogs +- [x] Collections +- [x] Item Properties (incl. Summaries in Collections) +- [ ] Assets (for both Collections and Items, incl. Item Asset Definitions in Collections) +- [ ] Links + +| Field Name | Type | Description | +| ----------------- | ------------------------------------------------------------ | ----------- | +| `storage:schemes` | Map | **REQUIRED.** A property that contains all of the storage schemes used by Assets and Links in the STAC Item, Catalog or Collection. | + +--- + +The fields in the table below can be used in these parts of STAC documents: + - [ ] Catalogs - [ ] Collections -- [x] Item Properties (incl. Summaries in Collections) +- [ ] Item Properties (incl. Summaries in Collections) - [x] Assets (for both Collections and Items, incl. Item Asset Definitions in Collections) -- [ ] Links +- [x] Links +- [x] [Alternate Assets Object](https://github.com/stac-extensions/alternate-assets?tab=readme-ov-file#alternate-asset-object) + +| Field Name | Type | Description | +| -------------- | ---------- | ----------- | +| `storage:refs` | \[string\] | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. Each value must be one of the keys defined in `storage:schemes`. | + +### Storage Scheme Object -| Field Name | Type | Description | -| ---------------------- | --------- | ----------- | -| storage:platform | string | The [cloud provider](#providers) where data is stored | -| storage:region | string | The region where the data is stored. 
Relevant to speed of access and inter region egress costs (as defined by PaaS provider) | -| storage:requester_pays | boolean | Is the data requester pays or is it data manager/cloud provider pays. *Defaults to false* | -| storage:tier | string | The title for the tier type (as defined by PaaS provider) | +| Field Name | Type | Description | +| -------------- | ------- | ----------- | +| platform | string | **REQUIRED.** The [cloud provider](#platforms) where data is stored. | +| region | string | The region where the data is stored. Relevant to speed of access and inter region egress costs (as defined by PaaS provider) | +| requester_pays | boolean | Is the data requester pays or is it data manager/cloud provider pays. Defaults to `false` | +| tier | string | The title for the tier type (as defined by PaaS provider) | -While these are all valid properties on an Item, they will typically be defined per-asset. If a field applies equally -to all assets (e.g., storage:platform=AWS if all assets are on AWS), then it should be specified in Item properties. +The properties `title` and `description` as defined in Common Metadata can be used as well. -### Additional Field Information +#### Platforms -#### Providers -Currently this document is arranged to support object storage users of the following PaaS solutions: +The `platform` field identifies the cloud provider where the data is stored. + +There are a couple of pre-defined values for common providers: - Alibaba Cloud (Aliyun): `ALIBABA` - Amazon AWS: `AWS` @@ -48,11 +70,17 @@ Currently this document is arranged to support object storage users of the follo - Google Cloud Platform: `GCP` - IBM Cloud: `IBM` - Oracle Cloud: `ORACLE` -- All other PaaS solutions: `OTHER` -The upper-cased values are meant to be used for `storage:platform`. +All other PaaS solutions must use a unique URL to the service. 
+ +In case an `href` contains a non-HTTP URL that is not directly resolvable, +the `platform` property must identify the host so that the URL can be resolved without further information. +This is especially useful to provide the endpoint URL for custom S3 providers. +In this case the `platform` is effectively the endpoint URL. + +#### Tiers -#### Cloud Provider Storage Tiers +Recommended values for the `tier` field: | Minimum Duration | [Google Cloud Platform](https://cloud.google.com/storage/docs/storage-classes) | [Amazon AWS](https://aws.amazon.com/s3/storage-classes/) | [Microsoft Azure](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-storage-tiers) | [IBM Cloud](https://cloud.ibm.com/objectstorage/create#pricing) | [Oracle Cloud](https://www.oracle.com/cloud/storage/pricing.html) | [Alibaba Cloud](https://www.alibabacloud.com/product/oss/pricing) | | ------------- | --------- | ------------------------ | ------- |---------- | ----------------- | ----------------- | diff --git a/examples/catalog-link.json b/examples/catalog-link.json new file mode 100644 index 0000000..d3a8de9 --- /dev/null +++ b/examples/catalog-link.json @@ -0,0 +1,33 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/storage/v2.0.0/schema.json" + ], + "type": "Catalog", + "id": "20190822T183518Z_746_POM1_ST2_P", + "title": "Example Catalog", + "description": "An example catalog with a link to documentation on object storage.", + "storage:schemes": { + "aws": { + "platform": "AWS", + "region": "us-west-2", + "requester_pays": true, + "tier": "Standard" + } + }, + "links": [ + { + "href": "https://example.com/examples/catalog-link.json", + "rel": "self" + }, + { + "title": "Documentation", + "href": "s3://mybucket/project/documentation.pdf", + "type": "application/pdf", + "rel": "about", + "storage:refs": [ + "aws" + ] + } + ] +} \ No newline at end of file diff --git a/examples/item-naip.json b/examples/item-naip.json index 
0ddff93..c9650bb 100644 --- a/examples/item-naip.json +++ b/examples/item-naip.json @@ -1,10 +1,10 @@ { "stac_version": "1.0.0", "stac_extensions": [ - "https://stac-extensions.github.io/storage/v1.0.0/schema.json" + "https://stac-extensions.github.io/storage/v2.0.0/schema.json", + "https://stac-extensions.github.io/version/v1.2.0/schema.json" ], "id": "m_3009743_sw_14_1_20160928_20161129", - "collection": "NAIP_MOSAIC", "bbox": [ -97.75, 30.25, @@ -43,65 +43,107 @@ "datetime": "2016-09-28T00:00:00+00:00", "mission": "NAIP", "platform": "UNKNOWN_PLATFORM", - "gsd": 1 + "gsd": 1, + "storage:schemes": { + "az-wus2-arc": { + "platform": "AZURE", + "region": "westus2", + "tier": "archive" + }, + "gs-cld": { + "platform": "GCP", + "region": "us-central1", + "requester_pays": true, + "tier": "COLDLINE" + }, + "aws-std": { + "platform": "AWS", + "region": "us-west-2", + "requester_pays": true, + "tier": "Standard" + }, + "az-weu-hot": { + "platform": "AZURE", + "region": "westeurope", + "requester_pays": false, + "tier": "hot" + }, + "az-eus-hot": { + "platform": "AZURE", + "region": "eastus", + "requester_pays": false, + "tier": "hot", + "deprecated": true + }, + "minio": { + "platform": "https://play.min.io:9000" + } + } }, "assets": { + "CO_GEOTIFF_RGB": { + "href": "s3://mybucket/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "minio" + ] + }, + "CO_GEOTIFF_AWS_RGB": { + "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "aws-std" + ] + }, "GEOTIFF_AZURE_RGBIR": { "href": "https://naip-nsl.blob.core.windows.net/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff", - "storage:platform": "AZURE", - "storage:region": "westus2", - "storage:tier": "archive" + "type": "image/tiff; application=geotiff", + 
"storage:refs": [ + "az-wus2-arc" + ] }, "CO_GEOTIFF_GCP_RGB": { "href": "gs://naip-data/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "GCP", - "storage:region": "us-central1", - "storage:requester_pays": true, - "storage:tier": "COLDLINE" - }, - "CO_GEOTIFF_AWS_RGB": { - "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "AWS", - "storage:region": "us-west-2", - "storage:requester_pays": true, - "storage:tier": "Standard" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "gs-cld" + ] }, "CO_GEOTIFF_AZURE_RGB": { "href": "https://naipeuwest.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "AZURE", - "storage:region": "westeurope", - "storage:requester_pays": false, - "storage:tier": "hot" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "az-weu-hot" + ] }, "CO_GEOTIFF_AZURE_RGB_DEPRECATED": { "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "AZURE", - "storage:region": "eastus", - "storage:requester_pays": false, - "storage:tier": "hot" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "az-eus-hot" + ], + "deprecated": true + }, + "THUMBNAIL": { + "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", + "type": "image/jpeg", + "storage:refs": [ + "minio" + ] }, "THUMBNAIL_AZURE_DEPRECATED": { "href": 
"https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", "type": "image/jpeg", - "storage:platform": "AZURE", - "storage:region": "eastus", - "storage:requester_pays": false, - "storage:tier": "hot" + "storage:refs": [ + "az-eus-hot" + ], + "deprecated": true } }, "links": [ { - "href": "https://example.com/examples/item.json", + "href": "https://example.com/examples/item-naip.json", "rel": "self" - }, - { - "href": "https://example.com/examples/item.json", - "rel": "collection" } ] } \ No newline at end of file diff --git a/examples/item-nsl.json b/examples/item-nsl.json index 9802fd0..9b0c996 100644 --- a/examples/item-nsl.json +++ b/examples/item-nsl.json @@ -1,10 +1,10 @@ { "stac_version": "1.0.0", "stac_extensions": [ - "https://stac-extensions.github.io/storage/v1.0.0/schema.json" + "https://stac-extensions.github.io/storage/v2.0.0/schema.json", + "https://stac-extensions.github.io/alternate-assets/v1.0.0/schema.json" ], "id": "20190822T183518Z_746_POM1_ST2_P", - "collection": "NSL_SCENE", "type": "Feature", "bbox": [ -97.7466867683867, @@ -44,50 +44,58 @@ "mission": "SWIFT", "platform": "SWIFT_2", "instrument": "POM_1", - "gsd": 0.20000000298023224 + "gsd": 0.20000000298023224, + "storage:schemes": { + "gcp-std": { + "platform": "GCP", + "region": "us-central1", + "requester_pays": true, + "tier": "STANDARD" + }, + "aws-glc": { + "platform": "AWS", + "region": "us-central-1", + "requester_pays": true, + "tier": "Glacier" + } + } }, "assets": { - "GEOTIFF_RGB_GCP": { + "GEOTIFF_RGB": { "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "type": "image/vnd.stac.geotiff", - "storage:platform": "GCP", - "storage:region": "us-central1", - "storage:requester_pays": true, - "storage:tier": "STANDARD" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "gcp-std" + ], + "alternate": { + "aws": { 
+ "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", + "storage:refs": [ + "aws-glc" + ] + } + } }, - "THUMBNAIL_RGB_GCP": { + "THUMBNAIL_RGB": { "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", "type": "image/png", - "storage:platform": "GCP", - "storage:region": "us-central1", - "storage:requester_pays": true, - "storage:tier": "STANDARD" - }, - "GEOTIFF_RGB_AWS": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "type": "image/vnd.stac.geotiff", - "storage:platform": "AWS", - "storage:region": "us-central-1", - "storage:requester_pays": true, - "storage:tier": "Glacier" - }, - "THUMBNAIL_RGB_AWS": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", - "type": "image/png", - "storage:platform": "AWS", - "storage:region": "us-central-1", - "storage:requester_pays": true, - "storage:tier": "Standard" + "storage:refs": [ + "gcp-std" + ], + "alternate": { + "aws": { + "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", + "storage:refs": [ + "aws-glc" + ] + } + } } }, "links": [ { - "href": "https://example.com/examples/item.json", + "href": "https://example.com/examples/item-nsl.json", "rel": "self" - }, - { - "href": "https://example.com/examples/item.json", - "rel": "collection" } ] } \ No newline at end of file diff --git a/json-schema/schema.json b/json-schema/schema.json index ca9d7ad..5087247 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -1,8 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://stac-extensions.github.io/storage/v1.0.0/schema.json", - "title": "Storage Extension", - "description": "STAC Storage Extension to a STAC Item and STAC Assets.", + 
"$id": "https://stac-extensions.github.io/storage/v2.0.0/schema.json", + "title": "STAC Storage Extension", "type": "object", "required": [ "stac_extensions" @@ -11,7 +10,7 @@ "stac_extensions": { "type": "array", "contains": { - "const": "https://stac-extensions.github.io/storage/v1.0.0/schema.json" + "const": "https://stac-extensions.github.io/storage/v2.0.0/schema.json" } } }, @@ -21,21 +20,20 @@ "type": "object", "required": [ "type", - "properties", - "assets" + "properties" ], "properties": { "type": { "const": "Feature" }, "properties": { - "$ref": "#/definitions/fields" + "$ref": "#/definitions/schemes_field" }, "assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/fields" - } + "$ref": "#/definitions/assets" + }, + "links": { + "$ref": "#/definitions/links" } } }, @@ -50,51 +48,140 @@ "const": "Collection" }, "assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/fields" - } + "$ref": "#/definitions/assets" }, "item_assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/fields" - } + "$ref": "#/definitions/assets" + }, + "links": { + "$ref": "#/definitions/links" } - } + }, + "allOf": [ + { + "$ref": "#/definitions/schemes_field" + } + ] + }, + { + "$comment": "This is the schema for STAC Catalogs", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "const": "Catalog" + }, + "links": { + "$ref": "#/definitions/links" + } + }, + "allOf": [ + { + "$ref": "#/definitions/schemes_field" + } + ] } ], "definitions": { - "fields": { + "schemes_field": { "type": "object", + "required": [ + "storage:schemes" + ], "properties": { - "storage:platform": { - "title": "Platform", - "type": "string", - "enum": [ - "OTHER", - "AWS", - "GCP", - "AZURE", - "IBM", - "ALIBABA", - "ORACLE" - ], - "default": "OTHER" - }, - "storage:region": { - "title": "Region", - "type": "string" - }, - "storage:requester_pays": { - "type": "boolean", - "title": "Requester 
pays", - "default": false - }, - "storage:tier": { - "title": "Tier", - "type": "string" + "storage:schemes": { + "type": "object", + "patternProperties": { + "^.{1,}$": { + "required": [ + "platform" + ], + "properties": { + "platform": { + "title": "Platform", + "oneOf": [ + { + "type": "string", + "enum": [ + "AWS", + "GCP", + "AZURE", + "IBM", + "ALIBABA", + "ORACLE" + ] + }, + { + "type": "string", + "format": "iri", + "pattern": "^[\\w\\+.-]+://" + } + ] + }, + "region": { + "title": "Region", + "type": "string" + }, + "requester_pays": { + "type": "boolean", + "title": "Requester pays", + "default": false + }, + "tier": { + "title": "Tier", + "type": "string" + } + } + } + }, + "additionalProperties": false } + }, + "patternProperties": { + "^(?!storage:)": {} + }, + "additionalProperties": false + }, + "refs_field": { + "type": "object", + "properties": { + "storage:refs": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + }, + "patternProperties": { + "^(?!storage:)": {} + }, + "additionalProperties": false + }, + "assets": { + "type": "object", + "additionalProperties": { + "allOf": [ + { + "$ref": "#/definitions/refs_field" + }, + { + "type": "object", + "properties": { + "alternate": { + "$ref": "#/definitions/refs_field" + } + } + } + ] + } + }, + "links": { + "type": "array", + "items": { + "$ref": "#/definitions/refs_field" } } } diff --git a/package.json b/package.json index 5a68b68..948eb39 100644 --- a/package.json +++ b/package.json @@ -1,14 +1,13 @@ { "name": "stac-extensions", - "version": "1.0.0", + "version": "2.0.0", "scripts": { "test": "npm run check-markdown && npm run check-examples", "check-markdown": "remark . -f -r .github/remark.yaml", - "check-examples": "stac-node-validator . --lint --verbose --schemaMap https://stac-extensions.github.io/storage/v1.0.0/schema.json=./json-schema/schema.json", - "format-examples": "stac-node-validator . 
--format --schemaMap https://stac-extensions.github.io/storage/v1.0.0/schema.json=./json-schema/schema.json" + "check-examples": "stac-node-validator . --lint --verbose --schemaMap https://stac-extensions.github.io/storage/v2.0.0/schema.json=./json-schema/schema.json", + "format-examples": "stac-node-validator . --format --schemaMap https://stac-extensions.github.io/storage/v2.0.0/schema.json=./json-schema/schema.json" }, "dependencies": { - "babel": "^6.23.0", "remark-cli": "^8.0.0", "remark-lint": "^7.0.0", "remark-lint-no-html": "^2.0.0",