Merge pull request #10 from cedadev/v0.4.1

V0.4.1: Cloud Product accessibility externally.
cedadev · Jan 24, 2025 · 1860ae1 · 1860ae1
2 parents e3280f6 + abb1f7b
commit 1860ae1
Show file tree

Hide file tree

Showing 6 changed files with 990 additions and 15 deletions.
diff --git a/ceda_datapoint/core/cloud.py b/ceda_datapoint/core/cloud.py
@@ -116,6 +116,8 @@ def __init__(
         self._cloud_format = cf
 
         self._mapper = mapper or DataPointMapper(id)
+
+        meta = meta or {}
 
         self._asset_stac = asset_stac
         self._meta = meta | {

diff --git a/ceda_datapoint/core/item.py b/ceda_datapoint/core/item.py
@@ -277,14 +277,8 @@ def _identify_cloud_assets(self) -> None:
         if len(assets) == 0:
             return cloud_list
 
-        rf_titles = list(method_format.keys())
-
         for id, asset in self._assets.items():
-            cf = self._mapper.get('cloud_format', asset)
-
-            if cf is None and id in rf_titles:
-                cf = method_format[id]
-
+            cf = identify_cloud_type(id, asset, asset_mapper=self._mapper)
             if cf is not None:
                 cloud_list.append((id, cf))
 
@@ -341,4 +335,33 @@ def _load_cloud_assets(
             return DataPointCluster(asset_list, meta=self._meta, parent_id=self._id)
         else:
             return asset_list[0]
-
+
+def identify_cloud_type(
+        id: str,
+        asset,
+        cflabel: str = 'cloud_format',
+        asset_mapper: DataPointMapper | None = None,
+    ) -> str | None:
+    """
+    Identify the type of cloud format to which this asset conforms.
+
+    :param id:           Asset ID, matched against the keys of ``method_format``.
+    :param asset:        Asset object to inspect.
+    :param cflabel:      Name of the property holding the cloud format.
+    :param asset_mapper: Optional mapper, queried for ``cflabel`` first.
+    :returns: The cloud format, or ``None`` if none could be identified.
+    """
+
+    # Try using the mapper tool. ``cf`` must only be read inside this branch:
+    # it is never assigned when no mapper is supplied (the default).
+    if asset_mapper is not None:
+        cf = asset_mapper.get(cflabel, asset)
+        if cf is not None:
+            return cf
+
+    # Try getting the correct property
+    if hasattr(asset, cflabel):
+        return getattr(asset, cflabel)
+
+    # Otherwise identify from known methods (None for an unrecognised ID)
+    return method_format[id] if id in method_format else None
diff --git a/docs/source/cloud_formats.rst b/docs/source/cloud_formats.rst
@@ -1,6 +1,56 @@
-=============
+=================================
+DataPoint's Cloud Product Handler
+=================================
+
+The ``DataPointCloudProduct`` class
+-----------------------------------
+
+For any users wanting to take advantage of the functionality within datapoint to configure and open datasets via STAC records, this operator is the object to use.
+The ``DataPointCloudProduct`` operator can be instantiated for each conformant asset from one or more items. For a single item:
+
+.. code::
+
+   from ceda_datapoint.core.cloud import DataPointCloudProduct
+   from ceda_datapoint.core.item import identify_cloud_type
+
+   products = []
+   for name, asset in item.assets.items():
+      cf = identify_cloud_type(name, asset)
+      if cf is None:
+         continue
+      products.append(
+         DataPointCloudProduct(
+            asset,   # The asset obtained from pystac.Item
+            id=name, # ID of the asset (can be combined with the item ID)
+            cf=cf,   # Cloud format identified above.
+            meta={'bbox':bounding_box}, # See below.
+            properties=properties       # Properties of the parent item.
+         )
+      )
+      
+In this example, ``item`` is a pystac object that can be obtained from the ``pystac-client`` or a similar pystac implementation.
+The cloud format/type (see below) can be identified using the function ``identify_cloud_type`` also imported from DataPoint.
+This relies on either the ``id`` of the asset conforming to the labels that DataPoint expects (e.g. ``reference_file``) or the asset containing
+a property called ``cloud_format``. If the asset contains the cloud format but under a different name, the ``cflabel`` argument can be adjusted accordingly.
+If the cloud label is nested within the asset, a mapper can be supplied (see the section on Mappers).
+
+We can then initialise a ``DataPointCloudProduct`` for this asset. There are additional kwargs that can be supplied but the important ones are highlighted above.
+For the ``meta`` argument, a dictionary must be given which (at minimum) includes the bounding box (which is not typically part of the Item's properties).
+Other attributes of the item that apply to the asset can be passed using this mechanism. STAC properties (like STAC version) can be passed using the
+``stac_attrs`` kwarg if necessary.
+
+``DataPointCluster`` objects
+----------------------------
+In the above example, a list of cloud products is generated for convenience. Instead, we could combine these into a ``cluster`` object which comes with some benefits over just using a list:
+
+ - String representation with metadata
+ - Able to obtain a listing of metadata in each cloud product.
+ - Help/Info methods available.
+ - Able to open a dataset directly from the cluster.
+ - Indexable, so can extract a product by ID or position.
+
 Cloud Formats
-=============
+-------------
 
 From recent user surveys relating to the Climate Model Intercomparison Project (CMIP6) 
 datasets available via the CEDA Archive, some common issues and barriers to research relate to how to find and access the data itself.

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -15,6 +15,8 @@ access our collection, but DataPoint is unique in that it is automatically confi
 
 **v0.4.0**: Zarr and COG products now supported via DataPoint, as well as Mappings for use with external APIs.
 
+**v0.4.1**: Added increased support for using ``DataPointCloudProduct`` objects externally.
+
 Installation
 ------------
 The datapoint package can be installed via pip, and requires Python 3.8 or later.
@@ -33,11 +35,11 @@ The long term goal is for datapoint to be included in the set of standard packag
 
    Inspiration <inspiration>
    How to Use DataPoint <usage>
+   DataPoint's Cloud Product Handler <cloud_formats>
    DataPoint Objects <objects>
    Mappings for Non-CEDA STAC Catalogs <mappers>
    When to Use DataPoint <examples>
    STAC Catalogs Explained <stac>
-   Cloud Formats Explained <cloud_formats>
 
 .. toctree::
    :maxdepth: 1