From e74fa9d9e030e4a56af9b0f5b605f3d59a345c7a Mon Sep 17 00:00:00 2001
From: corink21 <corina.keller2@gmail.com>
Date: Mon, 18 Nov 2024 08:58:46 +0100
Subject: [PATCH 01/13] Round all the dxs and dys to the number of decimal
 places given by the parameter "rounding"

---
 emiproc/grids.py | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/emiproc/grids.py b/emiproc/grids.py
index 9fb17956..a7003e40 100644
--- a/emiproc/grids.py
+++ b/emiproc/grids.py
@@ -325,6 +325,9 @@ def from_centers(
             dx = round(dx, rounding)
             dy = round(dy, rounding)
 
+            dxs = [round(dxi, rounding) for dxi in dxs]
+            dys = [round(dyi, rounding) for dyi in dys]
+
         if not np.allclose(dxs, dx) or not np.allclose(dys, dy):
             raise ValueError("The centers are not equally spaced.")
 

From a20e339ddee34a51b08f9046f5cfeaafd09b11eb Mon Sep 17 00:00:00 2001
From: corink21 <corina.keller2@gmail.com>
Date: Mon, 18 Nov 2024 09:20:39 +0100
Subject: [PATCH 02/13] Updated link to download data

---
 emiproc/inventories/gfas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/emiproc/inventories/gfas.py b/emiproc/inventories/gfas.py
index a68b6dce..0afd36ef 100644
--- a/emiproc/inventories/gfas.py
+++ b/emiproc/inventories/gfas.py
@@ -30,7 +30,7 @@ class GFAS(Inventory):
 
     You can access the data at
     `CAMS global biomass burning emissions based on fire radiative power
-    <https://ads.atmosphere.copernicus.eu/cdsapp#!/dataset/cams-global-fire-emissions-gfas>`_
+    <https://ads.atmosphere.copernicus.eu/datasets/cams-global-fire-emissions-gfas?tab=overview>`_
 
     """
 

From f95c5437c52518e85160768a6154abc8b21d945b Mon Sep 17 00:00:00 2001
From: corink21 <corina.keller2@gmail.com>
Date: Mon, 18 Nov 2024 10:12:39 +0100
Subject: [PATCH 03/13] CAMS regional air quality inventory v6.1-Ref2 added

---
 emiproc/inventories/cams_reg_aq.py | 161 +++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 emiproc/inventories/cams_reg_aq.py

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
new file mode 100644
index 00000000..ecb453ec
--- /dev/null
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -0,0 +1,161 @@
+from os import PathLike
+
+import geopandas as gpd
+import xarray as xr
+import re
+from pathlib import Path
+
+from emiproc.grids import WGS84, RegularGrid
+from emiproc.inventories import Inventory
+from emiproc.profiles.temporal_profiles import read_temporal_profiles
+from emiproc.profiles.vertical_profiles import read_vertical_profiles
+
+UNIT_CONVERSION_FACTOR = 1e9  # Tg -> kg
+
+class CAMS_REG_AQ(Inventory):
+
+    grid: RegularGrid
+
+    def __init__(
+        self,
+        nc_dir: PathLike,
+        profiles_dir: PathLike = None,
+        year: int = 2022,
+        substances_mapping: dict[str, str] = {
+            "nox": "NOx",
+            "co": "CO",
+            "ch4": "CH4",
+            "nmvoc": "VOC",
+            "sox": "SO2",
+            "nh3": "NH3",
+            "pm2_5": "PM25",
+            "pm10":"PM10"
+        },
+        categories_mapping: dict[str, str] = {
+            "A_PublicPower": "A",
+            "B_Industry": "B",
+            "C_OtherStationaryComb": "C",
+            "D_Fugitives": "D",
+            "E_Solvents": "E",
+            "F_RoadTransport": "F",
+            "G_Shipping": "G",
+            "H_Aviation": "H",
+            "I_OffRoad": "I",
+            "J_Waste": "J",
+            "K_AgriLivestock": "K",
+            "L_AgriOther": "L"
+        },
+        substances_mapping_profiles: dict[str, str] = {
+            "nox": "NOx",
+            "co": "CO",
+            "ch4": "CH4",
+            "nmvoc": "VOC",
+            "so2": "SO2",
+            "nh3": "NH3",
+            "pm2_5": "PM25",
+            "pm10":"PM10"
+        },
+    ):
+        
+        super().__init__()
+
+        filename_pattern = fr"CAMS-REG-ANT_EUR_0\.05x0\.1_anthro_(?P<substance>\w+)_v6\.1-Ref2_yearly_{year}\.nc"
+
+        nc_dir = Path(nc_dir)
+        if not nc_dir.is_dir():
+                raise FileNotFoundError(
+                    f"Profiles directory {nc_dir} is not a directory."
+                )
+        nc_files = [f for f in nc_dir.iterdir() if f.is_file()]
+
+        if profiles_dir is None:
+            profiles_dir = Path(nc_dir)
+        else:
+            profiles_dir = Path(profiles_dir)
+            if not profiles_dir.is_dir():
+                raise FileNotFoundError(
+                    f"Profiles directory {profiles_dir} is not a directory."
+                )
+        
+        # Read the vertical and temporal profile files
+        v_profiles, v_profiles_indexes = read_vertical_profiles(profiles_dir)
+
+        t_profiles, t_profiles_indexes = read_temporal_profiles(
+            profiles_dir,
+            profile_csv_kwargs={
+                "encoding": "latin",
+            },
+        )
+        # Rename substances in profiles according to dictionary
+        if "substance" in t_profiles_indexes.dims:
+            t_profiles_indexes = t_profiles_indexes.assign_coords(
+                substance=[
+                    substances_mapping_profiles[name] for name in t_profiles_indexes['substance'].values
+                ]
+            )
+        if "substance" in v_profiles_indexes.dims:
+            v_profiles_indexes = v_profiles_indexes.assign_coords(
+                substance=[
+                    substances_mapping_profiles[name] for name in v_profiles_indexes['substance'].values
+                ]
+            )
+
+        # Read in emission data
+        inv_data = {}
+
+        for nc_file in nc_files:
+            if nc_file.suffix == '.nc' and re.match(filename_pattern, nc_file.name):
+        
+                ds = xr.open_dataset(nc_file)
+
+                match = re.match(filename_pattern, nc_file.name)
+                sub_cams = match.group('substance')
+                sub_name = substances_mapping.get(sub_cams, None)
+                if sub_name is None: 
+                    raise ValueError(f"No substance mapping fround for {sub_cams}")
+            
+                file_vars = ds.data_vars.keys()
+
+                for var, cat in categories_mapping.items():
+                    if var in file_vars:
+                        col_index = (cat, sub_name)
+                        inv_data[col_index] = ds[var].expand_dims(cat_sub=[col_index])
+                    else:
+                        raise ValueError(f"Category {var} not found in the file {nc_file}.")
+
+                # Extract grid information
+                if not hasattr(self, 'grid'):
+                    self.grid = RegularGrid.from_centers(
+                        x_centers=ds["lon"].values,
+                        y_centers=ds["lat"].values,
+                        name="CAMS_REG_AQ",
+                        rounding=2,
+                    )
+            else:
+                print(f"Skipping file: {nc_file} (does not match the expected pattern or not a .nc file)")
+
+        # List of pairs (emis cat, sub)
+        cat_sub_pairs = [(cat, sub) for cat in categories_mapping.values() for sub in substances_mapping.values()]
+        
+        # Reshape data to regular grid
+        da_inventory: xr.DataArray = (
+            xr.concat(list(inv_data.values()), dim="cat_sub")
+            .stack(cell=("lon", "lat"))
+            .drop_vars(["lon", "lat", "time"])
+        )
+
+        def process_cat_sub(cs):
+            return (
+                da_inventory.sel(cat_sub=cs).values.flatten() 
+                * UNIT_CONVERSION_FACTOR 
+            )
+
+        self.gdf = gpd.GeoDataFrame(
+            {cs: process_cat_sub(cs) for cs in cat_sub_pairs},
+            geometry=self.grid.gdf.geometry
+            )
+        
+        self.gdfs = {} 
+        
+        self.set_profiles(t_profiles, t_profiles_indexes)
+        self.set_profiles(v_profiles, v_profiles_indexes)

From 8da338f2c9e6a99d695565dd098c16e3c4c61ac1 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Mon, 18 Nov 2024 14:49:21 +0100
Subject: [PATCH 04/13] adding to doc

---
 docs/source/inventories.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/source/inventories.rst b/docs/source/inventories.rst
index 474d024d..72e7d3eb 100644
--- a/docs/source/inventories.rst
+++ b/docs/source/inventories.rst
@@ -58,6 +58,11 @@ Saunois
 
 :py:class:`emiproc.inventories.saunois.SaunoisInventory`
 
+CAMS_REG_AQ
+^^^^^^^^^^^
+
+:py:class:`emiproc.inventories.cams_reg_aq.CAMS_REG_AQ`
+
 
 
 Grids 

From 4bec98e875b510e6c53acf17e1926bc9ef03e699 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Mon, 18 Nov 2024 14:50:02 +0100
Subject: [PATCH 05/13] improved the name

---
 docs/source/inventories.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/inventories.rst b/docs/source/inventories.rst
index 72e7d3eb..52dc6f32 100644
--- a/docs/source/inventories.rst
+++ b/docs/source/inventories.rst
@@ -58,8 +58,8 @@ Saunois
 
 :py:class:`emiproc.inventories.saunois.SaunoisInventory`
 
-CAMS_REG_AQ
-^^^^^^^^^^^
+CAMS Regional Air Quality
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
 :py:class:`emiproc.inventories.cams_reg_aq.CAMS_REG_AQ`
 

From 5507b99d1ceef67c990946fc1ff6f2af875618ff Mon Sep 17 00:00:00 2001
From: corink21 <corina.keller2@gmail.com>
Date: Mon, 18 Nov 2024 15:39:30 +0100
Subject: [PATCH 06/13] Vectorize the rounding of dxs, dys

---
 emiproc/grids.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/emiproc/grids.py b/emiproc/grids.py
index a7003e40..4ffe1465 100644
--- a/emiproc/grids.py
+++ b/emiproc/grids.py
@@ -325,8 +325,8 @@ def from_centers(
             dx = round(dx, rounding)
             dy = round(dy, rounding)
 
-            dxs = [round(dxi, rounding) for dxi in dxs]
-            dys = [round(dyi, rounding) for dyi in dys]
+            dxs = np.round(dxs, decimals=rounding)
+            dys = np.round(dys, decimals=rounding)
 
         if not np.allclose(dxs, dx) or not np.allclose(dys, dy):
             raise ValueError("The centers are not equally spaced.")

From dfb346bd4f76c62ed713146eb406c4c638c48f80 Mon Sep 17 00:00:00 2001
From: corink21 <corina.keller2@gmail.com>
Date: Mon, 18 Nov 2024 15:52:48 +0100
Subject: [PATCH 07/13] docstring added

---
 emiproc/inventories/cams_reg_aq.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
index ecb453ec..7ce1e75b 100644
--- a/emiproc/inventories/cams_reg_aq.py
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -13,6 +13,16 @@
 UNIT_CONVERSION_FACTOR = 1e9  # Tg -> kg
 
 class CAMS_REG_AQ(Inventory):
+    """The CAMS regional air quality inventory.
+
+    Contains gridded data of air pollutants (NOx, CO, CH4, VOC, NH3, SO2, PM2.5, PM10)
+    from the Copernicus Atmosphere Monitoring Service (CAMS).
+
+    You can access the data at
+    `CAMS-REG-ANT v6.1-Ref2
+    <https://eccad.sedoo.fr/#/metadata/608>`_
+
+    """
 
     grid: RegularGrid
 
@@ -56,6 +66,21 @@ def __init__(
             "pm10":"PM10"
         },
     ):
+        """Create a CAMS_REG_ANT-inventory.
+
+        :arg nc_dir: The directory containing the NetCDF emission datasets. One file
+            per air pollutant.
+        :arg profiles_dir: The directory where the vertical and temporal profiles 
+            are stored. If None the directory nc_dir is used.
+        :arg year: Year of the inventory.
+        :arg substances_mapping: How to map the names of air pollutants from the 
+            names of the NetCDF files to names for emiproc. 
+        :arg categories_mapping: How to map the names of the emission categories from
+            the NetCDF files to names for emiproc.
+        :arg substances_mapping_profiles: How to map the names of air pollutants from
+            the vertical and/or temporal profiles to names for emiproc. Make sure this 
+            mapping is consistent with the substances_mapping.
+        """
         
         super().__init__()
 

From 15291c39b3c83ab2e0230bf278d3d2bd54d9dc0c Mon Sep 17 00:00:00 2001
From: corink21 <corina.keller2@gmail.com>
Date: Mon, 18 Nov 2024 16:08:11 +0100
Subject: [PATCH 08/13] Restructured so that file selection is only done in
 one place.

---
 emiproc/inventories/cams_reg_aq.py | 61 ++++++++++++++++--------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
index 7ce1e75b..82874a0e 100644
--- a/emiproc/inventories/cams_reg_aq.py
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -91,7 +91,16 @@ def __init__(
                 raise FileNotFoundError(
                     f"Profiles directory {nc_dir} is not a directory."
                 )
-        nc_files = [f for f in nc_dir.iterdir() if f.is_file()]
+        nc_files = [f for f in nc_dir.iterdir() 
+                    if f.is_file() 
+                    and f.suffix == '.nc'
+                    and re.match(filename_pattern, f.name)
+                    ]
+        
+        if not nc_files:
+            raise FileNotFoundError(
+                f"No .nc files found matching the pattern '{filename_pattern}' in {nc_dir}"
+                )
 
         if profiles_dir is None:
             profiles_dir = Path(nc_dir)
@@ -129,35 +138,29 @@ def __init__(
         inv_data = {}
 
         for nc_file in nc_files:
-            if nc_file.suffix == '.nc' and re.match(filename_pattern, nc_file.name):
-        
-                ds = xr.open_dataset(nc_file)
-
-                match = re.match(filename_pattern, nc_file.name)
-                sub_cams = match.group('substance')
-                sub_name = substances_mapping.get(sub_cams, None)
-                if sub_name is None: 
-                    raise ValueError(f"No substance mapping fround for {sub_cams}")
+            ds = xr.open_dataset(nc_file)
             
-                file_vars = ds.data_vars.keys()
-
-                for var, cat in categories_mapping.items():
-                    if var in file_vars:
-                        col_index = (cat, sub_name)
-                        inv_data[col_index] = ds[var].expand_dims(cat_sub=[col_index])
-                    else:
-                        raise ValueError(f"Category {var} not found in the file {nc_file}.")
-
-                # Extract grid information
-                if not hasattr(self, 'grid'):
-                    self.grid = RegularGrid.from_centers(
-                        x_centers=ds["lon"].values,
-                        y_centers=ds["lat"].values,
-                        name="CAMS_REG_AQ",
-                        rounding=2,
-                    )
-            else:
-                print(f"Skipping file: {nc_file} (does not match the expected pattern or not a .nc file)")
+            match = re.match(filename_pattern, nc_file.name)
+            sub_cams = match.group('substance')
+            sub_name = substances_mapping.get(sub_cams, None)
+            if sub_name is None: 
+                raise ValueError(f"No substance mapping fround for {sub_cams}")
+        
+            file_vars = ds.data_vars.keys()
+            for var, cat in categories_mapping.items():
+                if var in file_vars:
+                    col_index = (cat, sub_name)
+                    inv_data[col_index] = ds[var].expand_dims(cat_sub=[col_index])
+                else:
+                    raise ValueError(f"Category {var} not found in the file {nc_file}.")
+            # Extract grid information
+            if not hasattr(self, 'grid'):
+                self.grid = RegularGrid.from_centers(
+                    x_centers=ds["lon"].values,
+                    y_centers=ds["lat"].values,
+                    name="CAMS_REG_AQ",
+                    rounding=2,
+                )
 
         # List of pairs (emis cat, sub)
         cat_sub_pairs = [(cat, sub) for cat in categories_mapping.values() for sub in substances_mapping.values()]

From 8f22633bed7a6f6de4942a370b71474ee1533104 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Tue, 19 Nov 2024 07:42:39 +0100
Subject: [PATCH 09/13] permalink instead

---
 emiproc/inventories/cams_reg_aq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
index 82874a0e..b0058b1d 100644
--- a/emiproc/inventories/cams_reg_aq.py
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -20,7 +20,7 @@ class CAMS_REG_AQ(Inventory):
 
     You can access the data at
     `CAMS-REG-ANT v6.1-Ref2
-    <https://eccad.sedoo.fr/#/metadata/608>`_
+    <https://permalink.aeris-data.fr/CAMS-REG-ANT>`_
 
     """
 

From 97fc3d3227f93877e3f0d6c4c496a607d694990e Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Tue, 19 Nov 2024 07:43:16 +0100
Subject: [PATCH 10/13] formatted black

---
 emiproc/inventories/cams_reg_aq.py | 76 ++++++++++++++++--------------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
index b0058b1d..a49ee193 100644
--- a/emiproc/inventories/cams_reg_aq.py
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -12,6 +12,7 @@
 
 UNIT_CONVERSION_FACTOR = 1e9  # Tg -> kg
 
+
 class CAMS_REG_AQ(Inventory):
     """The CAMS regional air quality inventory.
 
@@ -39,7 +40,7 @@ def __init__(
             "sox": "SO2",
             "nh3": "NH3",
             "pm2_5": "PM25",
-            "pm10":"PM10"
+            "pm10": "PM10",
         },
         categories_mapping: dict[str, str] = {
             "A_PublicPower": "A",
@@ -53,7 +54,7 @@ def __init__(
             "I_OffRoad": "I",
             "J_Waste": "J",
             "K_AgriLivestock": "K",
-            "L_AgriOther": "L"
+            "L_AgriOther": "L",
         },
         substances_mapping_profiles: dict[str, str] = {
             "nox": "NOx",
@@ -63,44 +64,42 @@ def __init__(
             "so2": "SO2",
             "nh3": "NH3",
             "pm2_5": "PM25",
-            "pm10":"PM10"
+            "pm10": "PM10",
         },
     ):
         """Create a CAMS_REG_ANT-inventory.
 
         :arg nc_dir: The directory containing the NetCDF emission datasets. One file
             per air pollutant.
-        :arg profiles_dir: The directory where the vertical and temporal profiles 
+        :arg profiles_dir: The directory where the vertical and temporal profiles
             are stored. If None the directory nc_dir is used.
         :arg year: Year of the inventory.
-        :arg substances_mapping: How to map the names of air pollutants from the 
-            names of the NetCDF files to names for emiproc. 
+        :arg substances_mapping: How to map the names of air pollutants from the
+            names of the NetCDF files to names for emiproc.
         :arg categories_mapping: How to map the names of the emission categories from
             the NetCDF files to names for emiproc.
         :arg substances_mapping_profiles: How to map the names of air pollutants from
-            the vertical and/or temporal profiles to names for emiproc. Make sure this 
+            the vertical and/or temporal profiles to names for emiproc. Make sure this
             mapping is consistent with the substances_mapping.
         """
-        
+
         super().__init__()
 
-        filename_pattern = fr"CAMS-REG-ANT_EUR_0\.05x0\.1_anthro_(?P<substance>\w+)_v6\.1-Ref2_yearly_{year}\.nc"
+        filename_pattern = rf"CAMS-REG-ANT_EUR_0\.05x0\.1_anthro_(?P<substance>\w+)_v6\.1-Ref2_yearly_{year}\.nc"
 
         nc_dir = Path(nc_dir)
         if not nc_dir.is_dir():
-                raise FileNotFoundError(
-                    f"Profiles directory {nc_dir} is not a directory."
-                )
-        nc_files = [f for f in nc_dir.iterdir() 
-                    if f.is_file() 
-                    and f.suffix == '.nc'
-                    and re.match(filename_pattern, f.name)
-                    ]
-        
+            raise FileNotFoundError(f"Profiles directory {nc_dir} is not a directory.")
+        nc_files = [
+            f
+            for f in nc_dir.iterdir()
+            if f.is_file() and f.suffix == ".nc" and re.match(filename_pattern, f.name)
+        ]
+
         if not nc_files:
             raise FileNotFoundError(
                 f"No .nc files found matching the pattern '{filename_pattern}' in {nc_dir}"
-                )
+            )
 
         if profiles_dir is None:
             profiles_dir = Path(nc_dir)
@@ -110,7 +109,7 @@ def __init__(
                 raise FileNotFoundError(
                     f"Profiles directory {profiles_dir} is not a directory."
                 )
-        
+
         # Read the vertical and temporal profile files
         v_profiles, v_profiles_indexes = read_vertical_profiles(profiles_dir)
 
@@ -124,13 +123,15 @@ def __init__(
         if "substance" in t_profiles_indexes.dims:
             t_profiles_indexes = t_profiles_indexes.assign_coords(
                 substance=[
-                    substances_mapping_profiles[name] for name in t_profiles_indexes['substance'].values
+                    substances_mapping_profiles[name]
+                    for name in t_profiles_indexes["substance"].values
                 ]
             )
         if "substance" in v_profiles_indexes.dims:
             v_profiles_indexes = v_profiles_indexes.assign_coords(
                 substance=[
-                    substances_mapping_profiles[name] for name in v_profiles_indexes['substance'].values
+                    substances_mapping_profiles[name]
+                    for name in v_profiles_indexes["substance"].values
                 ]
             )
 
@@ -139,13 +140,13 @@ def __init__(
 
         for nc_file in nc_files:
             ds = xr.open_dataset(nc_file)
-            
+
             match = re.match(filename_pattern, nc_file.name)
-            sub_cams = match.group('substance')
+            sub_cams = match.group("substance")
             sub_name = substances_mapping.get(sub_cams, None)
-            if sub_name is None: 
+            if sub_name is None:
                 raise ValueError(f"No substance mapping fround for {sub_cams}")
-        
+
             file_vars = ds.data_vars.keys()
             for var, cat in categories_mapping.items():
                 if var in file_vars:
@@ -154,7 +155,7 @@ def __init__(
                 else:
                     raise ValueError(f"Category {var} not found in the file {nc_file}.")
             # Extract grid information
-            if not hasattr(self, 'grid'):
+            if not hasattr(self, "grid"):
                 self.grid = RegularGrid.from_centers(
                     x_centers=ds["lon"].values,
                     y_centers=ds["lat"].values,
@@ -163,8 +164,12 @@ def __init__(
                 )
 
         # List of pairs (emis cat, sub)
-        cat_sub_pairs = [(cat, sub) for cat in categories_mapping.values() for sub in substances_mapping.values()]
-        
+        cat_sub_pairs = [
+            (cat, sub)
+            for cat in categories_mapping.values()
+            for sub in substances_mapping.values()
+        ]
+
         # Reshape data to regular grid
         da_inventory: xr.DataArray = (
             xr.concat(list(inv_data.values()), dim="cat_sub")
@@ -174,16 +179,15 @@ def __init__(
 
         def process_cat_sub(cs):
             return (
-                da_inventory.sel(cat_sub=cs).values.flatten() 
-                * UNIT_CONVERSION_FACTOR 
+                da_inventory.sel(cat_sub=cs).values.flatten() * UNIT_CONVERSION_FACTOR
             )
 
         self.gdf = gpd.GeoDataFrame(
             {cs: process_cat_sub(cs) for cs in cat_sub_pairs},
-            geometry=self.grid.gdf.geometry
-            )
-        
-        self.gdfs = {} 
-        
+            geometry=self.grid.gdf.geometry,
+        )
+
+        self.gdfs = {}
+
         self.set_profiles(t_profiles, t_profiles_indexes)
         self.set_profiles(v_profiles, v_profiles_indexes)

From af3449266bf7eb904ebab5ff4a86dba1cc668bac Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Tue, 19 Nov 2024 10:19:57 +0100
Subject: [PATCH 11/13] work with sub selection of substances

---
 emiproc/inventories/cams_reg_aq.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
index a49ee193..424e4700 100644
--- a/emiproc/inventories/cams_reg_aq.py
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -95,6 +95,7 @@ def __init__(
             for f in nc_dir.iterdir()
             if f.is_file() and f.suffix == ".nc" and re.match(filename_pattern, f.name)
         ]
+        self.logger.debug(f"{nc_files=}")
 
         if not nc_files:
             raise FileNotFoundError(
@@ -138,6 +139,8 @@ def __init__(
         # Read in emission data
         inv_data = {}
 
+        substances_available = []
+
         for nc_file in nc_files:
             ds = xr.open_dataset(nc_file)
 
@@ -146,6 +149,7 @@ def __init__(
             sub_name = substances_mapping.get(sub_cams, None)
             if sub_name is None:
                 raise ValueError(f"No substance mapping fround for {sub_cams}")
+            substances_available.append(sub_name)
 
             file_vars = ds.data_vars.keys()
             for var, cat in categories_mapping.items():
@@ -167,7 +171,7 @@ def __init__(
         cat_sub_pairs = [
             (cat, sub)
             for cat in categories_mapping.values()
-            for sub in substances_mapping.values()
+            for sub in substances_available
         ]
 
         # Reshape data to regular grid

From adefae94067d45e29edc06ccae44a705c15de716 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Thu, 21 Nov 2024 07:50:13 +0100
Subject: [PATCH 12/13] remove profiles

---
 emiproc/inventories/cams_reg_aq.py | 53 ------------------------------
 1 file changed, 53 deletions(-)

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
index 424e4700..6e7ca287 100644
--- a/emiproc/inventories/cams_reg_aq.py
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -30,7 +30,6 @@ class CAMS_REG_AQ(Inventory):
     def __init__(
         self,
         nc_dir: PathLike,
-        profiles_dir: PathLike = None,
         year: int = 2022,
         substances_mapping: dict[str, str] = {
             "nox": "NOx",
@@ -56,31 +55,16 @@ def __init__(
             "K_AgriLivestock": "K",
             "L_AgriOther": "L",
         },
-        substances_mapping_profiles: dict[str, str] = {
-            "nox": "NOx",
-            "co": "CO",
-            "ch4": "CH4",
-            "nmvoc": "VOC",
-            "so2": "SO2",
-            "nh3": "NH3",
-            "pm2_5": "PM25",
-            "pm10": "PM10",
-        },
     ):
         """Create a CAMS_REG_ANT-inventory.
 
         :arg nc_dir: The directory containing the NetCDF emission datasets. One file
             per air pollutant.
-        :arg profiles_dir: The directory where the vertical and temporal profiles
-            are stored. If None the directory nc_dir is used.
         :arg year: Year of the inventory.
         :arg substances_mapping: How to map the names of air pollutants from the
             names of the NetCDF files to names for emiproc.
         :arg categories_mapping: How to map the names of the emission categories from
             the NetCDF files to names for emiproc.
-        :arg substances_mapping_profiles: How to map the names of air pollutants from
-            the vertical and/or temporal profiles to names for emiproc. Make sure this
-            mapping is consistent with the substances_mapping.
         """
 
         super().__init__()
@@ -102,40 +86,6 @@ def __init__(
                 f"No .nc files found matching the pattern '{filename_pattern}' in {nc_dir}"
             )
 
-        if profiles_dir is None:
-            profiles_dir = Path(nc_dir)
-        else:
-            profiles_dir = Path(profiles_dir)
-            if not profiles_dir.is_dir():
-                raise FileNotFoundError(
-                    f"Profiles directory {profiles_dir} is not a directory."
-                )
-
-        # Read the vertical and temporal profile files
-        v_profiles, v_profiles_indexes = read_vertical_profiles(profiles_dir)
-
-        t_profiles, t_profiles_indexes = read_temporal_profiles(
-            profiles_dir,
-            profile_csv_kwargs={
-                "encoding": "latin",
-            },
-        )
-        # Rename substances in profiles according to dictionary
-        if "substance" in t_profiles_indexes.dims:
-            t_profiles_indexes = t_profiles_indexes.assign_coords(
-                substance=[
-                    substances_mapping_profiles[name]
-                    for name in t_profiles_indexes["substance"].values
-                ]
-            )
-        if "substance" in v_profiles_indexes.dims:
-            v_profiles_indexes = v_profiles_indexes.assign_coords(
-                substance=[
-                    substances_mapping_profiles[name]
-                    for name in v_profiles_indexes["substance"].values
-                ]
-            )
-
         # Read in emission data
         inv_data = {}
 
@@ -192,6 +142,3 @@ def process_cat_sub(cs):
         )
 
         self.gdfs = {}
-
-        self.set_profiles(t_profiles, t_profiles_indexes)
-        self.set_profiles(v_profiles, v_profiles_indexes)

From 0582e8b4294830e098103271676d708ae0d5ebe5 Mon Sep 17 00:00:00 2001
From: coli <lionel.constantin@empa.ch>
Date: Thu, 21 Nov 2024 08:08:57 +0100
Subject: [PATCH 13/13] adding unit check

---
 emiproc/inventories/cams_reg_aq.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/emiproc/inventories/cams_reg_aq.py b/emiproc/inventories/cams_reg_aq.py
index 6e7ca287..646e9f04 100644
--- a/emiproc/inventories/cams_reg_aq.py
+++ b/emiproc/inventories/cams_reg_aq.py
@@ -105,6 +105,10 @@ def __init__(
             for var, cat in categories_mapping.items():
                 if var in file_vars:
                     col_index = (cat, sub_name)
+                    if ds[var].attrs["units"] != "Tg":
+                        raise ValueError(
+                            f"Units are {ds[var].attrs['units']}, expected Tg"
+                        )
                     inv_data[col_index] = ds[var].expand_dims(cat_sub=[col_index])
                 else:
                     raise ValueError(f"Category {var} not found in the file {nc_file}.")